diff --git a/.github/workflows/citation.yml b/.github/workflows/citation.yml
new file mode 100644
index 00000000000..412eb89ac67
--- /dev/null
+++ b/.github/workflows/citation.yml
@@ -0,0 +1,20 @@
+on:
+#  push:
+#    paths:
+#      - CITATION.cff
+  workflow_dispatch:
+
+name: CITATION.cff
+jobs:
+  Validate-CITATION-cff:
+    runs-on: ubuntu-latest
+    name: Validate CITATION.cff
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Validate CITATION.cff
+        uses: dieghernan/cff-validator@v3
diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml
index f2905615394..39dbd4d4ce2 100644
--- a/.github/workflows/develop.yml
+++ b/.github/workflows/develop.yml
@@ -1,14 +1,29 @@
 name: develop
 
-# Controls when the action will run. 
+# Controls when the action will run.
 on:
   pull_request:
      branches: [develop,release/*]
+     types:
+       - opened
+       - synchronize
+       - reopened
+       - ready_for_review
   workflow_dispatch:
-  
+
 jobs:
+  is_draft:
+    name: Check if PR is a draft
+    if: github.event.pull_request.draft == true
+
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check if PR is a draft
+        run: echo "This PR is a draft"
+
   linting:
       name: "Flint"
+      if: github.event.pull_request.draft != true
       runs-on: ubuntu-20.04
       steps:
         - name: Setup env.
@@ -16,23 +31,24 @@ jobs:
             sudo apt-get update && sudo apt-get install -yq python3-dev python3-pip python3-tk
             pip install nobvisual==0.2.0 flinter==0.4.0
         - name: Checkout
-          uses: actions/checkout@v3
+          uses: actions/checkout@v4
           with:
             fetch-depth: 1
         - name: Lint
           run: |
             flint score src/ -d 10 -r flinter_rc.yml | tee flint.txt
             score=$(awk '$1==0{print $3}' flint.txt)
-            if (( $(echo "$score < 8.13" |bc -l) )) ; then
+            if (( $(echo "$score < 8.06" |bc -l) )) ; then
               exit 1
             fi
         - name: Archive linter report
-          uses: actions/upload-artifact@v3
+          uses: actions/upload-artifact@v4
           with:
             name: flint-report
             path: flint.txt
             retention-days: 5
   GNU:
+    if: github.event.pull_request.draft != true
     needs: linting
     runs-on: ${{matrix.os}}
     strategy:
@@ -58,7 +74,7 @@ jobs:
         include:
         - os: ubuntu-20.04
           setup-env: |
-            sudo apt-get update && sudo apt-get install -y openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3  cmake-curses-gui 
+            sudo apt-get update && sudo apt-get install -y openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3  cmake-curses-gui
         - os: macos-13
           setup-env: export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 && brew install openmpi && brew install automake && brew install gcc@11 && brew install gcc@12
     env:
@@ -74,7 +90,7 @@ jobs:
         run: ${{ matrix.setup-env }}
       - name: Cache pFUnit
         id: cache-pfunit
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/pkg/pfunit
           key: pfunit-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }}
@@ -96,14 +112,14 @@ jobs:
           +         error stop '*** Encountered 1 or more failures/errors during testing. ***'
            #endif
                  end if
-           
+
           _ACEOF
           git apply pfunit_error_stop.patch && mkdir b && cd b
           cmake -DCMAKE_INSTALL_PREFIX=${HOME}/pkg/pfunit ..
           make -j$(nproc) && make install && cd ../../
       - name: Cache json-fortran
         id: cache-json-fortran
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/pkg/json-fortran
           key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }}
@@ -121,36 +137,36 @@ jobs:
           echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${HOME}/pkg/json-fortran/lib/pkgconfig/" >> $GITHUB_ENV
           echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HOME}/pkg/json-fortran/lib/" >> $GITHUB_ENV
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 1
       - name: Build (CPU backend)
         if: matrix.backend == 'cpu'
         run: |
           echo $PKG_CONFIG_PATH
-          ./regen.sh 
+          ./regen.sh
           ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008"  --with-pfunit=${HOME}/pkg/pfunit/PFUNIT-4.4 --enable-real=${RP}
           make -j$(nproc)
-      - name: Build (CUDA backend)        
+      - name: Build (CUDA backend)
         if: matrix.backend == 'cuda'
         run: |
           sudo apt-get install -y nvidia-cuda-toolkit
-          ./regen.sh 
+          ./regen.sh
           ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008"  --enable-real=${RP} --with-cuda=/usr
           make -j$(nproc)
-      - name: Build (HIP backend)        
+      - name: Build (HIP backend)
         if: matrix.backend == 'HIP'
         run: |
           wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
           echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/debian/ ubuntu main' | sudo tee /etc/apt/sources.list.d/rocm.list
           sudo apt-get update && sudo apt-get install -y rocm-dev
-          ./regen.sh 
-          ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" HIP_HIPCC_FLAGS="-O2 -fPIE" --enable-real=${RP} --with-hip=/opt/rocm-6.0.0
+          ./regen.sh
+          ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" HIP_HIPCC_FLAGS="-O2 -fPIE" --enable-real=${RP} --with-hip=/opt/rocm-6.1.2
           make -j$(nproc)
       - name: Build (OpenCL backend)
         if: matrix.backend == 'opencl'
         run: |
-          ./regen.sh 
+          ./regen.sh
           ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008"  --enable-real=${RP} --with-opencl
           make -j$(nproc)
       - name: Check
@@ -190,7 +206,7 @@ jobs:
           mkdir releng
           tar xf neko-*.tar.gz -C releng
           cd releng/neko-*
-          ./configure FC=${FC} FCFLAGS="-fPIE" --enable-real=${RP} HIP_HIPCC_FLAGS="-O2 -fPIE" --with-hip=/opt/rocm-6.0.0
+          ./configure FC=${FC} FCFLAGS="-fPIE" --enable-real=${RP} HIP_HIPCC_FLAGS="-O2 -fPIE" --with-hip=/opt/rocm-6.1.2
           make -j $(nproc)
       - name: Dist (OpenCL backend)
         if: matrix.backend == 'opencl'
@@ -201,8 +217,9 @@ jobs:
           cd releng/neko-*
           ./configure FC=${FC} --enable-real=${RP} --with-opencl
           make -j $(nproc)
-    
+
   Intel:
+    if: github.event.pull_request.draft != true
     needs: linting
     runs-on: ${{matrix.os}}
     strategy:
@@ -218,7 +235,8 @@ jobs:
             wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list && sudo apt-get update -y && sudo apt install -y --no-install-recommends intel-oneapi-compiler-fortran intel-oneapi-mpi intel-oneapi-mpi-devel intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic
             source /opt/intel/oneapi/setvars.sh
             sudo apt install -y autoconf automake autotools-dev libopenblas-dev make git m4 python3 ca-certificates cmake
-            export PATH=/opt/intel/oneapi/compiler/2024.0/bin:${PATH}
+            INTEL_PATH=$(find /opt/intel/oneapi/compiler/ -name "${FC}" -type f | xargs dirname)
+            export PATH=$INTEL_PATH:${PATH}
             printenv >> $GITHUB_ENV
     env:
       CC: icc
@@ -230,7 +248,7 @@ jobs:
         run: ${{ matrix.setup-env }}
       - name: Cache json-fortran
         id: cache-json-fortran
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/pkg/json-fortran
           key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }}
@@ -248,16 +266,17 @@ jobs:
           echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${HOME}/pkg/json-fortran/lib/pkgconfig/" >> $GITHUB_ENV
           echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HOME}/pkg/json-fortran/lib/" >> $GITHUB_ENV
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 1
       - name: Build
         run: |
           echo $PKG_CONFIG_PATH
           ./regen.sh
-          ./configure FC=${FC} CC=${CC} MPIFC"=mpiifort -fc=${FC}" --enable-real=${RP} 
+          ./configure FC=${FC} CC=${CC} MPIFC"=mpiifort -fc=${FC}" --enable-real=${RP}
           make FCFLAGS="-O2 -stand f08 -warn errors `pkg-config --cflags json-fortran`" -j$(nproc)
   NVIDIA:
+    if: github.event.pull_request.draft != true
     needs: linting
     runs-on: ${{matrix.os}}
     strategy:
@@ -269,7 +288,7 @@ jobs:
         precision: [dp]
         include:
         - os: ubuntu-20.04
-          setup-env: sudo apt-get update && sudo apt-get install -y autoconf automake autotools-dev make git m4 libopenblas-dev && curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg && echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list && sudo apt-get update -y && sudo apt-get install -y nvhpc-23-7 &&  NVARCH=`uname -s`_`uname -m`; export NVARCH && NVCOMPILERS=/opt/nvidia/hpc_sdk; export NVCOMPILERS && PATH=$NVCOMPILERS/$NVARCH/23.7/compilers/bin:$PATH; export PATH &&  export PATH=$NVCOMPILERS/$NVARCH/23.7/comm_libs/mpi/bin:$PATH && printenv >> $GITHUB_ENV
+          setup-env: sudo apt-get update && sudo apt-get install -y autoconf automake autotools-dev make git m4 libopenblas-dev && curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg && echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list && sudo apt-get update -y && sudo apt-get install -y nvhpc-24-3 &&  NVARCH=`uname -s`_`uname -m`; export NVARCH && NVCOMPILERS=/opt/nvidia/hpc_sdk; export NVCOMPILERS && PATH=$NVCOMPILERS/$NVARCH/24.3/compilers/bin:$PATH; export PATH &&  export PATH=$NVCOMPILERS/$NVARCH/24.3/comm_libs/mpi/bin:$PATH && printenv >> $GITHUB_ENV
     env:
       CC: gcc
       FC: ${{ matrix.compiler }}
@@ -285,7 +304,7 @@ jobs:
         run: ${{ matrix.setup-env }}
       - name: Cache json-fortran
         id: cache-json-fortran
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/pkg/json-fortran
           key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }}
@@ -303,7 +322,7 @@ jobs:
           echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${HOME}/pkg/json-fortran/lib/pkgconfig/" >> $GITHUB_ENV
           echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HOME}/pkg/json-fortran/lib/" >> $GITHUB_ENV
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 1
       - name: Build (CPU backend)
@@ -318,9 +337,32 @@ jobs:
         run: |
           git apply patches/nvhpc_bge.patch
           ./regen.sh
-          ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} --with-cuda=/opt/nvidia/hpc_sdk/Linux_x86_64/23.7/cuda/
+          ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} --with-cuda=/opt/nvidia/hpc_sdk/Linux_x86_64/24.3/cuda/
           make
+      - name: Dist (CPU backend)
+        if: matrix.backend == 'cpu'
+        run: |
+          git stash
+          make dist
+          mkdir releng
+          tar xf neko-*.tar.gz -C releng
+          cd releng/neko-*
+          patch -u src/common/signal.f90 -i patches/nvhpc_bge.patch
+          ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP}
+          make -j $(nproc)
+      - name: Dist (CUDA backend)
+        if: matrix.backend == 'cuda'
+        run: |
+          git stash
+          make dist
+          mkdir releng
+          tar xf neko-*.tar.gz -C releng
+          cd releng/neko-*
+          patch -u src/common/signal.f90 -i patches/nvhpc_bge.patch
+          ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} --with-cuda=/opt/nvidia/hpc_sdk/Linux_x86_64/24.3/cuda/
+          make -j $(nproc)
   ReFrame:
+    if: github.event.pull_request.draft != true
     needs: GNU
     runs-on: ubuntu-20.04
     strategy:
@@ -340,10 +382,10 @@ jobs:
       - name: Setup env.
         run: |
           sudo apt-get update && sudo apt-get install -y openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3
-          pip install reframe-hpc 
+          pip install reframe-hpc
       - name: Cache json-fortran
         id: cache-json-fortran
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/pkg/json-fortran
           key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }}
@@ -364,7 +406,7 @@ jobs:
         run: |
           reframe --detect-host-topology
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 1
       - name: Regression tests
diff --git a/.github/workflows/docsdev.yml b/.github/workflows/docsdev.yml
index f495c548c06..116bcfb4e56 100644
--- a/.github/workflows/docsdev.yml
+++ b/.github/workflows/docsdev.yml
@@ -3,11 +3,13 @@ name: Documentation (develop)
 on:
   schedule:
     # * is a special character in YAML so you have to quote this string
-    - cron:  '55 19 * * *'
+    - cron:  '30 03 * * *'
 #  workflow_dispatch:
-  
+
 jobs:
   build-dev-documentation:
+    # This workflow runs only on the official repository.
+    if: github.repository == 'ExtremeFLOW/neko'
     runs-on: ubuntu-latest
 
     steps:
@@ -28,7 +30,7 @@ jobs:
         ./regen.sh
         ./configure FC=${FC}
         make html
-    
+
     # Deploy the HTML documentation to GitHub Pages
     - name: GH Pages Deployment
       uses: peaceiris/actions-gh-pages@v3
diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 502562818cd..85fb25bb182 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Setup env.
       run: |
         sudo apt-get update && sudo apt-get install -y git openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3 doxygen fonts-freefont-ttf graphviz
@@ -30,13 +30,13 @@ jobs:
     
     # Deploy the HTML documentation to GitHub Pages
     - name: GH Pages Deployment
-      uses: peaceiris/actions-gh-pages@v3
+      uses: peaceiris/actions-gh-pages@v4
       with:
         github_token: ${{ secrets.GITHUB_TOKEN }}
         publish_dir: ./doc/html/
         allow_empty_commit: false
         force_orphan: false
         publish_branch: gh-pages
-        destination_dir: docs
+        destination_dir: docs/release
         keep_files: false
         enable_jekyll: true
diff --git a/.github/workflows/formatter.yml b/.github/workflows/formatter.yml
new file mode 100644
index 00000000000..5d3bcdddf05
--- /dev/null
+++ b/.github/workflows/formatter.yml
@@ -0,0 +1,31 @@
+name: Formatting
+
+# Controls when the action will run.
+on:
+#  pull_request:
+#     branches: [develop,release/*,master]
+  workflow_dispatch:
+
+jobs:
+  formatting:
+      name: "Formatter (findent)"
+      description: "Check if the code is formatted according to the rules"
+      runs-on: ubuntu-20.04
+      steps:
+        - name: Setup env.
+          run: |
+            sudo apt-get update && sudo apt-get install -yq python3-dev python3-pip python3-tk
+            pip install findent
+        - name: Checkout
+          uses: actions/checkout@v3
+          with:
+            fetch-depth: 1
+        - name: Check Format
+          run: |
+            export FINDENT_FLAGS="-i2 -d3 -f3 -s3 -w3 -t3 -j3 -k- -Rr -c3"
+            find src/ -name "*.f90" -exec bash -c 'findent < {} > {}.tmp; mv -f {}.tmp {}' \;
+            if [ "$(git diff --exit-code)" != 0 ]; then
+              >&2 echo "Formatting rules violated"
+              git diff --exit-code
+              exit 1
+            fi
\ No newline at end of file
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index f59adf2b45d..ce32b4973e0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,11 +1,11 @@
 name: CI
 
-# Controls when the action will run. 
+# Controls when the action will run.
 on:
   pull_request:
      branches: [master]
   workflow_dispatch:
-  
+
 jobs:
   linting:
       name: "Flint"
@@ -16,18 +16,18 @@ jobs:
             sudo apt-get update && sudo apt-get install -yq python3-dev python3-pip python3-tk
             pip install nobvisual==0.2.0 flinter==0.4.0
         - name: Checkout
-          uses: actions/checkout@v3
+          uses: actions/checkout@v4
           with:
             fetch-depth: 1
         - name: Lint
           run: |
             flint score src/ -d 10 -r flinter_rc.yml | tee flint.txt
             score=$(awk '$1==0{print $3}' flint.txt)
-            if (( $(echo "$score < 8.13" |bc -l) )) ; then
+            if (( $(echo "$score < 8.02" |bc -l) )) ; then
               exit 1
             fi
         - name: Archive linter report
-          uses: actions/upload-artifact@v3
+          uses: actions/upload-artifact@v4
           with:
             name: flint-report
             path: flint.txt
@@ -58,7 +58,7 @@ jobs:
         include:
         - os: ubuntu-20.04
           setup-env: |
-            sudo apt-get update && sudo apt-get install -y openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3  cmake-curses-gui 
+            sudo apt-get update && sudo apt-get install -y openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3  cmake-curses-gui
         - os: macos-13
           setup-env: export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 && brew install openmpi && brew install automake && brew install gcc@11 && brew install gcc@12
     env:
@@ -74,7 +74,7 @@ jobs:
         run: ${{ matrix.setup-env }}
       - name: Cache pFUnit
         id: cache-pfunit
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/pkg/pfunit
           key: pfunit-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }}
@@ -96,14 +96,14 @@ jobs:
           +         error stop '*** Encountered 1 or more failures/errors during testing. ***'
            #endif
                  end if
-           
+
           _ACEOF
           git apply pfunit_error_stop.patch && mkdir b && cd b
           cmake -DCMAKE_INSTALL_PREFIX=${HOME}/pkg/pfunit ..
           make -j$(nproc) && make install && cd ../../
       - name: Cache json-fortran
         id: cache-json-fortran
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/pkg/json-fortran
           key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }}
@@ -121,36 +121,36 @@ jobs:
           echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${HOME}/pkg/json-fortran/lib/pkgconfig/" >> $GITHUB_ENV
           echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HOME}/pkg/json-fortran/lib/" >> $GITHUB_ENV
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 1
       - name: Build (CPU backend)
         if: matrix.backend == 'cpu'
         run: |
           echo $PKG_CONFIG_PATH
-          ./regen.sh 
+          ./regen.sh
           ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008"  --with-pfunit=${HOME}/pkg/pfunit/PFUNIT-4.4 --enable-real=${RP}
           make -j$(nproc)
-      - name: Build (CUDA backend)        
+      - name: Build (CUDA backend)
         if: matrix.backend == 'cuda'
         run: |
           sudo apt-get install -y nvidia-cuda-toolkit
-          ./regen.sh 
+          ./regen.sh
           ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008"  --enable-real=${RP} --with-cuda=/usr
           make -j$(nproc)
-      - name: Build (HIP backend)        
+      - name: Build (HIP backend)
         if: matrix.backend == 'HIP'
         run: |
           wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add -
           echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/debian/ ubuntu main' | sudo tee /etc/apt/sources.list.d/rocm.list
           sudo apt-get update && sudo apt-get install -y rocm-dev
-          ./regen.sh 
-          ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" HIP_HIPCC_FLAGS="-O2 -fPIE" --enable-real=${RP} --with-hip=/opt/rocm-6.0.0/
+          ./regen.sh
+          ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" HIP_HIPCC_FLAGS="-O2 -fPIE" --enable-real=${RP} --with-hip=/opt/rocm-6.1.2
           make -j$(nproc)
       - name: Build (OpenCL backend)
         if: matrix.backend == 'opencl'
         run: |
-          ./regen.sh 
+          ./regen.sh
           ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008"  --enable-real=${RP} --with-opencl
           make -j$(nproc)
       - name: Check
@@ -190,7 +190,7 @@ jobs:
           mkdir releng
           tar xf neko-*.tar.gz -C releng
           cd releng/neko-*
-          ./configure FC=${FC} FCFLAGS="-fPIE" --enable-real=${RP} HIP_HIPCC_FLAGS="-O2 -fPIE" --with-hip=/opt/rocm-6.0.0
+          ./configure FC=${FC} FCFLAGS="-fPIE" --enable-real=${RP} HIP_HIPCC_FLAGS="-O2 -fPIE" --with-hip=/opt/rocm-6.1.2
           make -j $(nproc)
       - name: Dist (OpenCL backend)
         if: matrix.backend == 'opencl'
@@ -201,7 +201,7 @@ jobs:
           cd releng/neko-*
           ./configure FC=${FC} --enable-real=${RP} --with-opencl
           make -j $(nproc)
-    
+
   Intel:
     needs: linting
     runs-on: ${{matrix.os}}
@@ -218,7 +218,8 @@ jobs:
             wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list && sudo apt-get update -y && sudo apt install -y --no-install-recommends intel-oneapi-compiler-fortran intel-oneapi-mpi intel-oneapi-mpi-devel intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic
             source /opt/intel/oneapi/setvars.sh
             sudo apt install -y autoconf automake autotools-dev libopenblas-dev make git m4 python3 ca-certificates cmake
-            export PATH=/opt/intel/oneapi/compiler/2024.0/bin:${PATH}
+            INTEL_PATH=$(find /opt/intel/oneapi/compiler/ -name "${FC}" -type f | xargs dirname)
+            export PATH=$INTEL_PATH:${PATH}
             printenv >> $GITHUB_ENV
     env:
       CC: icc
@@ -230,7 +231,7 @@ jobs:
         run: ${{ matrix.setup-env }}
       - name: Cache json-fortran
         id: cache-json-fortran
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/pkg/json-fortran
           key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }}
@@ -248,14 +249,14 @@ jobs:
           echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${HOME}/pkg/json-fortran/lib/pkgconfig/" >> $GITHUB_ENV
           echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HOME}/pkg/json-fortran/lib/" >> $GITHUB_ENV
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 1
       - name: Build
         run: |
           echo $PKG_CONFIG_PATH
           ./regen.sh
-          ./configure FC=${FC} CC=${CC} MPIFC"=mpiifort -fc=${FC}" --enable-real=${RP} 
+          ./configure FC=${FC} CC=${CC} MPIFC"=mpiifort -fc=${FC}" --enable-real=${RP}
           make FCFLAGS="-O2 -stand f08 -warn errors `pkg-config --cflags json-fortran`" -j$(nproc)
   NVIDIA:
     needs: linting
@@ -269,7 +270,7 @@ jobs:
         precision: [dp]
         include:
         - os: ubuntu-20.04
-          setup-env: sudo apt-get update && sudo apt-get install -y autoconf automake autotools-dev make git m4 libopenblas-dev && curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg && echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list && sudo apt-get update -y && sudo apt-get install -y nvhpc-23-7 &&  NVARCH=`uname -s`_`uname -m`; export NVARCH && NVCOMPILERS=/opt/nvidia/hpc_sdk; export NVCOMPILERS && PATH=$NVCOMPILERS/$NVARCH/23.7/compilers/bin:$PATH; export PATH &&  export PATH=$NVCOMPILERS/$NVARCH/23.7/comm_libs/mpi/bin:$PATH && printenv >> $GITHUB_ENV
+          setup-env: sudo apt-get update && sudo apt-get install -y autoconf automake autotools-dev make git m4 libopenblas-dev && curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg && echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list && sudo apt-get update -y && sudo apt-get install -y nvhpc-24-3 &&  NVARCH=`uname -s`_`uname -m`; export NVARCH && NVCOMPILERS=/opt/nvidia/hpc_sdk; export NVCOMPILERS && PATH=$NVCOMPILERS/$NVARCH/24.3/compilers/bin:$PATH; export PATH &&  export PATH=$NVCOMPILERS/$NVARCH/24.3/comm_libs/mpi/bin:$PATH && printenv >> $GITHUB_ENV
     env:
       CC: gcc
       FC: ${{ matrix.compiler }}
@@ -285,7 +286,7 @@ jobs:
         run: ${{ matrix.setup-env }}
       - name: Cache json-fortran
         id: cache-json-fortran
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/pkg/json-fortran
           key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }}
@@ -303,7 +304,7 @@ jobs:
           echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${HOME}/pkg/json-fortran/lib/pkgconfig/" >> $GITHUB_ENV
           echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HOME}/pkg/json-fortran/lib/" >> $GITHUB_ENV
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 1
       - name: Build (CPU backend)
@@ -318,8 +319,30 @@ jobs:
         run: |
           git apply patches/nvhpc_bge.patch
           ./regen.sh
-          ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} --with-cuda=/opt/nvidia/hpc_sdk/Linux_x86_64/23.7/cuda/
+          ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} --with-cuda=/opt/nvidia/hpc_sdk/Linux_x86_64/24.3/cuda/
           make
+      - name: Dist (CPU backend)
+        if: matrix.backend == 'cpu'
+        run: |
+          git stash
+          make dist
+          mkdir releng
+          tar xf neko-*.tar.gz -C releng
+          cd releng/neko-*
+          patch -u src/common/signal.f90 -i patches/nvhpc_bge.patch
+          ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP}
+          make -j $(nproc)
+      - name: Dist (CUDA backend)
+        if: matrix.backend == 'cuda'
+        run: |
+          git stash
+          make dist
+          mkdir releng
+          tar xf neko-*.tar.gz -C releng
+          cd releng/neko-*
+          patch -u src/common/signal.f90 -i patches/nvhpc_bge.patch
+          ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} --with-cuda=/opt/nvidia/hpc_sdk/Linux_x86_64/24.3/cuda/
+          make -j $(nproc)
   ReFrame:
     needs: GNU
     runs-on: ubuntu-20.04
@@ -340,10 +363,10 @@ jobs:
       - name: Setup env.
         run: |
           sudo apt-get update && sudo apt-get install -y openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3
-          pip install reframe-hpc 
+          pip install reframe-hpc
       - name: Cache json-fortran
         id: cache-json-fortran
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/pkg/json-fortran
           key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }}
@@ -364,7 +387,7 @@ jobs:
         run: |
           reframe --detect-host-topology
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           fetch-depth: 1
       - name: Regression tests
@@ -378,7 +401,7 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Setup env.
       run: |
         sudo apt-get update && sudo apt-get install -y git openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3 doxygen fonts-freefont-ttf graphviz
@@ -395,16 +418,16 @@ jobs:
         ./regen.sh
         ./configure FC=${FC}
         make html
-    
+
     # Deploy the HTML documentation to GitHub Pages
     - name: GH Pages Deployment
-      uses: peaceiris/actions-gh-pages@v3
+      uses: peaceiris/actions-gh-pages@v4
       with:
         github_token: ${{ secrets.GITHUB_TOKEN }}
         publish_dir: ./doc/html/
         allow_empty_commit: false
         force_orphan: false
         publish_branch: gh-pages
-        destination_dir: docs
+        destination_dir: docs/release
         keep_files: false
         enable_jekyll: true
diff --git a/.gitignore b/.gitignore
index c5a86768cbf..e507c2e03b6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,13 @@
+# Ignore everything which does not have an extension or are a directory
+*
+!*.*
+!*/
+
+# Exceptions to the no-extension rule
+!AUTHORS
+!COPYING
+
+# General list of files to ignore
 *.mod
 *.o
 *.dirstamp
@@ -12,21 +22,17 @@ config.sub
 depcomp
 Makefile.in
 !tests/*/*Makefile.in
-genmeshbox
-prepart
-rea2nbin
 libneko.a
 .vscode
-neko
 neko.pc
 install-sh
 configure
+configure~
 cltostring.sh
 device_config.h
 neko_config.f90
 num_types.f90
 comm.F90
-makeneko
 missing
 test-driver
 tests/*/*.log
@@ -38,11 +44,21 @@ doxygen.stamp
 doc/html
 examples/*/*.nek5000
 examples/*/*0.f0*
-contrib/average_field_in_space
-contrib/average_fields_in_time
-contrib/postprocess_fluid_stats
 *.chkp
 *.lst
 
+# Ignore temporary files
+*~
+*.swp
 
-
+# Explicitly ignore binaries to avoid confusion
+neko
+makeneko
+rea2nbin
+genmeshbox
+prepart
+average_field_in_space
+average_fields_in_time
+postprocess_fluid_stats
+map_to_equidistant_1d
+calc_lift_from_field
diff --git a/AUTHORS b/AUTHORS
index 5365c1d7bd3..05e4030c6f9 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,9 +1 @@
-Niclas Jansson <njansson@kth.se>
-Martin Karp <makarp@kth.se>
-Jacob Wahlgren <jacobwah@kth.se>
-Steven W.D. Chien <wdchien@kth.se>
-Adalberto Perez <adperez@kth.se>
-Adam Peplinski <adam@mech.kth.se>
-Timofey Mukha <tmu@kth.se>
-Philipp Schlatter <pschlatt@mech.kth.se>
-
+This information has been migrated to CITATION.cff
\ No newline at end of file
diff --git a/CITATION.cff b/CITATION.cff
index 7a9ef473ee1..56476e88553 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -1,18 +1,44 @@
+# This CITATION.cff file was generated with cffinit.
+# Visit https://bit.ly/cffinit to generate yours today!
+
+cff-version: 1.2.0
+title: Neko
+message: 'If you use Neko, please cite it as below.'
+type: software
 authors:
+  - family-names: Alekseenko
+    given-names: Andrey
+  - family-names: Baconnet
+    given-names: Victor
+  - family-names: Chien
+    given-names: Steven
+  - family-names: Du
+    given-names: Shiyu
   - family-names: Jansson
     given-names: Niclas
   - family-names: Karp
     given-names: Martin
   - family-names: Mukha
     given-names: Timofey
+  - family-names: Olsen
+    given-names: Time Felle
+  - family-names: Páll
+    given-names: Szilárd
+  - family-names: Peplinski
+    given-names: Adam
   - family-names: Perez
     given-names: Adalberto
-  - family-names: Baconnet
-    given-names: Victor
   - family-names: Schlatter
     given-names: Philipp
-cff-version: 1.2.0
-message: "If you use Neko, please cite it as below."
+  - family-names: Vincent
+    given-names: Jonathan
+  - family-names: Wahlgren
+    given-names: Jacob
+identifiers:
+  - type: doi
+    value: 10.5281/zenodo.6631055
+url: 'https://www.neko.cfd'
+license: BSD-3-Clause
 preferred-citation:
   authors:
     - family-names: Jansson
@@ -25,8 +51,12 @@ preferred-citation:
       given-names: Stefano
     - family-names: Schlatter
       given-names: Philipp
-  title: "Neko: A Modern, Portable, and Scalable Framework for High-Fidelity Computational Fluid Dynamics"
+  title: >-
+    Neko: A Modern, Portable, and Scalable Framework for High-Fidelity
+    Computational Fluid Dynamics
   type: article
-  year: 2021
-  url: https://arxiv.org/abs/2107.01243
-title: "Neko"
\ No newline at end of file
+  journal: Computer & Fluids
+  volume: 275
+  year: 2024
+  doi: '10.1016/j.compfluid.2024.106243'
+
diff --git a/Makefile.am b/Makefile.am
index 5af2de0821f..da4a3da9cc2 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,7 +1,11 @@
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = neko.pc
 
-SUBDIRS = src contrib
+SUBDIRS = src
+
+if ENABLE_CONTRIB
+SUBDIRS += contrib
+endif
 
 if ENABLE_PFUNIT
 SUBDIRS += tests
diff --git a/README.md b/README.md
index 89a44ba2e3b..96eec8b724b 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,8 @@
 # ![Neko](https://user-images.githubusercontent.com/750135/169531665-313c3471-50d1-4c44-964a-fee7312d6459.png)
-![CI](https://github.com/ExtremeFLOW/neko/workflows/CI/badge.svg) ![develop](https://github.com/ExtremeFLOW/neko/workflows/develop/badge.svg)
+![CI](https://github.com/ExtremeFLOW/neko/workflows/CI/badge.svg) ![develop](https://github.com/ExtremeFLOW/neko/workflows/develop/badge.svg) [![DOI](https://zenodo.org/badge/338607716.svg)](https://zenodo.org/doi/10.5281/zenodo.6631055)
 ## About
 Neko is a portable framework for high-order spectral element flow simulations. Written in modern Fortran, Neko adopts an object-oriented approach, allowing multi-tier abstractions of the solver stack and facilitating various hardware backends ranging from general-purpose processors, CUDA and HIP enabled accelerators to SX-Aurora vector processors. Neko has its roots in the spectral element code Nek5000 from UChicago/ANL, from where many of the namings, code structure and numerical methods are adopted.
 
-Neko is currently maintained and developed at KTH Royal Institute of Technology.
 
 ## Cloning the project
 
@@ -11,17 +10,33 @@ Neko is currently maintained and developed at KTH Royal Institute of Technology.
 git clone https://github.com/ExtremeFLOW/neko
 ```
 
+## Documentation
+Documentation for Neko is available at https://neko.cfd and most things related to the code, cases, and different features are described there. The documentation is always improving, in large part due to our active users and if something is missing or hard to understand, don't be afraid to start a [discussion](https://github.com/ExtremeFLOW/neko/discussions) or create a [Pull request](https://github.com/ExtremeFLOW/neko/pulls). It is a great way to help us improve and also to start getting involved in the project.
+
 ## Building the project
-To build the project you will need: A Fortran compiler supporting the Fortran-08 standard, a working MPI installation, JSON-Fortran, and BLAS/lapack. Optional dependencies are gslib and ParMETIS. We use automake to build the project. These instructions should work in general, but as the project is quickly developing, things might change.
+To build the project you will need: A Fortran compiler supporting the Fortran-08 standard, a working MPI installation, JSON-Fortran, and BLAS/lapack. Optional dependencies are gslib and ParMETIS. We use autotools to build the project. These instructions should work in general, but as the project is quickly developing, things might change. While we assume MPI and BLAS are installed, if JSON-Fortran is not already available it can be cloned, installed, and the correct paths set with the following commands (Skip this step if you already have an installation of JSON-Fortran).
+
+```bash
+export JSON_INSTALL=/path/to/json-fortran_install # Where you want to install json-fortran
+```
+```bash
+git clone --depth 1 https://github.com/ExtremeFLOW/json-fortran/
+cmake -S json-fortran -B json-fortran/build -DCMAKE_INSTALL_PREFIX=${JSON_INSTALL} -DUSE_GNU_INSTALL_CONVENTION=ON ..
+cmake --build json-fortran/build --parallel
+cmake --install json-fortran/build
+export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${JSON_INSTALL}/lib/ #On some systems lib should be replaced with lib64
+export PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${JSON_INSTALL}/lib/pkgconfig 
+
+```
 
+A basic CPU version of Neko can then be installed according to the following
 ```bash
 cd neko
 ./regen.sh
-./configure --prefix=/path/to/neko_install --with-pfunit=/path/to/pFUnit/installed/PFUNIT-VERSION
+./configure --prefix=/path/to/neko_install # Where you want to install neko
 make install
 ```
-
-More detailed installation instructions can be found in the documentation.
+More detailed installation instructions and all the different options (such as how to install Neko for GPUs) can be found in the documentation available at https://neko.cfd. 
 
 ## Running examples
 After the project has been built
@@ -31,27 +46,20 @@ cd examples/tgv
 /path/to/neko_install/bin/makeneko tgv.f90
 mpirun -np 4 ./neko tgv.case
 ```
-
-## Testing the Code
-Assuming you configured with pFUnit you should be able to test the code with
+If there is not a .f90 (user) file in the example, the standard executable `neko` can also be used, for example:
 ```bash
-make check
+cd examples/hemi
+mpirun -np 4 /path/to/neko_install/bin/neko hemi.case
 ```
+only uses built-in functions and does not need a compiled user file. Whether you will need a user file or not depends on what functionality you want and this is also documented in the documentation.
 
-## Documentation
-Documentation for Neko is available at https://neko.cfd/docs.
-
-To generate the documentation, you need to have both doxygen and dot (part of the Graphviz package) installed (they will be picked up by configure). Once installed, you should be able to generate the documentation with
-```bash
-make html
-```
 ## Publications using Neko
-* Jansson, N., Karp, M., Podobas, A., Markidis, S. and Schlatter, P., 2021. *Neko: A modern, portable, and scalable framework for high-fidelity computational fluid dynamics*. arXiv preprint arXiv:2107.01243.
 * Jansson, N., 2021. *Spectral Element Simulations on the NEC SX-Aurora TSUBASA*. In proc. HPCAsia 2021.
 * Karp, M., Podobas, A., Kenter, T., Jansson, N., Plessl, C., Schlatter, P. and Markidis, S., 2022. *A high-fidelity flow solver for unstructured meshes on field-programmable gate arrays: Design, evaluation, and future challenges*. In proc. HPCAsia 2022.
 * Karp, M., Jansson, N., Podobas, A., Schlatter, P., and Markidis, S., 2022. *Reducing Communication in the Conjugate Gradient Method: A Case Study on High-Order Finite Elements*. In proc. PASC 2022.
-* Karp, M., Massaro, D., Jansson, N., Hart, A., Wahlgren, J., Schlatter, P., and Markidis, S., 2022. *Large-Scale Direct Numerical Simulations of Turbulence Using GPUs and Modern Fortran*. arXiv preprint arXiv:2207:07098.
+* Karp, M., Massaro, D., Jansson, N., Hart, A., Wahlgren, J., Schlatter, P., and Markidis, S., 2023. *Large-Scale Direct Numerical Simulations of Turbulence Using GPUs and Modern Fortran*. The International Journal of High Performance Computing Applications, 37, 5.
 * Jansson, N., Karp, M., Perez, A., Mukha, T., Ju, Y., Liu, J., Páll, S., Laure, E., Weinkauf, T., Schumacher, J., Schlatter, P., Markidis, S., 2023. *Exploring the Ultimate Regime of Turbulent Rayleigh–Bénard Convection Through Unprecedented Spectral-Element Simulations*. SC '23: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis.
+* Jansson, N., Karp, M., Podobas, A., Markidis, S. and Schlatter, P., 2024. *Neko: A modern, portable, and scalable framework for high-fidelity computational fluid dynamics*. Computer & Fluids, 275.
 
 ## Acknowledgments
 The development of Neko was supported by the European Commission Horizon 2020 project grant *EPiGRAM-HS: Exascale Programming Models for Heterogeneous Systems* (grant reference 801039), the Swedish Research Council project grant *Efficient Algorithms for Exascale Computational Fluid Dynamics* (grant reference 2019-04723) and the SeRC Exascale Simulation Software Initiative (SESSI). The Neko logo was designed by Robert Hansen Jagrelius.
diff --git a/bench/gs/driver.f90 b/bench/gs/driver.f90
index 49f97140200..47a16f44c31 100644
--- a/bench/gs/driver.f90
+++ b/bench/gs/driver.f90
@@ -46,7 +46,7 @@ program gsbench
   allocate(u(n))
   call rzero(u, n)
   call device_map(u, u_d, n)
-  call device_memcpy(u, u_d, n, HOST_TO_DEVICE)
+  call device_memcpy(u, u_d, n, HOST_TO_DEVICE, sync=.false.)
 
   ! warmup
   do i = 1, niter
diff --git a/bench/tgv32/tgv.case b/bench/tgv32/tgv.case
index 9e0793039ad..e448dae39b8 100644
--- a/bench/tgv32/tgv.case
+++ b/bench/tgv32/tgv.case
@@ -6,7 +6,7 @@
   "output_boundary": false,
   "output_checkpoints": false,
   "output_at_end": false,
-  "end_time": 20.0,
+  "end_time": 10.0,
   "timestep": 1e-3,
   "numerics": {
     "time_order": 3,
diff --git a/configure.ac b/configure.ac
index 6f9896279d5..40b93e3f388 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,5 +1,5 @@
 AC_PREREQ([2.69])
-AC_INIT([neko],[0.7.2])
+AC_INIT([neko],[0.8.0])
 AM_INIT_AUTOMAKE([foreign subdir-objects])
 AM_MAINTAINER_MODE
 AC_CONFIG_MACRO_DIR([m4])
@@ -73,6 +73,10 @@ if test "x$ax_cv_fc_compiler_vendor" = xportland; then
    FCFLAGS="$FCFLAGS  -Mbackslash"
 fi
 
+if test "x$ax_cv_fc_compiler_vendor" = xnvhpc; then
+   FCFLAGS="$FCFLAGS  -Mbackslash"
+fi
+
 # Store build information (date, host, FC)
 AC_SUBST(NEKO_BUILD_INFO,
         ["(build: $(date +%Y-%m-%d) on $host using $ax_cv_fc_compiler_vendor)"], [])
@@ -261,6 +265,7 @@ AM_CONDITIONAL([ENABLE_ADIOS2], [test "x${have_adios2}" = xyes])
 AM_CONDITIONAL([ENABLE_CUDA], [test "x${have_cuda}" = xyes])
 AM_CONDITIONAL([ENABLE_HIP], [test "x${have_hip}" = xyes])
 AM_CONDITIONAL([ENABLE_OPENCL], [test "x${have_opencl}" = xyes])
+AM_CONDITIONAL([ENABLE_CONTRIB], [test "x${enable_contrib}" = xyes])
 
 # Set device dependent flags
 AC_SUBST(CUDA_ARCH)
@@ -327,8 +332,10 @@ if test "x${enable_contrib}" = xyes; then
 		    contrib/rea2nbin/Makefile\
 		    contrib/genmeshbox/Makefile\
 		    contrib/average_fields_in_time/Makefile\
+		    contrib/calc_lift_from_field/Makefile\
 		    contrib/postprocess_fluid_stats/Makefile\
 		    contrib/average_field_in_space/Makefile\
+		    contrib/map_to_equidistant_1d/Makefile\
 		    contrib/prepart/Makefile])
 fi
 # Doxygen
diff --git a/contrib/Makefile.am b/contrib/Makefile.am
index 0bcf5d2f7f2..2cd31c89925 100644
--- a/contrib/Makefile.am
+++ b/contrib/Makefile.am
@@ -3,4 +3,7 @@ SUBDIRS = rea2nbin\
 	  genmeshbox\
 	  average_fields_in_time\
 	  average_field_in_space\
-	  postprocess_fluid_stats
+	  calc_lift_from_field\
+	  postprocess_fluid_stats\
+	  map_to_equidistant_1d
+
diff --git a/contrib/average_field_in_space/average_field_in_space.f90 b/contrib/average_field_in_space/average_field_in_space.f90
index 1fd8cd67ac9..165646e3c30 100644
--- a/contrib/average_field_in_space/average_field_in_space.f90
+++ b/contrib/average_field_in_space/average_field_in_space.f90
@@ -2,7 +2,6 @@
 !! Martin Karp 17/02-23
 program average_field_in_space
   use neko
-  use mean_flow
   implicit none
   
   character(len=NEKO_FNAME_LEN) :: inputchar, mesh_fname, field_fname, hom_dir, output_fname
@@ -15,18 +14,30 @@ program average_field_in_space
   type(space_t) :: Xh
   type(mesh_t) :: msh
   type(gs_t) :: gs_h
+  type(map_1d_t) :: map_1d
   type(field_t), pointer :: u, avg_u, old_u, el_heights
   type(vector_ptr_t), allocatable :: fields(:)
-  integer, allocatable :: hom_dir_el(:)
-  integer :: argc, i, n, lx, j, e, n_levels
+  type(matrix_t) :: avg_matrix
+  type(vector_t) :: volume_per_gll_lvl
+  integer :: argc, i, n, lx, j, e, n_levels, dir, ierr, n_1d, tstep
+  logical :: avg_to_1d = .false.
+  real(kind=rp) :: coord
   
   argc = command_argument_count()
 
-  if ((argc .lt. 5) .or. (argc .gt. 5)) then
+  if ((argc .lt. 4) .or. (argc .gt. 4)) then
      if (pe_rank .eq. 0) then
-        write(*,*) 'Usage: ./average_field_in_space mesh.nmsh field.fld dir(x, y, z) n_levels  outfield.fld' 
-        write(*,*) 'Example command: ./average_field_in_space mesh.nmsh fieldblabla.fld x n_levels outfield.fld'
-        write(*,*) 'Computes the spatial average in x of the field fieldblabla.nek5000 and stores in outfield.fld'
+        write(*,*) 'Usage: ./average_field_in_space mesh.nmsh field.fld dir(x, y, z, xz, xy, yz)  outfield.(fld,csv)' 
+        write(*,*) '----'
+        write(*,*) 'Example command for avg in 1 direction: ./average_field_in_space mesh.nmsh fieldblabla.fld x  outfield.fld'
+        write(*,*) 'Computes spatial average in 1 direction and saves it in outfield.fld'
+        write(*,*) '----'
+        write(*,*) 'Example command: ./average_field_in_space mesh.nmsh fieldblabla.fld xy  out.csv'
+        write(*,*) 'Computes the spatial average in 2 directions directly of the field fieldblabla.nek5000 and stores in out.csv'
+        write(*,*) '----'
+        write(*,*) 'In out.csv the first col are the coords of the GLL points'
+        write(*,*) 'In columns 2-n_fields are the averages for all fields in fieldblabla.fld'
+        write(*,*) 'If averaging in 2 directions output must be .csv and for 1 direction .fld'
      end if
      stop
   end if
@@ -42,8 +53,6 @@ program average_field_in_space
   call get_command_argument(3, inputchar) 
   read(inputchar, *) hom_dir
   call get_command_argument(4, inputchar) 
-  read(inputchar, *) n_levels
-  call get_command_argument(5, inputchar) 
   read(inputchar, *) output_fname
   
   call mesh_file%read(msh)
@@ -96,58 +105,108 @@ program average_field_in_space
   old_u => neko_field_registry%get_field('old_u')
   call neko_field_registry%add_field(dof, 'el_heights')
   el_heights => neko_field_registry%get_field('el_heights')
-
-  !test
-  allocate(hom_dir_el(msh%nelv))
-
-  do i = 1, msh%nelv
-     !find height in hom-dir
-     !store direction which is hom
-     !set element to height
-     !we assume elements are stacked on eachother...
-     el_dim(1,:) = abs(msh%elements(i)%e%pts(1)%p%x-msh%elements(i)%e%pts(2)%p%x)
-     el_dim(2,:) = abs(msh%elements(i)%e%pts(1)%p%x-msh%elements(i)%e%pts(3)%p%x)
-     el_dim(3,:) = abs(msh%elements(i)%e%pts(1)%p%x-msh%elements(i)%e%pts(5)%p%x)
-     ! 1 corresponds to r, 2 to s, 3 to t
-     if (trim(hom_dir) .eq. 'x') then
-        hom_dir_el(i) = maxloc(el_dim(:,1),dim=1)
-        el_h = el_dim(1,hom_dir_el(i))
-     else if (trim(hom_dir) .eq. 'y') then
-        hom_dir_el(i) = maxloc(el_dim(:,2),dim=1)
-        el_h = el_dim(2,hom_dir_el(i))
-     else if (trim(hom_dir) .eq. 'z') then
-        hom_dir_el(i) = maxloc(el_dim(:,3),dim=1)
-        el_h = el_dim(3,hom_dir_el(i))
-     else 
-        call neko_error('homogenous direction not supported')
-     end if
-     el_heights%x(:,:,:,i) = el_h
-  end do
+  ! 1 corresponds to x, 2 to y, 3 to z
+  if (trim(hom_dir) .eq. 'x') then
+     dir = 1
+     avg_to_1d = .false.
+  else if (trim(hom_dir) .eq. 'y') then
+     dir = 2
+     avg_to_1d = .false.
+  else if (trim(hom_dir) .eq. 'z') then
+     dir = 3
+     avg_to_1d = .false.
+  else if (trim(hom_dir) .eq. 'yz') then
+     dir = 1
+     avg_to_1d = .true.
+  else if (trim(hom_dir) .eq. 'xz') then
+     dir = 2
+     avg_to_1d = .true.
+  else if (trim(hom_dir) .eq. 'xy') then
+     dir = 3
+     avg_to_1d = .true.
+  else 
+     call neko_error('homogenous direction not supported')
+  end if
+  call map_1d%init(dof, gs_h,  dir, 1e-7_rp)
+  n_levels = map_1d%n_el_lvls
   n = u%dof%size()
-   
-  call copy(u%x,el_heights%x,n)
-  call copy(old_u%x,el_heights%x,n)
-  call copy(avg_u%x,el_heights%x,n)
-  call perform_global_summation(u, avg_u, old_u, n_levels, &
-       hom_dir_el,gs_h, coef%mult, msh%nelv, lx)
-  domain_height = u%x(1,1,1,1)
 
+  !allocate array with pointers to all vectors in the file
   allocate(fields(field_data%size()))
-  
-  call field_data%get_list(fields,field_data%size())
-
-  do i = 1, field_data%size()
-     call copy(old_u%x,fields(i)%v%x,n)
-     call perform_local_summation(u,old_u, el_heights, domain_height, &
-          hom_dir_el, coef, msh%nelv, lx)
-     call copy(old_u%x,u%x,n)
-     call copy(avg_u%x,u%x,n)
-     call perform_global_summation(u, avg_u, old_u, n_levels, &
-          hom_dir_el,gs_h, coef%mult, msh%nelv, lx)
-     call copy(fields(i)%v%x,u%x,n)
-  end do 
+  ! Compute average in two direction directly and store in a csv file
   output_file = file_t(trim(output_fname))
-  call output_file%write(field_data)
+  do tstep = 0, field_data%meta_nsamples-1
+     if (tstep .gt. 0) call field_file%read(field_data)
+     call field_data%get_list(fields,field_data%size())
+     if (pe_rank .eq. 0) write(*,*) 'Averaging field:', tstep
+     if (avg_to_1d) then
+        n_1d = n_levels*Xh%lx
+        call avg_matrix%init(n_1d,field_data%size()+1)
+        call volume_per_gll_lvl%init(n_1d)
+        do i = 1, n
+           volume_per_gll_lvl%x(map_1d%pt_lvl(i,1,1,1)) = &
+           volume_per_gll_lvl%x(map_1d%pt_lvl(i,1,1,1)) + coef%B(i,1,1,1)
+        end do
+        call MPI_Allreduce(MPI_IN_PLACE,volume_per_gll_lvl%x, n_1d, &
+           MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+        !ugly way of getting coordinates, computes average
+        do i = 1, n
+           if (dir .eq. 1) coord = dof%x(i,1,1,1)
+           if (dir .eq. 2) coord = dof%y(i,1,1,1)
+           if (dir .eq. 3) coord = dof%z(i,1,1,1)
+           avg_matrix%x(map_1d%pt_lvl(i,1,1,1),1) = &
+           avg_matrix%x(map_1d%pt_lvl(i,1,1,1),1) + coord*coef%B(i,1,1,1) &
+           /volume_per_gll_lvl%x(map_1d%pt_lvl(i,1,1,1))
+        end do
+        do j = 2, field_data%size()+1
+           do i = 1, n
+              avg_matrix%x(map_1d%pt_lvl(i,1,1,1),j) = &
+              avg_matrix%x(map_1d%pt_lvl(i,1,1,1),j) + fields(j-1)%v%x(i)*coef%B(i,1,1,1) &
+              /volume_per_gll_lvl%x(map_1d%pt_lvl(i,1,1,1))
+           end do
+        end do 
+        call MPI_Allreduce(MPI_IN_PLACE,avg_matrix%x, (field_data%size()+1)*n_1d, &
+           MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+
+        call output_file%write(avg_matrix,field_data%time)
+     ! Compute averages in 1 direction and store in a 3d field (lots of redundant data, sorry)
+     ! Should output a 2d field in principle
+     else
+        do i = 1, msh%nelv
+           !find height in hom-dir
+           !direction in local coords (r,s,t) that is hom is stored in map_1d%dir_el
+           !set element to height
+           !we assume elements are stacked on eachother...
+           el_dim(1,:) = abs(msh%elements(i)%e%pts(1)%p%x-msh%elements(i)%e%pts(2)%p%x)
+           el_dim(2,:) = abs(msh%elements(i)%e%pts(1)%p%x-msh%elements(i)%e%pts(3)%p%x)
+           el_dim(3,:) = abs(msh%elements(i)%e%pts(1)%p%x-msh%elements(i)%e%pts(5)%p%x)
+           ! 1 corresponds to x, 2 to y, 3 to z
+           el_h = el_dim(map_1d%dir_el(i),dir)
+           el_heights%x(:,:,:,i) = el_h
+        end do
+      
+        call copy(u%x,el_heights%x,n)
+        call copy(old_u%x,el_heights%x,n)
+        call copy(avg_u%x,el_heights%x,n)
+        call perform_global_summation(u, avg_u, old_u, n_levels, &
+             map_1d%dir_el,gs_h, coef%mult, msh%nelv, lx)
+        domain_height = u%x(1,1,1,1)
+
+
+        do i = 1, field_data%size()
+           call copy(old_u%x,fields(i)%v%x,n)
+           call perform_local_summation(u,old_u, el_heights, domain_height, &
+                map_1d%dir_el, coef, msh%nelv, lx)
+           call copy(old_u%x,u%x,n)
+           call copy(avg_u%x,u%x,n)
+           call perform_global_summation(u, avg_u, old_u, n_levels, &
+                map_1d%dir_el,gs_h, coef%mult, msh%nelv, lx)
+           call copy(fields(i)%v%x,u%x,n)
+        end do 
+
+        call output_file%write(field_data,field_data%time)
+     end if
+  end do
   if (pe_rank .eq. 0) write(*,*) 'Done'
   
   call neko_finalize
@@ -170,10 +229,11 @@ subroutine perform_global_summation(u, avg_u, old_u, n_levels, hom_dir_el, gs_h,
   do i = 1, n_levels-1
      !compute average 
      if (NEKO_BCKND_DEVICE .eq. 1) &
-        call device_memcpy(u%x, u%x_d, n, HOST_TO_DEVICE)
+          call device_memcpy(u%x, u%x_d, n, &
+                             HOST_TO_DEVICE, sync=.false.)
      call gs_h%op(u,GS_OP_ADD)
      if (NEKO_BCKND_DEVICE .eq. 1) &
-        call device_memcpy(u%x, u%x_d, n, DEVICE_TO_HOST, sync=.true.)
+          call device_memcpy(u%x, u%x_d, n, DEVICE_TO_HOST, sync=.true.)
      call col2(u%x,mult,n)
      do e = 1, nelv
         temp_el = 2.0*u%x(:,:,:,e)-old_u%x(:,:,:,e)
diff --git a/contrib/calc_lift_from_field/Makefile.am b/contrib/calc_lift_from_field/Makefile.am
new file mode 100644
index 00000000000..aaa985f14ac
--- /dev/null
+++ b/contrib/calc_lift_from_field/Makefile.am
@@ -0,0 +1,5 @@
+bin_PROGRAMS = calc_lift_from_field
+calc_lift_from_field_SOURCES = calc_lift_from_field.f90
+calc_lift_from_field_LDADD = $(top_builddir)/src/libneko.a
+calc_lift_from_field_LDFLAGS = $(LDFLAGS) $(LIBS) 
+AM_FCFLAGS = -I@top_builddir@/src
diff --git a/contrib/calc_lift_from_field/calc_lift_from_field.f90 b/contrib/calc_lift_from_field/calc_lift_from_field.f90
new file mode 100644
index 00000000000..0718b9fe7f7
--- /dev/null
+++ b/contrib/calc_lift_from_field/calc_lift_from_field.f90
@@ -0,0 +1,203 @@
+!> Program to calculate the force and acting on a single boundary zone as well as
+!! the torque around a point if one changes the value of center (defaults to 0,0,0).
+!! Outputs the x,y,z prjections of the pressure and viscous forces and torques and
+!! additionally saves the distribution of these quantities along a selected homogenous
+!! direction to a csv file.
+!! Martin Karp 17/01-24
+program calc_lift_from_field
+  use neko
+  use mean_flow
+  use matrix
+  implicit none
+  
+  character(len=NEKO_FNAME_LEN) :: inputchar, mesh_fname, field_fname, hom_dir, output_fname
+  type(file_t) :: field_file, mesh_file, output_file
+  real(kind=rp) :: start_time
+  type(fld_file_data_t) :: field_data
+  type(coef_t) :: coef
+  type(dofmap_t), target :: dof
+  type(space_t) :: Xh
+  type(mesh_t) :: msh
+  type(gs_t) :: gs_h
+  type(map_1d_t) :: map_1d
+  type(field_t) :: u, v, w, p
+  type(field_t) :: s11, s22, s33, s12, s13, s23
+  real(kind=rp) :: s11_, s22_, s33_, s12_, s13_, s23_, center(3)
+  type(matrix_t) :: drag_torq
+  real(kind=rp), allocatable :: lvl_coords(:)
+  real(kind=rp), pointer :: line(:,:,:,:)
+  integer :: argc, i, j, k, n, lx, e, zone_id, dir, f, glb_n_gll_pts, ierr, mem, t
+  real(kind=rp) :: visc, nv(3), dgtq(12)
+  
+  argc = command_argument_count()
+
+  if ((argc .lt. 6) .or. (argc .gt. 6)) then
+     if (pe_rank .eq. 0) then
+        write(*,*) 'Usage: ./calc_lift_from_field mesh.nmsh field.fld zone_number viscosity function_of_coord output.csv' 
+        write(*,*) 'Example command: ./calc_lift_from_field mesh.nmsh fieldblabla.fld 5 0.04 y out.csv'
+        write(*,*) 'Outputs the total force and torque on zone 5 using velocity values from fieldblabla.fld'
+        write(*,*)  'as well as writes the distribution of the force and torque across y to output.csv'
+     end if
+     stop
+  end if
+  
+  call neko_init 
+
+  call get_command_argument(1, inputchar) 
+  read(inputchar, *) mesh_fname
+  mesh_file = file_t(trim(mesh_fname))
+  call get_command_argument(2, inputchar) 
+  read(inputchar, *) field_fname
+  field_file = file_t(trim(field_fname))
+  call get_command_argument(3, inputchar) 
+  read(inputchar, *) zone_id
+  call get_command_argument(4, inputchar) 
+  read(inputchar, *) visc
+  call get_command_argument(5, inputchar) 
+  read(inputchar, *) hom_dir
+  call get_command_argument(6, inputchar) 
+  read(inputchar, *) output_fname
+  output_file = file_t(trim(output_fname))
+
+  call mesh_file%read(msh)
+   
+  call field_data%init(msh%nelv,msh%offset_el)
+  call field_file%read(field_data)
+  
+  lx = field_data%lx
+  !To make sure any deformation made in the user file is passed onto here as well
+  do i = 1,msh%nelv
+     msh%elements(i)%e%pts(1)%p%x(1) = field_data%x%x(linear_index(1,1,1,i,lx,lx,lx))  
+     msh%elements(i)%e%pts(2)%p%x(1) = field_data%x%x(linear_index(lx,1,1,i,lx,lx,lx))
+     msh%elements(i)%e%pts(3)%p%x(1) = field_data%x%x(linear_index(1,lx,1,i,lx,lx,lx))
+     msh%elements(i)%e%pts(4)%p%x(1) = field_data%x%x(linear_index(lx,lx,1,i,lx,lx,lx))
+     msh%elements(i)%e%pts(5)%p%x(1) = field_data%x%x(linear_index(1,1,lx,i,lx,lx,lx))
+     msh%elements(i)%e%pts(6)%p%x(1) = field_data%x%x(linear_index(lx,1,lx,i,lx,lx,lx))
+     msh%elements(i)%e%pts(7)%p%x(1) = field_data%x%x(linear_index(1,lx,lx,i,lx,lx,lx))
+     msh%elements(i)%e%pts(8)%p%x(1) = field_data%x%x(linear_index(lx,lx,lx,i,lx,lx,lx))
+
+     msh%elements(i)%e%pts(1)%p%x(2) = field_data%y%x(linear_index(1,1,1,i,lx,lx,lx))  
+     msh%elements(i)%e%pts(2)%p%x(2) = field_data%y%x(linear_index(lx,1,1,i,lx,lx,lx))
+     msh%elements(i)%e%pts(3)%p%x(2) = field_data%y%x(linear_index(1,lx,1,i,lx,lx,lx))
+     msh%elements(i)%e%pts(4)%p%x(2) = field_data%y%x(linear_index(lx,lx,1,i,lx,lx,lx))
+     msh%elements(i)%e%pts(5)%p%x(2) = field_data%y%x(linear_index(1,1,lx,i,lx,lx,lx))
+     msh%elements(i)%e%pts(6)%p%x(2) = field_data%y%x(linear_index(lx,1,lx,i,lx,lx,lx))
+     msh%elements(i)%e%pts(7)%p%x(2) = field_data%y%x(linear_index(1,lx,lx,i,lx,lx,lx))
+     msh%elements(i)%e%pts(8)%p%x(2) = field_data%y%x(linear_index(lx,lx,lx,i,lx,lx,lx))
+
+     msh%elements(i)%e%pts(1)%p%x(3) = field_data%z%x(linear_index(1,1,1,i,lx,lx,lx))  
+     msh%elements(i)%e%pts(2)%p%x(3) = field_data%z%x(linear_index(lx,1,1,i,lx,lx,lx))
+     msh%elements(i)%e%pts(3)%p%x(3) = field_data%z%x(linear_index(1,lx,1,i,lx,lx,lx))
+     msh%elements(i)%e%pts(4)%p%x(3) = field_data%z%x(linear_index(lx,lx,1,i,lx,lx,lx))
+     msh%elements(i)%e%pts(5)%p%x(3) = field_data%z%x(linear_index(1,1,lx,i,lx,lx,lx))
+     msh%elements(i)%e%pts(6)%p%x(3) = field_data%z%x(linear_index(lx,1,lx,i,lx,lx,lx))
+     msh%elements(i)%e%pts(7)%p%x(3) = field_data%z%x(linear_index(1,lx,lx,i,lx,lx,lx))
+     msh%elements(i)%e%pts(8)%p%x(3) = field_data%z%x(linear_index(lx,lx,lx,i,lx,lx,lx))
+  end do
+
+  call Xh%init(GLL, field_data%lx, field_data%ly, field_data%lz)
+
+  dof = dofmap_t(msh, Xh)
+  call gs_h%init(dof)
+  call coef%init(gs_h)
+  ! Center around which we calculate the torque
+  center = 0.0_rp
+
+  ! 1 corresponds to r, 2 to s, 3 to t
+  if (trim(hom_dir) .eq. 'x') then
+     dir = 1
+     line => dof%x
+  else if (trim(hom_dir) .eq. 'y') then
+     dir = 2
+     line => dof%y
+  else if (trim(hom_dir) .eq. 'z') then
+     dir = 3
+     line => dof%z
+  else 
+     call neko_error('The homogeneous direction should be "x", "y"or "z"')
+  end if
+  call map_1d%init(dof, gs_h, dir, 1e-7_rp)
+
+
+  call u%init(dof)
+  call v%init(dof)
+  call w%init(dof)
+  call p%init(dof)
+  call s11%init(dof)
+  call s22%init(dof)
+  call s33%init(dof)
+  call s12%init(dof)
+  call s13%init(dof)
+  call s23%init(dof)
+  glb_n_gll_pts = map_1d%n_el_lvls*lx
+  call drag_torq%init(glb_n_gll_pts, 14)
+  if (pe_rank .eq. 0) call output_file%set_header('time, coord, forcepx, forcepy, &
+             & forcepz, forcevx, forcevy, forcevz, torqpx, torqpy, &
+             & torqpz, torqvx, torqvy, torqvz')
+
+  do t = 1, field_data%meta_nsamples
+     if (t .ne. 1) call field_file%read(field_data)
+     if(pe_rank .eq. 0) write(*,*) 'Calc drag/lift at t= ', field_data%time
+
+     call copy(u%x,field_data%u%x,dof%size())
+     call copy(v%x,field_data%v%x,dof%size())
+     call copy(w%x,field_data%w%x,dof%size())
+     call copy(p%x,field_data%p%x,dof%size())
+     drag_torq = 0.0_rp
+     !set coords to somoething big
+     drag_torq%x(:,2) = 1e15
+   
+   
+     if(pe_rank .eq. 0) write(*,*) 'Total drag'
+   
+     call strain_rate(s11%x, s22%x, s33%x, s12%x, s13%x, s23%x, u, v, w, coef) 
+     call drag_torque_zone(dgtq,field_data%t_counter, msh%labeled_zones(zone_id), center,&
+                           s11%x, s22%x, s33%x, s12%x, s13%x, s23%x,&
+                           p, coef, visc)
+     if (pe_rank .eq. 0) then
+         write(*,*) field_data%t_counter,dgtq(1)+dgtq(4),dgtq(1),dgtq(4),'dragx'
+         write(*,*) field_data%t_counter,dgtq(2)+dgtq(5),dgtq(2),dgtq(5),'dragy'
+         write(*,*) field_data%t_counter,dgtq(3)+dgtq(6),dgtq(3),dgtq(6),'dragz'
+      end if
+
+
+     do mem  = 1,msh%labeled_zones(zone_id)%size
+        e = msh%labeled_zones(zone_id)%facet_el(mem)%x(2)
+        f = msh%labeled_zones(zone_id)%facet_el(mem)%x(1)
+        do k = 1, lx
+           do j = 1, lx
+              do i = 1, lx
+                 if (index_is_on_facet(i,j,k,lx,lx,lx,f)) then
+                    nv = coef%get_normal(i,j,k,e,f)*coef%get_area(i,j,k,e,f)
+                    s11_ = s11%x(i,j,k,e)
+                    s12_ = s12%x(i,j,k,e)
+                    s22_ = s22%x(i,j,k,e)
+                    s13_ = s13%x(i,j,k,e)
+                    s23_ = s23%x(i,j,k,e)
+                    s33_ = s33%x(i,j,k,e)
+                    call drag_torque_pt(dgtq,dof%x(i,j,k,e), dof%y(i,j,k,e),dof%z(i,j,k,e),&
+                                        center, &
+                                        s11_, s22_, s33_, s12_, s13_, s23_,&
+                                        p%x(i,j,k,e), nv(1), nv(2), nv(3), visc)
+                    drag_torq%x(map_1d%pt_lvl(i,j,k,e),3:14) = &
+                    drag_torq%x(map_1d%pt_lvl(i,j,k,e),3:14) + dgtq
+                    drag_torq%x(map_1d%pt_lvl(i,j,k,e),2) =  line(i,j,k,e)
+                 end if
+              end do
+           end do
+        end do
+     end do
+   
+     call MPI_Allreduce(MPI_IN_PLACE,drag_torq%x(1,3), 12*glb_n_gll_pts, &
+        MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+     call MPI_Allreduce(MPI_IN_PLACE,drag_torq%x(1,2), glb_n_gll_pts, &
+        MPI_REAL_PRECISION, MPI_MIN, NEKO_COMM, ierr)
+     drag_torq%x(:,1) = field_data%time
+     if (pe_rank .eq. 0) then
+        call output_file%write(drag_torq)
+     end if
+  end do
+     
+  call neko_finalize
+
+end program calc_lift_from_field
diff --git a/contrib/map_to_equidistant_1d/Makefile.am b/contrib/map_to_equidistant_1d/Makefile.am
new file mode 100644
index 00000000000..c33d59c9fe7
--- /dev/null
+++ b/contrib/map_to_equidistant_1d/Makefile.am
@@ -0,0 +1,5 @@
+bin_PROGRAMS = map_to_equidistant_1d
+map_to_equidistant_1d_SOURCES = map_to_equidistant_1d.f90
+map_to_equidistant_1d_LDADD = $(top_builddir)/src/libneko.a
+map_to_equidistant_1dLDFLAGS = $(LDFLAGS) $(LIBS) 
+AM_FCFLAGS = -I@top_builddir@/src
diff --git a/contrib/map_to_equidistant_1d/map_to_equidistant_1d.f90 b/contrib/map_to_equidistant_1d/map_to_equidistant_1d.f90
new file mode 100644
index 00000000000..11f5ad9ebcf
--- /dev/null
+++ b/contrib/map_to_equidistant_1d/map_to_equidistant_1d.f90
@@ -0,0 +1,126 @@
+!> Program to interpolate the field from GLL points onto equidistant points in elements in a homogenous direction
+!> The number of points inside an element of the input and output field file is supposed to be the same
+!> Shiyu Du 16/01-24 note: the program is designed for Cartesian Box meshes with equidistant elements on the hom-dir,
+!                          the version for rotational coordinates is not yet been thought about.
+!> Shiyu Du 16/01-24 note: the program is currently tested on CPUs, GPU is not tested yet.
+program map_to_equidistant_1d
+  use neko
+  use fast3d
+  use tensor
+  implicit none
+  
+  character(len=NEKO_FNAME_LEN) :: inputchar, field_fname, hom_dir, output_fname
+  type(file_t) :: field_file, output_file
+  real(kind=rp) :: x_equid
+  real(kind=rp), allocatable :: wt(:,:), wtt(:,:), ident(:,:)
+  type(fld_file_data_t) :: field_data
+  type(space_t) :: Xh
+  type(vector_ptr_t), allocatable :: fields(:)
+  integer :: argc, i, lx, j, file_precision
+  logical :: dp_precision
+  
+  argc = command_argument_count()
+
+  if ((argc .lt. 4) .or. (argc .gt. 4)) then
+     if (pe_rank .eq. 0) then
+        write(*,*) 'Usage: ./map_to_equidistant_1d field.fld dir(x, y, z) outfield.fld precision' 
+        write(*,*) 'Example command: ./map_to_equidistant_1d fieldblabla.fld x outfield.fld .true.'
+        write(*,*) 'Redistribute the points to be equidistant in elements '
+        write(*,*) 'in x of the field fieldblabla.nek5000 and stores in outfield.fld, both are double precision'
+     end if
+     stop
+  end if
+  
+  call neko_init 
+
+  call get_command_argument(1, inputchar) 
+  read(inputchar, fmt='(A)') field_fname
+  call get_command_argument(2, inputchar) 
+  read(inputchar, *) hom_dir
+  call get_command_argument(3, inputchar) 
+  read(inputchar, fmt='(A)') output_fname
+  call get_command_argument(4, inputchar) 
+  read(inputchar, *) dp_precision
+
+  if (dp_precision) then
+     file_precision = dp
+  else
+     file_precision = sp
+  end if
+
+  field_file = file_t(trim(field_fname),precision=file_precision)
+
+  if (trim(hom_dir) .ne. 'x' .and. trim(hom_dir) .ne. 'y' .and. trim(hom_dir) .ne. 'z') then
+     call neko_error('The homogenous direction should be "x", "y" or "z"')
+  end if
+  
+  call field_data%init()
+  if (pe_rank .eq. 0) write(*,*) 'Reading file:', 1
+  call field_file%read(field_data)
+
+  lx = field_data%lx
+
+  call Xh%init(GLL, field_data%lx, field_data%ly, field_data%lz)
+
+  ! construct the weight matrix
+  allocate(wt(lx,lx))
+  allocate(wtt(lx,lx))
+  allocate(ident(lx,lx))
+  ident = 0.0_rp
+  do i = 1, lx
+    x_equid = -1.0_rp + (i-1) * 2.0_rp/(lx-1)
+    call fd_weights_full(x_equid, Xh%zg(:,1), lx-1, 0, wtt(:,i))
+    wt(i,:) = wtt(:,i)
+    ident(i,i) = 1.0_rp
+  end do
+
+  ! redistribute the coordinates to be equidistant within elements
+  if (trim(hom_dir) .eq. 'x') then
+    call tnsr1_3d(field_data%x%x, lx, lx, wt, ident, ident, field_data%nelv)
+  else if (trim(hom_dir) .eq. 'y') then
+    call tnsr1_3d(field_data%y%x, lx, lx, ident, wtt, ident, field_data%nelv)
+  else if (trim(hom_dir) .eq. 'z') then
+    call tnsr1_3d(field_data%z%x, lx, lx, ident, ident, wtt, field_data%nelv)
+  end if
+
+  ! interpolate the field at t=0
+  allocate(fields(field_data%size()))
+  call field_data%get_list(fields,field_data%size())
+  do i = 1, field_data%size()
+     if (trim(hom_dir) .eq. 'x') then
+        call tnsr1_3d(fields(i)%v%x, lx, lx, wt, ident, ident, field_data%nelv)
+     else if (trim(hom_dir) .eq. 'y') then
+        call tnsr1_3d(fields(i)%v%x, lx, lx, ident, wtt, ident, field_data%nelv)
+     else if (trim(hom_dir) .eq. 'z') then
+        call tnsr1_3d(fields(i)%v%x, lx, lx, ident, ident, wtt, field_data%nelv)
+     end if
+  end do 
+
+  ! output at t=0  
+  output_file = file_t(trim(output_fname),precision=file_precision)
+  call output_file%write(field_data, field_data%time)
+
+  ! interpolate field for t>0
+  do i = 1, field_data%meta_nsamples-1
+     if (pe_rank .eq. 0) write(*,*) 'Reading file:', i+1
+     call field_file%read(field_data)
+     call field_data%get_list(fields,field_data%size())
+     do j = 1, field_data%size()
+        if (trim(hom_dir) .eq. 'x') then
+           call tnsr1_3d(fields(j)%v%x, lx, lx, wt, ident, ident, field_data%nelv)
+        else if (trim(hom_dir) .eq. 'y') then
+           call tnsr1_3d(fields(j)%v%x, lx, lx, ident, wtt, ident, field_data%nelv)
+        else if (trim(hom_dir) .eq. 'z') then
+           call tnsr1_3d(fields(j)%v%x, lx, lx, ident, ident, wtt, field_data%nelv)
+        end if
+     end do 
+     ! output for t>0
+     call output_file%write(field_data, field_data%time)
+  end do
+  
+  
+  if (pe_rank .eq. 0) write(*,*) 'Done'
+  
+  call neko_finalize
+
+end program map_to_equidistant_1d
diff --git a/contrib/pyneko/Makefile.am b/contrib/pyneko/Makefile.am
index f2607f00531..88c66db51cc 100644
--- a/contrib/pyneko/Makefile.am
+++ b/contrib/pyneko/Makefile.am
@@ -3,5 +3,4 @@ ACLOCAL_AMFLAGS = -I m4 --install
 SUBDIRS = src
 
 nobase_python_PYTHON = pyNeko/__init__.py\
-		       pyNeko/case.py\
                        pyNeko/intf.py
diff --git a/contrib/pyneko/examples/hemi/hemi.py b/contrib/pyneko/examples/hemi/hemi.py
index a918c0d2702..92614845089 100644
--- a/contrib/pyneko/examples/hemi/hemi.py
+++ b/contrib/pyneko/examples/hemi/hemi.py
@@ -1,14 +1,19 @@
 import pyNeko
+import json
 
 # Initialize Neko
 pyNeko.init()
 
 # Create a json case
-hemi_case = pyNeko.create_case(mesh_file="hemi.nmsh", lx=6, T_end=1e-3)
+hemi_case = json.load(open('hemi.case'))
 
 # Solve the case
 pyNeko.solve(hemi_case)
 
-# Increase sample rate and rerun to get some data
-hemi_case['parameters']['nsamples'] = 1
+# Increase sample rate and rerun to get more data
+hemi_case['case']['nsamples'] = 2
 pyNeko.solve(hemi_case)
+
+
+# Finalize Neko
+pyNeko.finalize()
diff --git a/contrib/pyneko/pyNeko/__init__.py b/contrib/pyneko/pyNeko/__init__.py
index 847049f7c69..a4c85f305ed 100644
--- a/contrib/pyneko/pyNeko/__init__.py
+++ b/contrib/pyneko/pyNeko/__init__.py
@@ -1,4 +1,3 @@
 """ A portable framework for high-order spectral element flow simulations """
 
-from pyNeko.case import create_case
 from pyNeko.intf import init, finalize, solve
diff --git a/contrib/pyneko/pyNeko/case.py b/contrib/pyneko/pyNeko/case.py
deleted file mode 100644
index 3a54bc3cb3b..00000000000
--- a/contrib/pyneko/pyNeko/case.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import json
-
-def create_case(mesh_file="", 
-                lx=4, 
-                source_term="noforce",
-                initial_condition="uniform",
-                nsamples=0, 
-                dt=0.001,                
-                T_end=0.0,
-                uinf=[1.0,0.0,0.0]) :
-    default = {
-        "case" : {
-            "mesh_file" : mesh_file,
-            "fluid_scheme" : "pnpn",
-            "lx" : lx,
-            "source_term" : source_term,
-            "initial_condition" : initial_condition
-        },
-        "parameters" : {
-            "nsamples" :  nsamples,        
-            "dt" : dt,
-            "T_end" : T_end,
-            "uinf" : uinf,
-            "ksp_vel" : {
-                "type" : "cg",
-                "pc" : "jacobi",
-                "abstol" : 1e-09
-                
-            },
-            "ksp_prs": {
-                "type" : "gmres",
-                "pc" : "hsmg",
-                "abstol" : 1e-09
-            }
-        }
-    }
-    return json.loads(json.dumps(default))
-
-
diff --git a/contrib/pyneko/src/Makefile.am b/contrib/pyneko/src/Makefile.am
index f8e3f2ec53f..a3c0a6dbd72 100644
--- a/contrib/pyneko/src/Makefile.am
+++ b/contrib/pyneko/src/Makefile.am
@@ -1,4 +1,3 @@
 
 lib_LTLIBRARIES = libnekointf.la
-libnekointf_la_SOURCES = json_case.f90\
-			 neko_intf.f90
+libnekointf_la_SOURCES = neko_intf.f90
diff --git a/contrib/pyneko/src/json_case.f90 b/contrib/pyneko/src/json_case.f90
deleted file mode 100644
index d2ae3bf9442..00000000000
--- a/contrib/pyneko/src/json_case.f90
+++ /dev/null
@@ -1,263 +0,0 @@
-! Copyright (c) 2022, The Neko Authors
-! All rights reserved.
-!
-! Redistribution and use in source and binary forms, with or without
-! modification, are permitted provided that the following conditions
-! are met:
-!
-!   * Redistributions of source code must retain the above copyright
-!     notice, this list of conditions and the following disclaimer.
-!
-!   * Redistributions in binary form must reproduce the above
-!     copyright notice, this list of conditions and the following
-!     disclaimer in the documentation and/or other materials provided
-!     with the distribution.
-!
-!   * Neither the name of the authors nor the names of its
-!     contributors may be used to endorse or promote products derived
-!     from this software without specific prior written permission.
-!
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-! POSSIBILITY OF SUCH DAMAGE.
-!
-module json_case
-  use neko
-  use json_module
-  implicit none
-
-contains
-  
-  subroutine json_case_create_neko_case(neko_case, json_case)
-    type(case_t), intent(inout) :: neko_case
-    type(json_file) :: json_case
-    type(json_core) :: jcore
-    type(json_value), pointer :: json_param, p
-    character(len=:), allocatable :: mesh_file
-    character(len=:), allocatable :: fluid_scheme
-    character(len=:), allocatable :: source_term
-    character(len=:), allocatable :: initial_condition
-    integer :: lx
-    logical found_key
-    type(file_t) :: msh_file
-
-
-
-    call neko_log%section('Case')
-    call neko_log%message('Reading case from pyNeko json file')
-
-    ! Fill default parameters 
-    call param_default(neko_case%params)
-    call json_case%get_core(jcore)
-    call json_case%get('parameters', json_param, found_key)
-    if (found_key) then
-       call json_case_create_neko_params(neko_case%params, json_param, jcore)
-    else
-       call neko_warning('No parameter block found')
-    end if
-
-    ! Read case description
-
-    call json_case%get('case.mesh_file', mesh_file, found_key)
-    if (.not. found_key) then
-       call neko_error('Key mesh_file not found')
-    else
-       msh_file = file_t(trim(mesh_file))
-       call msh_file%read(neko_case%msh)
-    end if
-
-    call neko_case%usr%init()
-    call neko_case%usr%usr_msh_setup(neko_case%msh)
-
-    call json_case%get('case.lx', lx, found_key)
-    if (.not. found_key) then
-       call neko_error('lx not defined')
-    end if
-
-    call json_case%get('case.fluid_scheme', fluid_scheme, found_key)
-    if (.not. found_key) then
-       call neko_error('Fluid scheme not defined')
-    else       
-       call fluid_scheme_factory(neko_case%fluid, trim(fluid_scheme))
-       call neko_case%fluid%init(neko_case%msh, lx, neko_case%params)
-    end if
-
-    call json_case%get('case.source_term', source_term, found_key)
-    if (.not. found_key) then
-       call neko_error('Source term not defined')
-    else
-       call neko_case%fluid%set_source(trim(source_term))
-    end if
-
-    call json_case%get('case.initial_condition', initial_condition, found_key)
-    if (.not. found_key) then
-       call neko_error('Intiail condition not defined')
-    else       
-       call set_flow_ic(neko_case%fluid%u, neko_case%fluid%v, neko_case%fluid%w,&
-       neko_case%fluid%p, neko_case%fluid%c_Xh, neko_case%fluid%gs_Xh,&
-       initial_condition, neko_case%params)
-    end if
-
-
-   select type(f => neko_case%fluid)
-    type is(fluid_pnpn_t)
-       call f%ulag%set(f%u)
-       call f%vlag%set(f%v)
-       call f%wlag%set(f%w)
-    end select
-
-    call neko_case%fluid%validate()
-
-    call neko_case%ab_bdf%set_time_order(neko_case%params%time_order)
-
-    call neko_case%s%init(neko_case%params%nsamples, neko_case%params%T_end)
-    neko_case%f_out = fluid_output_t(neko_case%fluid, &
-                                     path=neko_case%params%output_dir)
-    call neko_case%s%add(neko_case%f_out)
-
-    call neko_case%q%init(neko_case%params%stats_begin)
-    if (neko_case%params%stats_mean_flow) then
-       call neko_case%q%add(neko_case%fluid%mean%u)
-       call neko_case%q%add(neko_case%fluid%mean%v)
-       call neko_case%q%add(neko_case%fluid%mean%w)
-       call neko_case%q%add(neko_case%fluid%mean%p)
-
-       if (neko_case%params%output_mean_flow) then
-          neko_case%f_mf = mean_flow_output_t(neko_case%fluid%mean, &
-               neko_case%params%stats_begin, path=neko_case%params%output_dir)
-          call neko_case%s%add(neko_case%f_mf)
-       end if
-    end if
-
-    if (neko_case%params%stats_mean_sqr_flow) then
-       call neko_case%q%add(neko_case%fluid%mean_sqr%uu)
-       call neko_case%q%add(neko_case%fluid%mean_sqr%vv)
-       call neko_case%q%add(neko_case%fluid%mean_sqr%ww)
-       call neko_case%q%add(neko_case%fluid%mean_sqr%pp)
-
-       if (neko_case%params%output_mean_sqr_flow) then
-          neko_case%f_msqrf = mean_sqr_flow_output_t(neko_case%fluid%mean_sqr, &
-                                             neko_case%params%stats_begin, &
-                                             path=neko_case%params%output_dir)
-          call neko_case%s%add(neko_case%f_msqrf)
-       end if
-    end if
-
-    call jobctrl_set_time_limit(neko_case%params%jlimit)
-    
-    call neko_log%end_section()
-
-  end subroutine json_case_create_neko_case
-
-  subroutine json_case_create_neko_params(params, json_param, jcore)
-    type(param_t), intent(inout) :: params
-    type(json_value), pointer :: json_param
-    type(json_core)  :: jcore
-    type(json_value), pointer :: p
-    logical :: found_key
-    real(kind=rp), allocatable :: uinf(:)
-    character(len=:), allocatable :: str
-
-    call jcore%get_child(json_param, 'nsamples', p, found_key)
-    if (found_key) call jcore%get(p, params%nsamples)
-
-    call jcore%get_child(json_param, 'output_bdry', p, found_key)
-    if (found_key) call jcore%get(p, params%output_bdry)
-    call jcore%get_child(json_param, 'output_part', p, found_key)
-    if (found_key) call jcore%get(p, params%output_part)
-    call jcore%get_child(json_param, 'output_chkp', p, found_key)
-    if (found_key) call jcore%get(p, params%output_chkp)
-
-    call jcore%get_child(json_param, 'dt', p, found_key)
-    if (found_key) call jcore%get(p, params%dt)
-    call jcore%get_child(json_param, 'T_end', p, found_key)
-    if (found_key) call jcore%get(p, params%T_end)
-    call jcore%get_child(json_param, 'rho', p, found_key)
-    if (found_key) call jcore%get(p, params%rho)
-    call jcore%get_child(json_param, 'mu', p, found_key)
-    if (found_key) call jcore%get(p, params%mu)
-    call jcore%get_child(json_param, 'Re', p, found_key)
-    if (found_key) call jcore%get(p, params%Re)
-    call jcore%get_child(json_param, 'uinf', p, found_key)
-    if (found_key) then
-       call jcore%get(p, uinf)
-       params%uinf = uinf
-    end if
-
-
-    call jcore%get_child(json_param, 'ksp_prs.type', p, found_key)
-    if (found_key) then
-       call jcore%get(p, str)
-       params%ksp_prs = str
-       deallocate(str)
-    end if
-    
-    call jcore%get_child(json_param, 'ksp_prs.pc', p, found_key)
-    if (found_key) then
-       call jcore%get(p, str)
-       params%pc_prs = str
-       deallocate(str)
-    end if
-    call jcore%get_child(json_param, 'ksp_prs.abs_tol', p, found_key)
-    if (found_key) call jcore%get(p, params%abstol_prs)
-
-    call jcore%get_child(json_param, 'ksp_vel.type', p, found_key)
-    if (found_key) then
-       call jcore%get(p, str)
-       params%ksp_vel = str
-       deallocate(str)
-    end if
-    
-    call jcore%get_child(json_param, 'ksp_vel.pc', p, found_key)
-    if (found_key) then
-       call jcore%get(p, str)
-       params%pc_vel = str
-       deallocate(str)
-    end if
-    call jcore%get_child(json_param, 'ksp_vel.abs_tol', p, found_key)
-    if (found_key) call jcore%get(p, params%abstol_vel)
-
-    
-    call jcore%get_child(json_param, 'fluid_inflow', p, found_key)
-    if (found_key) then
-       call jcore%get(p, str)
-       params%fluid_inflow = str
-       deallocate(str)
-    end if
-
-    call jcore%get_child(json_param, 'vol_flow_dir', p, found_key)
-    if (found_key) call jcore%get(p, params%vol_flow_dir)
-
-    call jcore%get_child(json_param, 'proj_prs_dim', p, found_key)
-    if (found_key) call jcore%get(p, params%proj_prs_dim)
-    call jcore%get_child(json_param, 'proj_vel_dim', p, found_key)
-    if (found_key) call jcore%get(p, params%proj_vel_dim)
-
-    call jcore%get_child(json_param, 'stats.begin', p, found_key)
-    if (found_key) call jcore%get(p, params%stats_begin)
-    call jcore%get_child(json_param, 'stats.mean_flow', p, found_key)
-    if (found_key) call jcore%get(p, params%stats_mean_flow)
-    call jcore%get_child(json_param, 'stats.output_mean_flow', p, found_key)
-    if (found_key) call jcore%get(p, params%output_mean_flow)
-    call jcore%get_child(json_param, 'stats.mean_sqr_flow', p, found_key)
-    if (found_key) call jcore%get(p, params%stats_mean_flow)
-    call jcore%get_child(json_param, 'stats.output_mean_sqr_flow', p, found_key)
-    if (found_key) call jcore%get(p, params%output_mean_flow)
-
-    call jcore%get_child(json_param, 'dealias', p, found_key)
-    if (found_key) call jcore%get(p, params%dealias)
-    call jcore%get_child(json_param, 'lxd', p, found_key)
-    if (found_key) call jcore%get(p, params%lxd)
-    
-  end subroutine json_case_create_neko_params
-
-end module json_case
diff --git a/contrib/pyneko/src/neko_intf.f90 b/contrib/pyneko/src/neko_intf.f90
index 647d26af0cf..b5e5bb7186d 100644
--- a/contrib/pyneko/src/neko_intf.f90
+++ b/contrib/pyneko/src/neko_intf.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2022, The Neko Authors
+! Copyright (c) 2022-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,6 @@
 !
 module neko_intf
   use neko
-  use json_case
   use json_module
   use, intrinsic :: iso_c_binding
   implicit none
@@ -44,34 +43,70 @@ subroutine neko_intf_init() bind(c, name='init')
     character(len=LOG_SIZE) :: log_buf
     character(10) :: time
     character(8) :: date
+    integer :: nthrds, rw, sw
 
     call neko_init()
 
-    call date_and_time(time=time, date=date)           
-    call neko_log%section("Session Information")       
+    call date_and_time(time=time, date=date)
+    call neko_log%section("Session Information")
     write(log_buf, '(A,A,A,A,1x,A,1x,A,A,A,A,A)') 'Start time: ',&
          time(1:2),':',time(3:4), '/', date(1:4),'-', date(5:6),'-',date(7:8)
-    call neko_log%message(log_buf)
+    call neko_log%message(log_buf, NEKO_LOG_QUIET)
     write(log_buf, '(a)') 'Running on: '
+    sw = 10
     if (pe_size .lt. 1e1)  then
        write(log_buf(13:), '(i1,a)') pe_size, ' MPI '
        if (pe_size .eq. 1) then
           write(log_buf(19:), '(a)') 'rank'
+          sw = 9
        else
           write(log_buf(19:), '(a)') 'ranks'
        end if
+       rw = 1
     else if (pe_size .lt. 1e2) then
        write(log_buf(13:), '(i2,a)') pe_size, ' MPI ranks'
+       rw = 2
     else if (pe_size .lt. 1e3) then
        write(log_buf(13:), '(i3,a)') pe_size, ' MPI ranks'
+       rw = 3
     else if (pe_size .lt. 1e4) then
        write(log_buf(13:), '(i4,a)') pe_size, ' MPI ranks'
+       rw = 4
     else if (pe_size .lt. 1e5) then
        write(log_buf(13:), '(i5,a)') pe_size, ' MPI ranks'
+       rw = 5
     else
        write(log_buf(13:), '(i6,a)') pe_size, ' MPI ranks'
+       rw = 6
     end if
-    call neko_log%message(log_buf)
+
+    nthrds = 1
+    !$omp parallel
+    !$omp master
+    !$ nthrds = omp_get_num_threads()
+    !$omp end master
+    !$omp end parallel
+
+    if (nthrds .gt. 1) then
+       if (nthrds .lt. 1e1) then
+          write(log_buf(13 + rw + sw:), '(a,i1,a)') ', using ', &
+               nthrds, ' thrds each'
+       else if (nthrds .lt. 1e2) then
+          write(log_buf(13 + rw + sw:), '(a,i2,a)') ', using ', &
+               nthrds, ' thrds each'
+       else if (nthrds .lt. 1e3) then
+          write(log_buf(13 + rw + sw:), '(a,i3,a)') ', using ', &
+               nthrds, ' thrds each'
+       else if (nthrds .lt. 1e4) then
+          write(log_buf(13 + rw + sw:), '(a,i4,a)') ', using ', &
+               nthrds, ' thrds each'
+       end if
+    end if
+    call neko_log%message(log_buf, NEKO_LOG_QUIET)
+
+    write(log_buf, '(a)') 'CPU type  : '
+    call system_cpu_name(log_buf(13:))
+    call neko_log%message(log_buf, NEKO_LOG_QUIET)
 
     write(log_buf, '(a)') 'Bcknd type: '
     if (NEKO_BCKND_SX .eq. 1) then
@@ -87,13 +122,13 @@ subroutine neko_intf_init() bind(c, name='init')
     else
        write(log_buf(13:), '(a)') 'CPU'
     end if
-    call neko_log%message(log_buf)
+    call neko_log%message(log_buf, NEKO_LOG_QUIET)
 
     if (NEKO_BCKND_HIP .eq. 1 .or. NEKO_BCKND_CUDA .eq. 1 .or. &
          NEKO_BCKND_OPENCL .eq. 1) then
        write(log_buf, '(a)') 'Dev. name : '
        call device_name(log_buf(13:))
-       call neko_log%message(log_buf)
+       call neko_log%message(log_buf, NEKO_LOG_QUIET)
     end if
 
     write(log_buf, '(a)') 'Real type : '
@@ -105,10 +140,12 @@ subroutine neko_intf_init() bind(c, name='init')
     case (real128)
        write(log_buf(13:), '(a)') 'quad precision'
     end select
-    call neko_log%message(log_buf)       
+    call neko_log%message(log_buf, NEKO_LOG_QUIET)
+
     call neko_log%end()
-    call neko_log%newline
 
+    call neko_log%newline
+    
   end subroutine neko_intf_init
 
   subroutine neko_intf_finalize() bind(c, name="finalize")
@@ -120,12 +157,10 @@ subroutine neko_intf_solve(pyneko_case, ilen) bind(c, name="solve")
     integer(c_int), value :: ilen
     character(len=:), allocatable :: fpyneko_case
     type(json_file) :: json_case
-    type(case_t) :: neko_case
-    
-    
+    type(case_t), target :: neko_case
 
     if (c_associated(pyneko_case)) then
-       block    
+       block
          character(kind=c_char,len=ilen+1),pointer :: s
          call c_f_pointer(pyneko_case, s)
          fpyneko_case = s(1:ilen)
@@ -134,14 +169,19 @@ subroutine neko_intf_solve(pyneko_case, ilen) bind(c, name="solve")
          nullify(s)
        end block
     end if
-    
-    call json_case_create_neko_case(neko_case, json_case)
-    call json_case%destroy()
 
+    call case_init(neko_case, json_case)
+    call json_case%destroy()
+    
     call neko_solve(neko_case)
 
     call case_free(neko_case)
 
+    !> @todo Add a clean method
+    call neko_field_registry%free()
+    call neko_field_registry%init()
+
+
   end subroutine neko_intf_solve
 
 end module neko_intf
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
index c5c0ed8c98a..b2a79d568c5 100644
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@@ -758,19 +758,7 @@ WARN_LOGFILE           =
 # spaces.
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                 = @top_srcdir@/doc/pages/index.md\
-                        @top_srcdir@/doc/pages/code-style.md\
-                        @top_srcdir@/doc/pages/governing-equations.md\
-                        @top_srcdir@/doc/pages/case-file.md\
-                        @top_srcdir@/doc/pages/point-zones.md\
-                        @top_srcdir@/doc/pages/simcomps.md\
-                        @top_srcdir@/doc/pages/accelerators.md\
-                        @top_srcdir@/doc/pages/testing.md\
-                        @top_srcdir@/doc/pages/publication-list.md\
-                        @top_srcdir@/doc/pages/appendix.md\
-                        @top_srcdir@/doc/pages/statistics.md\
-                        @top_srcdir@/doc/pages/installation.md\
-                        @top_srcdir@/doc/pages/dev_patterns.md\
+INPUT                 = @top_srcdir@/doc/pages\
                         @top_srcdir@/src\
                         @top_srcdir@/contrib/rea2nbin\
                         @top_srcdir@/contrib/average_fields_in_time\
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 00281459a96..14a4352ce09 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -11,19 +11,27 @@ doxygen.stamp: Doxyfile $(INPUT)
 CLEANFILES = doxygen.stamp
 
 clean-local:
-	rm -fr html 
+	rm -fr html
 
 html-local: doxygen.stamp
 
-EXTRA_DIST = pages/code-style.md\
-	     pages/publication-list.md\
-	     pages/accelerators.md\
-	     pages/case-file.md\
-	     pages/statistics.md\
-             pages/simcomps.md\
-             pages/point-zones.md\
-             pages/testing.md\
-             pages/appendix.md\
-             pages/installation.md\
-             pages/dev_patterns.md\
-	     pages/index.md
+EXTRA_DIST = pages/appendices/governing-equations.md \
+			pages/appendices/publications.md \
+			pages/developer-guide \
+			pages/developer-guide/accelerators.md \
+			pages/developer-guide/code-style.md \
+			pages/developer-guide/contributing.md \
+			pages/developer-guide/dev_patterns.md \
+			pages/developer-guide/testing.md \
+			pages/user-guide \
+			pages/user-guide/case-file.md \
+			pages/user-guide/installation.md \
+			pages/user-guide/io.md \
+			pages/user-guide/point-zones.md \
+			pages/user-guide/simcomps.md \
+			pages/user-guide/statistics-guide.md \
+			pages/user-guide/user-file.md \
+			pages/appendices.md \
+			pages/developer-guide.md \
+			pages/index.md \
+			pages/user-guide.md
diff --git a/doc/pages/appendices.md b/doc/pages/appendices.md
new file mode 100644
index 00000000000..25ae8e7a50c
--- /dev/null
+++ b/doc/pages/appendices.md
@@ -0,0 +1,27 @@
+# Appendices {#appendices}
+
+\tableofcontents
+
+The appendices contain a few extra pages that are not directly related to the usage
+of the code. But can be useful for users and developers alike.
+
+- [Environmental variable reference](@ref appendices_env-var)
+- \subpage governing-equations
+- \subpage publications
+
+## Environmental variable reference {#appendices_env-var}
+
+| Name             | Description                                                 | Default value |
+| ---------------- | ----------------------------------------------------------- | ------------- |
+| `NEKO_AUTOTUNE`  | Force Ax auto-tuning strategy (``'1D'``,``'KSTEP'``)        | Unset         |
+| `NEKO_LOG_LEVEL` | Log verbosity level (integer > 0, default: 1)               | Unset         |
+| `NEKO_GS_STRTGY` | Gather-scatter device MPI sync. strategy (0 < integer < 5 ) | Unset         |
+
+### Logging level details
+
+A number of logging levels are supported.
+
+- `NEKO_LOG_LEVEL=0`   : Quiet mode, minimal logging during execution.
+- `NEKO_LOG_LEVEL=1`   : Default information mode, adding step informations.
+- `NEKO_LOG_LEVEL=2`   : Verbose mode, logging extra details.
+- `NEKO_LOG_LEVEL=10`  : Debug mode.
diff --git a/doc/pages/governing-equations.md b/doc/pages/appendices/governing-equations.md
similarity index 98%
rename from doc/pages/governing-equations.md
rename to doc/pages/appendices/governing-equations.md
index 3b9093a643d..a35676a1a80 100644
--- a/doc/pages/governing-equations.md
+++ b/doc/pages/appendices/governing-equations.md
@@ -1,5 +1,7 @@
 # Governing equations {#governing-equations}
 
+\tableofcontents
+
 ## Fluid
 
 Neko's solves the Navier-Stokes equations formulated as follows.
diff --git a/doc/pages/publication-list.md b/doc/pages/appendices/publications.md
similarity index 74%
rename from doc/pages/publication-list.md
rename to doc/pages/appendices/publications.md
index b4b53254ab7..490a21ebabc 100644
--- a/doc/pages/publication-list.md
+++ b/doc/pages/appendices/publications.md
@@ -1,8 +1,8 @@
-# Publication List {#publication-list}
+# Publications {#publications}
 
-* Jansson, N., Karp, M., Podobas, A., Markidis, S. and Schlatter, P., 2021. *Neko: A modern, portable, and scalable framework for high-fidelity computational fluid dynamics*. arXiv preprint arXiv:2107.01243.
 * Jansson, N., 2021. *Spectral Element Simulations on the NEC SX-Aurora TSUBASA*. In proc. HPCAsia 2021.
 * Karp, M., Podobas, A., Kenter, T., Jansson, N., Plessl, C., Schlatter, P. and Markidis, S., 2022. *A high-fidelity flow solver for unstructured meshes on field-programmable gate arrays: Design, evaluation, and future challenges*. In proc. HPCAsia 2022.
 * Karp, M., Jansson, N., Podobas, A., Schlatter, P., and Markidis, S., 2022. *Reducing Communication in the Conjugate Gradient Method: A Case Study on High-Order Finite Elements*. In proc. PASC 2022.
-* Karp, M., Massaro, D., Jansson, N., Hart, A., Wahlgren, J., Schlatter, P., and Markidis, S., 2022. *Large-Scale Direct Numerical Simulations of Turbulence Using GPUs and Modern Fortran*. arXiv preprint arXiv:2207:07098.
+* Karp, M., Massaro, D., Jansson, N., Hart, A., Wahlgren, J., Schlatter, P., and Markidis, S., 2023. *Large-Scale Direct Numerical Simulations of Turbulence Using GPUs and Modern Fortran*. The International Journal of High Performance Computing Applications, 37, 5.
 * Jansson, N., Karp, M., Perez, A., Mukha, T., Ju, Y., Liu, J., Páll, S., Laure, E., Weinkauf, T., Schumacher, J., Schlatter, P., Markidis, S., 2023. *Exploring the Ultimate Regime of Turbulent Rayleigh–Bénard Convection Through Unprecedented Spectral-Element Simulations*. SC '23: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis.
+* Jansson, N., Karp, M., Podobas, A., Markidis, S. and Schlatter, P., 2024. *Neko: A modern, portable, and scalable framework for high-fidelity computational fluid dynamics*. Computer & Fluids, 275.
diff --git a/doc/pages/appendix.md b/doc/pages/appendix.md
deleted file mode 100644
index cfb9a5fcf3e..00000000000
--- a/doc/pages/appendix.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Appendix {#appendix}
-
-## Environmental variable reference {#env-var}
-
-Name                    | Description                                                   | Default value
-----                    | -----------                                                   | -------------
-`NEKO_AUTOTUNE`         | Force Ax auto-tuning strategy (``'1D'``,``'KSTEP'``)          | Unset
-`NEKO_LOG_LEVEL`        | Log verbosity level (integer > 0, default: 1)                 | Unset
-`NEKO_GS_STRTGY`        | Gather-scatter device MPI sync. strategy (0 < integer < 5 )   | Unset
diff --git a/doc/pages/case-file.md b/doc/pages/case-file.md
deleted file mode 100644
index 59b6ab19e0d..00000000000
--- a/doc/pages/case-file.md
+++ /dev/null
@@ -1,338 +0,0 @@
-# Case File {#case-file}
-
-The case file defines all the parameters of a simulation.
-The format of the file is JSON, making it easy to read and write case files
-using the majority of the popular programming languages.
-JSON is heirarchical and, and consists of parameter blocks enclosed in curly
-braces.
-These blocks are referred to as objects.
-The case file makes use objects to separate the configuration of different parts
- of the solver.
-We refer the reader to the examples shipped with the code to get a good
-idea of how a case file looks.
-The table below provides a complete reference for all possible configuration
-choices.
-
-## High-level structure
-The current high-level structure of the case file is shown below.
-
-~~~~~~~~~~~~~~~{.json}
-{
-    "version": 1.0
-    "case": {
-        "numerics": {}
-        "fluid": {}
-        "scalar": {}
-        "statistics": {}
-        "simulation_components" : []
-        "point_zones" : []
-    }
-}
-~~~~~~~~~~~~~~~
-The `version` keywords is reserved to track changes in the format of the file. 
-The the subsections below we list all the configuration options for each of the high-level objects. 
-Some parameters will have default values, and are therefore optional. 
-
-## Output frequency control
-A common scheme for controlling the output frequency is applied for various
-outputs.
-It is described already now in order to clarify the meaning of several
-parameters found in the tables below.
-
-The frequency is controlled by two paramters, ending with `_control` and 
-`_value`, respectively.
-The latter name is perhaps not ideal, but it is somewhat difficult to come up
-with a good one, suggestions are welcome.
-
-The `_value` parameter is a number, that defines the output frequency, but the
-interpretation of that number depends on the choice of `_control`.
-The three following options are possible.
-1. `simulationtime`, then `_value` is the time interval between the outputs.
-2. `tsteps`, then `_value` is the number of time steps between the outputs.
-3. `nsamples`, then `_value` is the total number of outputs that will be
-   performed in the course of the simulation.
-4. `never`, then `_value` is ignored and output is never performed.
-
-
-## The case object 
-
-This object is mostly used as a high-level container for all the other objects,
-but also defines several parameters that pertain to the simulation as a whole.
-
-Name                 | Description                                                    | Admissable values                         | Default value 
------                | -----------                                                    | -----------------                         | -------------
-`mesh_file`          | The name of the mesh file.                                                                            | Strings ending with `.nmsh`               | -  
-`output_boundary`    | Whether to write a `bdry0.f0000` file with boundary labels. Can be used to check boundary conditions. | `true` or `false`                         | `false`       
-`output_directory`   | Folder for redirecting solver output. Note that the folder has to exist!                              | Path to an existing directory             | `.` 
-`load_balancing`     | Whether to apply load balancing.                                                                      | `true` or `false`                         | `false` 
-`output_partitions`  | Whether to write a `partitions.vtk` file with domain partitioning.                                    | `true` or `false`                         | `false` 
-`output_checkpoints` | Whether to output checkpoints, i.e. restart files.                                                    | `true` or `false`                         | `false` 
-`checkpoint_control` | Defines the interpretation of `checkpoint_value` to define the frequency of writing checkpoint files. | `nsamples`, `simulationtime`, `tsteps`, `never` | -  
-`checkpoint_value` | The frequency of sampling in terms of `checkpoint_control`. | Positive real or integer | -  
-`restart_file` | checkpoint to use for a restart from previous data | Strings ending with `.chkp` | -  
-`time_step`          | Time-step size.                                                                                       | Positive reals                            | -  
-`end_time`           | Final time at which the simulation is stopped.                                                        | Positive reals                            | -   
-`job_timelimit`      | The maximum wall clock duration of the simulation.                                                    | String formatted as HH:MM:SS              | No limit 
-
-## Numerics
-Used to define the properties of the numerical discretization.
-
-Name                 | Description                                                        | Admissable values                         | Default value  
-----                 | -----------                                                        | -----------------                         | -------------
-`polynomial_order`   | The oder of the polynomial basis.                           | Integers, typically 5 to 9               | -             |
-`time_order`    | The order of the time integration scheme. Refer to the `time_scheme_controller` type documention for details. | 1,2, 3                         | -    
-`dealias`    | Whether to apply dealiasing to advection terms. | `true` or `false`                         | `false`     
-`dealiased_polynomial order`    | The polynomial order in the higher-order space used in the dealising. | Integer                         | `3/2(polynomial_order + 1) - 1`    
-
-## Fluid
-
-The configuration of the fluid solver and the flow problem.
-Contains multiple subobjects for various parts of the setup.
-
-### Material properties
-As per the governing equations, Neko requires the value of the density and
-dynamic viscosity to define the flow problem. These can be provided as `rho` and
-`mu` in the case file.
-
-Alternatively, one may opt to provide the Reynolds number, `Re`, which
-corresponds to a non-dimensional formulation of the Navier-Stokes equations.
-This formulation can effectively be obtained by setting \f$ \rho = 1 \f$ and \f$
-\mu = 1/Re \f$. This is exactly what Neko does under the hood, when `Re` is
-provided in the case file. 
-
-Note that if both `Re` and any of the dimensional material properties are
-provided, the simulation will issue an error.
-
-As an alternative to providing material properties in the case file, it is
-possible to do that in a special routine in the user file. This is demonstrated
-in the `rayleigh-benard-cylinder` example. Ultimately, both `rho` and `mu` have
-to be set in the subroutine, but it can be based on arbitrary computations and
-arbitrary parameters read from the case file. Additionally, this allows to
-change the material properties in time.
-
-
-### Inflow boundary conditions
-The object `inflow_condition` is used to specify velocity values at a Dirichlet
-boundary.
-This does not necessarily have to be an inflow boundary, so the name is not so
-good, and will most likely be changed along with type changes in the code.
-Since not all cases have Dirichlet boundaries (note, the special case of a 
-no-slip boundary is treated separately in the configuration), this object
-is not obligatory.
-The means of prescribing the values are controlled via the `type` keyword:
-
-1. `user`, the values are set inside the compiled user file.
-2. `uniform`, the value is a constant vector, looked up under the `value`
-   keyword.
-3. `blasius`, a Blasius profile is prescribed. Its properties are looked up
-   in the `case.fluid.blasius` object, see below.
-
-### Initial conditions
-The object `initial_condition` is used to provide initial conditions.
-It is mandatory.
-Note that this currently pertains to both the fluid, but also scalars.
-The means of prescribing the values are controlled via the `type` keyword:
-
-1. `user`, the values are set inside the compiled user file. The only way to
-   initialize scalars.
-2. `uniform`, the value is a constant vector, looked up under the `value`
-   keyword.
-3. `blasius`, a Blasius profile is prescribed. Its properties are looked up
-   in the `case.fluid.blasius` object, see below.
-
-### Blasius profile
-The `blasius` object is used to specify the Blasius profile that can be used for the 
-initial and inflow condition.
-The boundary cannot be tilted with respect to the coordinate axes.
-It requires  the following parameters:
-
-1. `delta`, the thickness of the boundary layer.
-2. `freestream_velocity`, the velocity value in the free stream.
-3. `approximation`, the numerical approximation to the Blasius profile.
-   - `linear`, linear approximation.
-   - `quadratic`, quadratic approximation.
-   - `cubic`, cubic approximation.
-   - `quartic`, quartic approximation.
-   - `sin`, sine function approximation.
-
-### Source terms
-The `source_terms` object should be used to specify the source terms in the
-momentum equation. The object is not mandatory, by default no forcing term is
-present. Each source term, is itself a JSON object, so `source_terms` is just an
-array of them. Note that with respect to the governing equations, the source
-terms define \f$ f^u \f$, meaning that the values are then multiplied by the
-density.
-
-For each source, the `type` keyword defines the kind of forcing that will be
-introduced. The following types are currently implemented.
-
-1. `constant`, constant forcing. Strength defined by the `values` array with 3
-   reals corresponding to the 3 components of the forcing. 
-2. `user_pointwise`, the values are set inside the compiled user file, using the
-   pointwise user file subroutine. Only works on CPUs!
-2. `user_vector`, the values are set inside the compiled user file, using the 
-   non-pointwise user file subroutine. Should be used when running on the GPU.
-   
-
-### Boundary types
-The optional `boundary_types` keyword can be used to specify boundary conditions.
-The reason for it being optional, is that some conditions can be specified
-directly inside the mesh file.
-In particular, this happens when Nek5000 `.re2` files are converted to `.nmsh`.
-Periodic boundary conditions are *always* defined inside the mesh file.
-
-The value of the keyword is an array of strings, with the following possible
-values:
-* `w`, a no-slip wall.
-* `v`, a Dirichlet boundary.
-* `sym`, a symmetry boundary.
-* `on`, Dirichlet for the boundary-parallel velocity and homogeneous Neumann for
-   the wall-normal. The wall-parallel velocity is defined by the initial
-   condition. 
-* `o`, outlet boundary. 
-* `o+dong`, outlet boundary using the Dong condition. 
-* `on+dong`, an `on` boundary using the Dong condition, ensuring that the
-   wall-normal velocity is directed outwards. 
-
-In some cases, only some boundary types have to be provided.
-For example, when one has periodic boundaries, like in the channel flow example. 
-In this case, to put the specification of the boundary at the right index,
-preceding boundary types can be marked with an empty string.
-For example, if boundaries with index 1 and 2 are periodic, and the third one is
-a wall, we can set.
-```
-"boundary_types": ["", "", "w"]
-```
-
-## Linear solver configuration
-The mandatory `velocity_solver` and `pressure_solver` objects are used to 
-configure the solvers for the momentum and pressure-Poisson equation.
-The following keywords are used, with the corresponding options.
-
-* `type`, solver type.
-  - `cg`, a conjugate gradient solver.
-  - `pipecg`, a pipelined conjugate gradient solver.
-  - `bicgstab`, a bi-conjugate gradient stabilized solver.
-  - `cacg`, a communication-avoiding conjugate gradient solver.
-  - `gmres`, a GMRES solver. Typically used for pressure.
-* `preconditioner`, preconditioner type.
-  - `jacobi`, a Jacobi preconditioner. Typically used for velocity.
-  - `hsmg`, a hybrid-Schwarz multigrid preconditioner. Typically used for pressure.
-  - `ident`, an identity matrix (no preconditioner).
-* `absolute_tolerance`, tolerance criterion for convergence.
-* `max_iterations`, maximum number of iterations before giving up.
-* `projection_space_size`, size of the vector space used for accelerating the
-   solution procedure. If 0, then the projection space is not used.
-   More important for the pressure equation.
-
-### Flow rate forcing
-The optional `flow_rate_force` object can be used to force a particular flow
-rate through the domain.
-Useful for channel and pipe flows.
-The configuration uses the following parameters:
-
-* `direction`, the direction of the flow, defined as 0, 1, or 2, corresponding
-  to x, y or z, respectively.
-* `value`, the desired flow rate.
-* `use_averaged_flow`, whether `value` specifies the domain-averaged (bulk) 
-   velocity or the volume flow rate.
-
-
-### Full parameter table
-All the parameters are summarized in the table below.
-This includes all the subobjects discussed above, as well as keyword parameters
-that can be described concisely directly in the table.
-
-Name                                    | Description                                                                      | Admissable values                                | Default value
-----------------------------------------|----------------------------------------------------------------------------------|--------------------------------------------------|--------------
-`scheme`                                | The fluid solve type.                                                            | `pnpn`                                           | -
-`Re`                                    | The Reynolds number.                                                             | Positive real                                    | -
-`rho`                                   | The density of the fluid.                                                        | Positive real                                    | -
-`mu`                                    | The dynamic viscosity of the fluid.                                              | Positive real                                    | -
-`output_control` | Defines the interpretation of `output_value` to define the frequency of writing checkpoint files. | `nsamples`, `simulationtime`, `tsteps`, `never` | -  
-`output_value` | The frequency of sampling in terms of `output_control`. | Positive real or integer | -  
-`inflow_condition.type`                 | Velocity inflow condition type.                                                  | `user`, `uniform`, `blasius`                     | -
-`inflow_condition.value`                | Value of the inflow velocity.                                                    | Vector of 3 reals                                | -
-`initial_condition.type`                | Initial condition type.                                                          | `user`, `uniform`, `blasius`                     | -
-`initial_condition.value`               | Value of the velocity initial condition.                                         | Vector of 3 reals                                | -
-`blasius.delta`                         | Boundary layer thickness in the Blasius profile.                                 | Positive real                                    | -
-`blasius.freestream_velocity`           | Freestream velocity in the Blasius profile.                                      | Vector of 3 reals                                | -
-`blasius.approximation`                 | Numerical approximation of the Blasius profile.                                  | `linear`, `quadratic`, `cubic`, `quartic`, `sin` | -
-`source_term.type`                      | Source term in the momentum equation.                                            | `noforce`, `user`, `user_vector`                 | -
-`boundary_types`                        | Boundary types/conditions labels.                                                | Array of strings                                 | -
-`velocity_solver.type`                  | Linear solver for the momentum equation.                                         | `cg`, `pipecg`, `bicgstab`, `cacg`, `gmres`      | -
-`velocity_solver.preconditioner`        | Linear solver preconditioner for the momentum equation.                          | `ident`, `hsmg`, `jacobi`                        | -
-`velocity_solver.absolute_tolerance`    | Linear solver convergence criterion for the momentum equation.                   | Positive real                                    | -
-`velocity_solver.maxiter`               | Linear solver max iteration count for the momentum equation.                     | Positive real                                    | 800
-`velocity_solver.projection_space_size` | Projection space size for the momentum equation.                                 | Positive integer                                 | 0
-`pressure_solver.type`                  | Linear solver for the momentum equation.                                         | `cg`, `pipecg`, `bicgstab`, `cacg`, `gmres`      | -
-`pressure_solver.preconditioner`        | Linear solver preconditioner for the momentum equation.                          | `ident`, `hsmg`, `jacobi`                        | -
-`pressure_solver.absolute_tolerance`    | Linear solver convergence criterion for the momentum equation.                   | Positive real                                    | -
-`pressure_solver.maxiter`               | Linear solver max iteration count for the momentum equation.                     | Positive real                                    | 800
-`pressure_solver.projection_space_size` | Projection space size for the momentum equation.                                 | Positive integer                                 | 0
-`flow_rate_force.direction`             | Direction of the forced flow.                                                    | 0, 1, 2                                          | -
-`flow_rate_force.value`                 | Bulk velocity or volumetric flow rate.                                           | Positive real                                    | -
-`flow_rate_force.use_averaged_flow`     | Whether bulk velocity or volumetric flow rate is given by the `value` parameter. | `true` or `false`                                | -            
-`freeze`                                | Whether to fix the velocity field at initial conditions.                          | `true` or `false`                                | `false`            
-
-## Scalar
-The scalar object allows to add a scalar transport equation to the solution.
-The solution variable is called `s`, but saved as `temperature` in the fld
- files. 
-Some properties of the object are inherited from `fluid`: the properties of the
-linear solver, the value of the density, and the output
-control.
-
-The scalar equation requires defining additional material properties: the
-specific heat capacity and thermal conductivity. These are provided as `cp` and
-`lambda`. Similarly to the fluid, one can provide the Peclet number, `Pe`, as an
-alternative. In this case, `cp` is set to 1 and `lambda` to the inverse of `Pe`. 
-
-The boundary conditions for the scalar are specified through the
-`boundary_types` keyword.
-It is possible to directly specify a uniform value for a Dirichlet boundary.
-The syntax is, e.g. `d=1`, to set the value to 1, see the Ryleigh-Benard
-example case.
-
-Note that the source term configuration for the scalar currently differs from
-that of the fluid. This will be addressed in a future release. For now, the
-`source_term` keyword should be used, set to either `noforce` (no forcing),
-`user` (same as `user_pointwise` for the fluid), and `user_vector` (same as for
-the fluid).
-
-Name               | Description                               | Admissable values                | Default value
--------------------|-------------------------------------------|----------------------------------|--------------
-`enabled`          | Whether to enable the scalar computation. | `true` or `false`                | `true`
-`Pe`               | The Peclet number.                        | Positive real                    | -
-`cp`               | Specific heat cpacity.                    | Positive real                    | -
-`lambda`           | Thermal conductivity.                     | Positive real                    | -
-`source_term.type` | Source term in the momentum equation.     | `noforce`, `user`, `user_vector` | -
-`boundary_types`   | Boundary types/conditions labels.         | Array of strings                 | -
-
-## Statistics
-
-This object adds the collection of statistics for the flud fields.
-For additional details on the workflow, see the corresponding page in the
-user manual.
-
-Name                | Description                                                          | Admissable values | Default value
---------------------|----------------------------------------------------------------------|-------------------|--------------
-`enabled`           | Whether to enable the statistics computation.                        | `true` or `false` | `true`
-`start_time`        | Time at which to start gathering statistics.                         | Positive real     | 0
-`sampling_interval` | Interval, in timesteps, for sampling the flow fields for statistics. | Positive integer  | 10
-
-## Simulation components
-Simulation components enable the user to perform various additional operations,
-which are not strictly necessary to run the solver. An example could be
-computing and output of additional fields, e.g. vorticity.
-
-A more detailed description as well as a  full list of available components and
- their setup is provided in a [separate page of the manual](simcomps.md).
-
-## Point zones
-Point zones enable the user to select GLL points in the computational domain according to some geometric criterion. Two predefined geometric shapes are selectable from the case file, boxes and spheres.
-
-A point zone object defined in the case file can be retrieved from the point zone registry, `neko_point_zone_registry`, and can be used to perform any zone-specific operations (e.g. localized source term, probing...). User-specific point zones can also be added manually to the point zone registry from the user file.
-
-A more detailed description as well as a  full list of available components and
- their setup is provided in a [separate page of the manual](point-zones.md).
diff --git a/doc/pages/developer-guide.md b/doc/pages/developer-guide.md
new file mode 100644
index 00000000000..5d71bf416db
--- /dev/null
+++ b/doc/pages/developer-guide.md
@@ -0,0 +1,11 @@
+# Developer guide {#developer-guide}
+
+These pages will provide any developer with the information needed to contribute
+to Neko. The pages cover topics such as contributing, development patterns, code
+style, testing and usage of accelerators.
+
+- \subpage contributing
+- \subpage dev_patterns
+- \subpage code-style
+- \subpage testing
+- \subpage accelerators
diff --git a/doc/pages/accelerators.md b/doc/pages/developer-guide/accelerators.md
similarity index 89%
rename from doc/pages/accelerators.md
rename to doc/pages/developer-guide/accelerators.md
index b12cb22ee08..48e881bd09f 100644
--- a/doc/pages/accelerators.md
+++ b/doc/pages/developer-guide/accelerators.md
@@ -1,5 +1,7 @@
 # Accelerators {#accelerators}
 
+\tableofcontents
+
 ## Device abstraction layer
 
 Neko has a device abstraction layer (device.F90) to manage device memory, data transfer and kernel invocations directly from a familiar Fortran interface, targeting all supported accelerator backends.
@@ -57,8 +59,8 @@ Since allocation and association is such an ordinary operation, Neko provides a
   call device_map(x, x_d, n)
 ~~~~~~~~~~~~~~~
 
-#### Data transfer
-To copy data between host and device (and device to use) use the routine device::device_memcpy which takes a Fortran array `x` of type `integer`, `integer(i8)`, `real(kind=sp)` or `real(kind=dp)`, its size `n` and the direction as the third argument which can either be `HOST_TO_DEVICE` to copy data to the device, or `DEVICE_TO_HOST` to retrieve data from the device.
+#### Data transfer {#accelerators_data-transfer}
+To copy data between host and device (and device to use) use the routine device::device_memcpy which takes a Fortran array `x` of type `integer`, `integer(i8)`, `real(kind=sp)` or `real(kind=dp)`, its size `n` and the direction as the third argument which can either be `HOST_TO_DEVICE` to copy data to the device, or `DEVICE_TO_HOST` to retrieve data from the device. The fourth boolean argument, `sync`, controls whether the transfer is synchronous (`.true.`) or asynchronous (`.false.`).
 ~~~~~~~~~~~~~~~{.f90}
   integer, allocatable :: x(:)
   type(c_ptr) :: x_d
@@ -67,14 +69,12 @@ To copy data between host and device (and device to use) use the routine device:
   allocate(x(n))
   ...
   ! Fill the device with data
-  call device_memcpy(x, x_d, n, HOST_TO_DEVICE)
+  call device_memcpy(x, x_d, n, HOST_TO_DEVICE, sync=.false.)
   ...
   ! Retrieve data from device
-  call device_memcpy(x, x_d, n, DEVICE_TO_HOST)
+  call device_memcpy(x, x_d, n, DEVICE_TO_HOST, sync=.true.)
 ~~~~~~~~~~~~~~~
 
-@attention device::device_memcpy defaults to asynchronous data transfer. The optional boolean argument `sync` must be true if synchronous transfers are needed.
-
 @note that there's a special device::device_memcpy_cptr routine which only works with pointers, either on the host or the device, and the size needs to be given in bytes. This routine can be used to copy data between two arrays on a device with the direction `DEVICE_TO_DEVICE`.
 ~~~~~~~~~~~~~~~{.f90}
   type(c_ptr) :: x_d, y_d
@@ -84,9 +84,10 @@ To copy data between host and device (and device to use) use the routine device:
   call device_memcpy_cptr(y_d, x_d, s, DEVICE_TO_DEVICE)
 ~~~~~~~~~~~~~~~
 
+@attention device::device_memcpy_cptr defaults to asynchronous data transfer. The optional boolean argument `sync` must be true if synchronous transfers are needed.
 @attention It is the programmers' responsibility to make sure that device arrays are kept in sync with the associated host array. Neko does not perform any implicit data movement.
 
-### Offload work
+### Offload work {#accelerators_offload-work}
 To offload work to a device, most routines in Neko have a device version prefixed with `device_<name of routine>`. These routines have the same arguments as the host equivalent, but one must pass device pointers instead of Fortran arrays.
 
 For example, we call the `math::add2` routine to add two arrays together on the host.
diff --git a/doc/pages/code-style.md b/doc/pages/developer-guide/code-style.md
similarity index 77%
rename from doc/pages/code-style.md
rename to doc/pages/developer-guide/code-style.md
index 2e78d781027..11afa102351 100644
--- a/doc/pages/code-style.md
+++ b/doc/pages/developer-guide/code-style.md
@@ -1,4 +1,7 @@
 # Code style {#code-style}
+
+\tableofcontents
+
 Fortran code should conform to the Fortran 2008 standard and should use an indentation level of 2, except for the extra indentation within `do` `if`, `select` or `where` statements and for each level inside a structure e.g. `type`, `interface`, where the indentation level is 3, and a 0 indentation is used for `module` or `contains` (except for `contains` inside a derived type, where a single indentation level is used).
 
 These are the default rules in Emacs' Fortran mode, an example is given below,
@@ -36,6 +39,17 @@ end module example
 
 Please note that the maximum line length in Neko should not exceed 80 columns.
 
+Additionally the [findent](https://github.com/wvermin/findent) tool can be used
+to enforce these rules by assigning the following options. The documentation of
+`findent` provide details for emacs, vim and gedit. For VSCode, the
+[Modern Fortran](https://marketplace.visualstudio.com/items?itemName=fortran-lang.linter-gfortran)
+extension provide an integration.
+
+```sh
+findent -i2 -d3 -f3 -s3 -w3 -t3 -j3 -k- -Rr -c3 < input.f90 > formatted.f90
+```
+
+
 ## Data types
 For portability reasons, it is essential to only use data type kinds defined in num_types.f90 and avoid legacy constructs like `real*8` or `integer(kind=8)`
 
diff --git a/CONTRIBUTING.md b/doc/pages/developer-guide/contributing.md
similarity index 98%
rename from CONTRIBUTING.md
rename to doc/pages/developer-guide/contributing.md
index 4717a9e7e0e..2d641c1fa1f 100644
--- a/CONTRIBUTING.md
+++ b/doc/pages/developer-guide/contributing.md
@@ -1,4 +1,7 @@
-# Contributing to Neko 
+# Contributing to Neko {#contributing} 
+
+\tableofcontents
+
 Please read the following guide before contributing new code or a bug fix to Neko.
 
 All contributions to Neko must be made under the 3-Clause BSD license. Please refer to the `COPYING` file.
@@ -51,7 +54,7 @@ This section contains information on how to add new source files to the build sy
 
 Neko uses Autotools for building all sources. You will need to have at least `autoconf` and `automake` installed for development work. It is also highly recommended to have `makedepf90` installed to avoid error-prone manual dependency tracking.
 
-The following steps describe how to add a new Fortran file to Neko`s build system
+The following steps describe how to add a new Fortran file to Neko's build system
 1. Place the file in an appropriate subdirectory under `src/`. Either create a new subdirectory or place the file in `common` if none of the existing directories is a good match. Avoid placing the file directly under `src`
 2. Add the file to the `neko_fortran_SOURCES` list in `src/Makefile.am`, following the pattern of `<subdir under src>/newfile.f90`.
 3. Ensure correct dependency tracking
diff --git a/doc/pages/dev_patterns.md b/doc/pages/developer-guide/dev_patterns.md
similarity index 99%
rename from doc/pages/dev_patterns.md
rename to doc/pages/developer-guide/dev_patterns.md
index 15f52784440..706c1a37948 100644
--- a/doc/pages/dev_patterns.md
+++ b/doc/pages/developer-guide/dev_patterns.md
@@ -1,5 +1,7 @@
 # Programming patterns and conventions {#dev_patterns}
 
+\tableofcontents
+
 This section aims to summarize the programming conventions in Neko to guide all
 developers into writing code with good style and facilitate the reuse of the
 same programming patterns throughout the code base. It is a good idea to check
diff --git a/doc/pages/testing.md b/doc/pages/developer-guide/testing.md
similarity index 99%
rename from doc/pages/testing.md
rename to doc/pages/developer-guide/testing.md
index 847e343045b..ce2f1f38e60 100644
--- a/doc/pages/testing.md
+++ b/doc/pages/developer-guide/testing.md
@@ -1,5 +1,7 @@
 # Testing {#testing}
 
+\tableofcontents
+
 ## pFUnit
 Neko uses the software pFUnit for unit testing.
 To install the software you can use the following commands, in which you should just set the desired installation path
@@ -70,4 +72,4 @@ The instructions will differ somewhat depending on whether you test uses MPI or
    Find `# Config tests` line and the `AC_CONFIG_FILES` list below it.
    Add the path to a `Makefile` (note, no `.in`!!) in your test folder. 
 8. Please also add the compilation products of your test to the  `tests/.gitignore` file.
-   This helps keeps the version controlled file list unpolluted.
\ No newline at end of file
+   This helps keeps the version controlled file list unpolluted.
diff --git a/doc/pages/index.md b/doc/pages/index.md
index 9f5c6c764c0..332bf6f67a8 100644
--- a/doc/pages/index.md
+++ b/doc/pages/index.md
@@ -8,31 +8,41 @@ accelerators to SX-Aurora vector processors. Neko has its roots in the spectral
 element code Nek5000 from UChicago/ANL, from where many of the namings, code
 structure and numerical methods are adopted.
 
-Neko is currently maintained and developed at KTH Royal Institute of
-Technology.
-
-
-\tableofcontents
-
-\subpage installation
-
-\subpage governing-equations
-
-\subpage case-file
-
-\subpage simcomps
-
-\subpage point-zones
-
-\subpage accelerators
-
-\subpage statistics-guide
-
-\subpage code-style
- 
-\subpage dev_patterns
-
-\subpage testing
-
-\subpage appendix
-
+## Structure of the Manual
+
+In order to facilitate reading of the documentation. The manual is divided into
+several sections. The [user guides](@ref user-guide) are intended for users of the code, while the
+[developer guides](@ref developer-guide) are intended for developers of the code. The [appendix](@ref appendices) contains
+additional information that is not directly related to the usage of the code.
+
+- \subpage user-guide
+  - [Installation](@ref installation) explains how to download and compile Neko
+  on your platform.
+  - [Case File](@ref case-file) discusses the various parameters and options 
+  for your case setup such as boundary conditions, output control, etc.
+  - [User File](@ref user-file) explains all the user functions and how they can
+  be used to run more advanced simulations.
+  - [Simulation Components](@ref simcomps) presents some extra functionalities 
+  and tools such as computation and output of additional fields, in-situ 
+post-processing operations, data sampling, etc.
+  - [Point Zones](@ref point-zones) allow you to select zones in the mesh for
+application of source terms, initial conditions, etc.
+  - [Statistics Guide](@ref statistics-guide) outlines the steps to generate
+3D and 2D field statistics.
+  - [Input/Output](@ref io) explains how to read and write various types of data
+using the Neko framework.
+- \subpage developer-guide
+  - [Contributing](@ref contributing) presents basic instructions to add
+your contributions to Neko.
+  - [Development Patterns](@ref dev_patterns) outlines the standards to be used
+when developing code in the Neko framework, such as naming conventions and
+documentation.
+  - [Code Style](@ref code-style) introduces some extra programming conventions
+related to coding style and IDEs.
+  - [Testing](@ref testing) outlines the steps to run and add unit tests to the
+code base with pFUnit.
+  - [Accelerators](@ref accelerators) discusses important concepts and 
+conventions related to GPU programming in the Neko framework.
+- \subpage appendices
+  - [Governing Equations](@ref governing-equations) used in our solvers.
+  - [Publications](@ref publications)
diff --git a/doc/pages/simcomps.md b/doc/pages/simcomps.md
deleted file mode 100644
index 4f8e91b6c3d..00000000000
--- a/doc/pages/simcomps.md
+++ /dev/null
@@ -1,70 +0,0 @@
-
-# Simulation components {#simcomps}
-## What are simulation components?
-Simulation components, or simcomps fo short,  incapsulate additional
-functionality that may be useful for certain cases but not necessary to run the
-solver. 
-This can include computation and output of additional fields, in-situ
-post-processing operations, data sampling, etc.
-
-By design, simulation components can tap into every aspect of the simulation,
-so they can be quite powerful.
-As the code grows, we expect to add more and more simcomps to the code.
-
-## Adding simulation components to the case
-Each simcomp is defined as a single JSON object at are added to an array
-of objects called `simulation_components`, which resides directly under the
-`case` object.
-
-## Controling execution and file output
-Each simulation component is, by default, executed once per time step to
-perform associated computations and output.
-However, this can be modified by using the `compute_control` and `compute_value`
-parameters for the computation and the `output_control and` and 
-`output_value` for the output to disk.
-The paramters for the `_control` values are the same as for the fluid and 
-checkpointing.
-
-For example, in the `tgv` example case the `vorticity` component is executed 
-once per 50 time steps. 
-~~~~~~~~~~~~~~~{.json}
-{
-    "type": "vorticity",
-    "compute_control": "tsteps",
-    "compute_value": 50
-}
-~~~~~~~~~~~~~~~
-If no parameters for the `output_` parametersare provided, they areset to be the
- same as for `compute_`.
-
- ## List of simulation components
-
- ### vorticity
- Computes the vorticity field an stores in the field registry as `omega_x`,
- `omega_y` and `omega_z`.
- Currently produces no output.
-
- ### lambda2
- Computes \f$ \lambda_2 \f$ for the velocity field and stores it in the normal output files as the first unused field.
- This means that \f$ \lambda_2 \f$ can be found in the temeprature field in then fld files if running without a scalar
- and s1 if neko is run with one scalar.
- 
- ### probes
- Probes selected solution fields at the points given inside an input file. Example usage:
- ~~~~~~~~~~~~~~~{.json}
- {
-   "type": "probes",
-   "compute_control": "simulationtime",
-   "compute_value"    : 1,
-   "points_file":  "probes.csv",
-   "output_file":  "output.csv",
-   "fields": ["w","s"]
- }
- ~~~~~~~~~~~~~~~
-This probes the fields 'w', and 's' in the points described by points.csv and outputs into output.csv every 1 time units.
-
-
-
-
-
-
diff --git a/doc/pages/statistics.md b/doc/pages/statistics.md
deleted file mode 100644
index f527b79847d..00000000000
--- a/doc/pages/statistics.md
+++ /dev/null
@@ -1,45 +0,0 @@
-
-# Statistics guide {#statistics-guide}
-
-Under development, updated incrementally
-## Notes on the statistics calculation in Neko
-***Daniele Massaro, Martin Karp (KTH)***
-
-1) Run your simulations and collect mean_field* and stats* files by having the statistics object added to the case file,  and specifying the write interval to something suitable.
-
-2) For each RUN_i, you get a set of mean_field* and stats* files. You can average them for each single RUN_i, or average all of them only once (after re-ordering them properly). If you follow the second approach, go to step 4. 
-Here, for each RUN_i, we compute the averaged means with "average_fields_in_time":
---mean
-`srun --unbuffered /your/location/neko/bin/average_fields_in_time meanXX.fld T0 mean_p.fld`
-where T0 is the initial time. To get some hints on the input for the script one can simply run `./average_fields_in_time` without any arguments. For RUN_1 the time T0 can be taken from the log of the first simulation, or from the header of the first mean_field* file; in this way you discard that file. For RUN_i, with i>1, it can be taken from header of the last file mean_field* of the previous simulation RUN_{i-1}. 
-In the command line, for the name "meanXX.fld", XX indicates the number of the nek5000 file. In mean_fieldXX.nek5000 you set the number of the first mean0* file to read and the number of steps corresponding to the number of files. In this way, the code generates a mean_p0.f00000 and mean_post0.nek5000. It is suggested to rename mean_p0.f00000 as mean_p0.f0000i and move it to a separate folder where you take the average with all the others. 
---stats
-`srun --unbuffered /your/location/neko/bin/average_fields_in_time statXX.fld T0 stat_p.fld`
-T0 is the same as before. In stat0.nek5000 you set the number of the first stat0* file to read and the number of steps corresponds to the number of files. It is suggested to rename stat_p0.f00000 as stat_p0.f0000i and move it to a separate folder where you take the average with all the others. 
-Repeat this for each RUN_i folder. Eventually, given n RUN_i folders, you will get n mean_p* and stat_p* files.
-
-3) Take the average of the averaged runs. Now, the time average over all the n simulations is taken. The procedure is similar, but changing the output name is recommended to  avoid over-writing.
--- mean
-`srun --unbuffered /your/location/neko/bin/average_fields mean_p0.fld T0 mean_post.fld`
-where T0 is the initial time which has been used to compute mean_p* for RUN_1.
--- stats
-`srun --unbuffered /your/location/neko/bin/average_fields stat_p0.fld T0 stat_post.fld`
-where T0 is the initial time which has been used to compute mean_p* for RUN_1.
-
-
-
-
-
-4) Compute Reynolds stress tensors and other statistical moments (see the list).
-`srun --unbuffered /your/location/neko/bin/postprocess_fluid_stats mesh_file.nmsh mean_post0.fld stat_post0.fld`
-
-5) We also provide a tool to average the resulting field in a homogenous direction in `bin/average_field_in_space`. The required arguments are shown if one runs the program without any input. Currently it requires the number of elements in the homogenous direction as an input argument, e.g. 
-`./average_field_in_space mesh.nmsh field.fld x 18 outfield.fld`
-if we want to average a field in the x direction on a mesh with 18 elements in x and output the averaged field in outfield0.nek5000.
-
-
-
-
-
-
-
diff --git a/doc/pages/user-guide.md b/doc/pages/user-guide.md
new file mode 100644
index 00000000000..e2c35ee3e27
--- /dev/null
+++ b/doc/pages/user-guide.md
@@ -0,0 +1,13 @@
+# User guide {#user-guide}
+
+These pages will provide any user with the information needed to run Neko. Both
+for simple examples where one just wish to configure a case and run it, to more
+advanced topics such as statistics and user defined extensions.
+
+- \subpage installation
+- \subpage case-file
+- \subpage user-file
+- \subpage simcomps
+- \subpage point-zones
+- \subpage statistics-guide
+- \subpage io
diff --git a/doc/pages/user-guide/case-file.md b/doc/pages/user-guide/case-file.md
new file mode 100644
index 00000000000..98cd8e04fb6
--- /dev/null
+++ b/doc/pages/user-guide/case-file.md
@@ -0,0 +1,508 @@
+# Case File {#case-file}
+
+\tableofcontents
+
+The case file defines all the parameters of a simulation.
+The format of the file is JSON, making it easy to read and write case files
+using the majority of the popular programming languages.
+JSON is heirarchical and, and consists of parameter blocks enclosed in curly
+braces.
+These blocks are referred to as objects.
+The case file makes use objects to separate the configuration of different parts
+ of the solver.
+We refer the reader to the examples shipped with the code to get a good
+idea of how a case file looks.
+The table below provides a complete reference for all possible configuration
+choices.
+
+## High-level structure
+The current high-level structure of the case file is shown below.
+
+~~~~~~~~~~~~~~~{.json}
+{
+    "version": 1.0
+    "case": {
+        "numerics": {}
+        "fluid": {}
+        "scalar": {}
+        "statistics": {}
+        "simulation_components" : []
+        "point_zones" : []
+    }
+}
+~~~~~~~~~~~~~~~
+The `version` keywords is reserved to track changes in the format of the file.
+The the subsections below we list all the configuration options for each of the high-level objects.
+Some parameters will have default values, and are therefore optional.
+
+## Output frequency control
+A common scheme for controlling the output frequency is applied for various
+outputs.
+It is described already now in order to clarify the meaning of several
+parameters found in the tables below.
+
+The frequency is controlled by two paramters, ending with `_control` and
+`_value`, respectively.
+The latter name is perhaps not ideal, but it is somewhat difficult to come up
+with a good one, suggestions are welcome.
+
+The `_value` parameter is a number, that defines the output frequency, but the
+interpretation of that number depends on the choice of `_control`.
+The three following options are possible.
+1. `simulationtime`, then `_value` is the time interval between the outputs.
+2. `tsteps`, then `_value` is the number of time steps between the outputs.
+3. `nsamples`, then `_value` is the total number of outputs that will be
+   performed in the course of the simulation.
+4. `never`, then `_value` is ignored and output is never performed.
+
+
+## The case object
+
+This object is mostly used as a high-level container for all the other objects,
+but also defines several parameters that pertain to the simulation as a whole.
+
+| Name                       | Description                                                                                           | Admissible values                               | Default value |
+| -------------------------- | ----------------------------------------------------------------------------------------------------- | ----------------------------------------------- | ------------- |
+| `mesh_file`                | The name of the mesh file.                                                                            | Strings ending with `.nmsh`                     | -             |
+| `output_boundary`          | Whether to write a `bdry0.f0000` file with boundary labels. Can be used to check boundary conditions. | `true` or `false`                               | `false`       |
+| `output_directory`         | Folder for redirecting solver output. Note that the folder has to exist!                              | Path to an existing directory                   | `.`           |
+| `output_precision`         | Whether to output snapshots in single or double precision                                             | `single` or `double`                            | `single`      |
+| `load_balancing`           | Whether to apply load balancing.                                                                      | `true` or `false`                               | `false`       |
+| `output_partitions`        | Whether to write a `partitions.vtk` file with domain partitioning.                                    | `true` or `false`                               | `false`       |
+| `output_checkpoints`       | Whether to output checkpoints, i.e. restart files.                                                    | `true` or `false`                               | `false`       |
+| `checkpoint_control`       | Defines the interpretation of `checkpoint_value` to define the frequency of writing checkpoint files. | `nsamples`, `simulationtime`, `tsteps`, `never` | -             |
+| `checkpoint_value`         | The frequency of sampling in terms of `checkpoint_control`.                                           | Positive real or integer                        | -             |
+| `restart_file`             | checkpoint to use for a restart from previous data                                                    | Strings ending with `.chkp`                     | -             |
+| `timestep`                 | Time-step size                                                                                        | Positive reals                                  | -             |
+| `variable_timestep`        | Whether to use variable dt                                                                            | `true` or `false`                               | `false`       |
+| `max_timestep`             | Maximum time-step size when variable time step is activated                                           | Positive reals                                  | -             |
+| `target_cfl`               | The desired CFL number                                                                                | Positive real                                   | `0.4`         |
+| `cfl_max_update_frequency` | The minimum interval between two time-step-updating steps in terms of time steps                      | Integer                                         | `0`           |
+| `cfl_running_avg_coeff`    | The running average coefficient `a` where `cfl_avg_new = a * cfl_new + (1-a) * cfl_avg_old`           | Positive real between `0` and `1`               | `0.5`         |
+| `max_dt_increase_factor`   | The maximum scaling factor to increase time step                                                      | Positive real greater than `1`                  | `1.2`         |
+| `min_dt_decrease_factor`   | The minimum scaling factor to decrease time step                                                      | Positive real less than `1`                     | `0.5`         |
+| `end_time`                 | Final time at which the simulation is stopped.                                                        | Positive reals                                  | -             |
+| `job_timelimit`            | The maximum wall clock duration of the simulation.                                                    | String formatted as HH:MM:SS                    | No limit      |
+
+<h3> Boundary type numbering in the `output_boundary` field </h3>
+
+When the `output_boundary` setting is set to `true`, and additional `.fld` file
+will be stored in the beginning of the simulation, where the recognized
+boundaries will be marked with an integer number. This is a good way to debug
+the simulation setup. The value of the number depends on the type of the
+boundary as follows:
+
+1. A wall boundary, i.e. the `w` label.
+2. A Dirichlet boundary, i.e. the `v` label.
+3. An outlet boundary, i.e. the `o` label.
+4. A symmetry boundary, i.e. the `sym` label.
+5. A periodic boundary.
+6. An wall-normal transpiration boundary, i.e. the `on` label.
+
+Note that the boundary conditions can be both prescribed via the labels in the
+case file or built into the mesh via conversion from a `.re2` file. Both types
+will be picked up and marked in the field produced by `output_boundary`.
+
+
+## Numerics
+Used to define the properties of the numerical discretization.
+
+| Name                         | Description                                                                                                   | Admissible values          | Default value                   |
+| ---------------------------- | ------------------------------------------------------------------------------------------------------------- | -------------------------- | ------------------------------- |
+| `polynomial_order`           | The oder of the polynomial basis.                                                                             | Integers, typically 5 to 9 | -                               |
+| `time_order`                 | The order of the time integration scheme. Refer to the `time_scheme_controller` type documention for details. | 1,2, 3                     | -                               |
+| `dealias`                    | Whether to apply dealiasing to advection terms.                                                               | `true` or `false`          | `false`                         |
+| `dealiased_polynomial order` | The polynomial order in the higher-order space used in the dealising.                                         | Integer                    | `3/2(polynomial_order + 1) - 1` |
+
+## Fluid
+
+The configuration of the fluid solver and the flow problem.
+Contains multiple subobjects for various parts of the setup.
+
+### Material properties
+As per the governing equations, Neko requires the value of the density and
+dynamic viscosity to define the flow problem. These can be provided as `rho` and
+`mu` in the case file.
+
+Alternatively, one may opt to provide the Reynolds number, `Re`, which
+corresponds to a non-dimensional formulation of the Navier-Stokes equations.
+This formulation can effectively be obtained by setting \f$ \rho = 1 \f$ and \f$
+\mu = 1/Re \f$. This is exactly what Neko does under the hood, when `Re` is
+provided in the case file.
+
+Note that if both `Re` and any of the dimensional material properties are
+provided, the simulation will issue an error.
+
+As an alternative to providing material properties in the case file, it is
+possible to do that in a special routine in the user file. This is demonstrated
+in the `rayleigh-benard-cylinder` example. Ultimately, both `rho` and `mu` have
+to be set in the subroutine, but it can be based on arbitrary computations and
+arbitrary parameters read from the case file. Additionally, this allows to
+change the material properties in time.
+
+### Boundary types {#case-file_boundary-types}
+The optional `boundary_types` keyword can be used to specify boundary conditions.
+The reason for it being optional, is that some conditions can be specified
+directly inside the mesh file.
+In particular, this happens when Nek5000 `.re2` files are converted to `.nmsh`.
+Periodic boundary conditions are *always* defined inside the mesh file.
+
+The value of the keyword is an array of strings, with the following possible
+values:
+
+* Standard boundary conditions 
+  * `w`, a no-slip wall.
+  * `v`, a velocity Dirichlet boundary.
+  * `sym`, a symmetry boundary.
+  * `o`, outlet boundary. 
+  * `on`, Dirichlet for the boundary-parallel velocity and homogeneous Neumann for
+   the wall-normal. The wall-parallel velocity is defined by the initial
+   condition. 
+
+* Advanced boundary conditions
+  * `d_vel_u`, `d_vel_v`, `d_vel_w` (or a combination of them, separated by a `"/"`), a 
+  Dirichlet boundary for more complex velocity profiles. This boundary condition uses a 
+  [more advanced user interface](#user-file_field-dirichlet-update). 
+  * `d_pres`, a boundary for specified non-uniform pressure profiles, similar in 
+  essence to `d_vel_u`,`d_vel_v` and `d_vel_w`. Can be combined with other 
+  complex Dirichlet coonditions by specifying e.g.: `"d_vel_u/d_vel_v/d_pres"`.
+  * `o+dong`, outlet boundary using the Dong condition. 
+  * `on+dong`, an `on` boundary using the Dong condition, ensuring that the
+   wall-normal velocity is directed outwards.
+
+In some cases, only some boundary types have to be provided.
+For example, when one has periodic boundaries, like in the channel flow example.
+In this case, to put the specification of the boundary at the right index,
+preceding boundary types can be marked with an empty string.
+For example, if boundaries with index 1 and 2 are periodic, and the third one is
+a wall, we can set.
+```
+"boundary_types": ["", "", "w"]
+```
+
+### Inflow boundary conditions {#case-file_fluid-if}
+The object `inflow_condition` is used to specify velocity values at a Dirichlet
+boundary.
+This does not necessarily have to be an inflow boundary, so the name is not so
+good, and will most likely be changed along with type changes in the code.
+Since not all cases have Dirichlet boundaries (note, the special case of a
+no-slip boundary is treated separately in the configuration), this object
+is not obligatory.
+The means of prescribing the values are controlled via the `type` keyword:
+
+1. `user`, the values are set inside the compiled user file.
+2. `uniform`, the value is a constant vector, looked up under the `value`
+   keyword.
+3. `blasius`, a Blasius profile is prescribed. Its properties are looked up
+   in the `case.fluid.blasius` object, see below.
+
+### Initial conditions {#case-file_fluid-ic}
+The object `initial_condition` is used to provide initial conditions.
+It is mandatory.
+Note that this currently pertains to both the fluid, but also scalars.
+The means of prescribing the values are controlled via the `type` keyword:
+
+1. `user`, the values are set inside the compiled user file. The only way to
+   initialize scalars.
+2. `uniform`, the value is a constant vector, looked up under the `value`
+   keyword.
+3. `blasius`, a Blasius profile is prescribed. Its properties are looked up
+   in the `case.fluid.blasius` object, see below.
+
+### Blasius profile
+The `blasius` object is used to specify the Blasius profile that can be used for the
+initial and inflow condition.
+The boundary cannot be tilted with respect to the coordinate axes.
+It requires  the following parameters:
+
+1. `delta`, the thickness of the boundary layer.
+2. `freestream_velocity`, the velocity value in the free stream.
+3. `approximation`, the numerical approximation to the Blasius profile.
+   - `linear`, linear approximation.
+   - `quadratic`, quadratic approximation.
+   - `cubic`, cubic approximation.
+   - `quartic`, quartic approximation.
+   - `sin`, sine function approximation.
+
+### Source terms {#case-file_fluid-source-term}
+The `source_terms` object should be used to specify the source terms in the
+momentum equation. The object is not mandatory, by default no forcing term is
+present. Each source term, is itself a JSON object, so `source_terms` is just an
+array of them. Note that with respect to the governing equations, the source
+terms define \f$ f^u \f$, meaning that the values are then multiplied by the
+density.
+
+For each source, the `type` keyword defines the kind of forcing that will be
+introduced. Furthermore, the `start_time` and `end_time` keywords can be used to
+set a time frame for when the source term is active. Note, however, that these
+keywords have no effect on the user-defined source terms, but their execution
+can, of course, be directly controlled in the user code. By default, all source
+terms are active during the entire simulation.
+
+The following types are currently implemented.
+
+1. `constant`, constant forcing. Strength defined by the `values` array with 3
+   reals corresponding to the 3 components of the forcing.
+2. `boussinesq`, a source term introducing boyancy based on the Boussinesq
+   approximation, \f$ \rho \beta (T - T_{ref} \cdot g) \f$. Here, \f$ rho \f$ is
+   density, \f$ \beta \f$ the thermal expansion coefficient, \f$ g \f$ the
+   gravity vector, and $T_{ref}$ a reference value of the scalar, typically
+   temperature.
+
+   Reads the following entries:
+   - `scalar_field`: The name of the scalar that drives the source term,
+     defaults to "s".
+   - `reference_value`: The reference value of the scalar.
+   - `g`: The gravity vector.
+   - `beta`: The thermal expansion coefficient, defaults to the inverse of
+      `ref_value`.
+3. `user_pointwise`, the values are set inside the compiled user file, using the
+   pointwise user file subroutine. Only works on CPUs!
+4. `user_vector`, the values are set inside the compiled user file, using the
+   non-pointwise user file subroutine. Should be used when running on the GPU.
+4. `brinkman`, Brinkman permeability forcing inside a pre-defined region.
+
+#### Brinkman
+The Brinkman source term introduces regions of resistance in the fluid domain.
+The volume force \f$ f_i \f$ applied in the selected regions are proportional to the
+fluid velocity component \f$ u_i \f$.
+
+\f{eqnarray*}{
+   f_i(x) &=& - B(x) u_i(x), \\
+   B(x) &=& \kappa_0 + (\kappa_1 - \kappa_0) \xi(x) \frac{q + 1}{q + \xi(x)},
+ \f}
+
+where, \f$ x \f$ is the current location in the domain, \f$ \xi: x \mapsto [0,1] \f$
+represent an indicator function for the resistance where \f$ \xi(x) = 0 \f$ is a free
+flow. \f$ \kappa_i \f$ describes the limits for the force application at \f$ \xi(x)=0 \f$
+and \f$ \xi(x)=1 \f$. A penalty parameter \f$ q \f$ help us to reduce numerical problems.
+
+The indicator function will be defined based on the object type. The following
+types are currently implemented.
+
+1. `boundary_mesh`, the indicator function for a boundary mesh is computed in
+   two steps. First, the signed distance function is computed for the boundary
+   mesh. Then, the indicator function is computed using the distance transform
+   function specified in the case file.
+2. `point_zone`, the indicator function is defined as 1 inside the point zone
+   and 0 outside.
+
+Each object are added to a common indicator field by means of a point-wise max
+operator. This means that the indicator field will be the union of all the
+regions defined by the objects.
+
+To assist correct placement and scaling of objects from external sources, the
+meshes can be transformed using the `mesh_transform` object. The object can be
+used to apply a transformation to the boundary mesh. The following types are
+currently implemented.
+
+1. `none`, no transformation is applied.
+2. `bounding_box`, the boundary mesh is transformed to fit inside a box defined
+   by `box_min` and `box_max`. The box is defined by two vectors of 3 reals
+   each. The `keep_aspect_ratio` keyword can be used to keep the aspect ratio of
+   the boundary mesh.
+
+After the indicator field is computed, it is filtered using a filter type
+specified in the case file. The filter is used to smooth the indicator field
+before computing the Brinkman force. The following types are currently
+implemented.
+
+1. `none`, no filtering is applied.
+
+The filtering can be defined for each object separately. Additionally, the
+filter can be specified for the entire source term, in which case it will be
+applied to the final indicator field, after all sources have been added.
+
+Additional keywords are available to modify the Brinkman force term.
+
+| Name                               | Description                                                                                   | Admissible values                 | Default value |
+| ---------------------------------- | --------------------------------------------------------------------------------------------- | --------------------------------- | ------------- |
+| `brinkman.limits`                  | Brinkman factor at free-flow (\f$ \kappa_0 \f$) and solid domain (\f$ \kappa_1 \f$).          | Vector of 2 reals.                | -             |
+| `brinkman.penalty`                 | Penalty parameter \f$ q \f$ when estimating Brinkman factor.                                        | Real                              | \f$ 1.0 \f$         |
+| `objects`                          | Array of JSON objects, defining the objects to be immersed.                                   | Each object must specify a `type` | -             |
+| `distance_transform.type`          | How to map from distance field to indicator field.                                            | `step`, `smooth_step`             | -             |
+| `distance_transform.value`         | Values used to define the distance transform, such as cut-off distance for the step function. | Real                              | -             |
+| `filter.type`                      | Type of filtering applied to the indicator field either globally or for the current object.   | `none`                            | `none`        |
+| `mesh_transform.type`              | Apply a transformation to the boundary mesh.                                                  | `bounding_box`, `none`            | `none`        |
+| `mesh_transform.box_min`           | Lower left front corner of the box to fit inside.                                             | Vector of 3 reals                 | -             |
+| `mesh_transform.box_max`           | Upper right back corner of the box to fit inside.                                             | Vector of 3 reals                 | -             |
+| `mesh_transform.keep_aspect_ratio` | Keep the aspect ratio of the boundary mesh.                                                   | `true` or `false`                 | `true`        |
+
+Example of a Brinkman source term where a boundary mesh and a point zone are
+combined to define the resistance in the fluid domain. The indicator field for
+the boundary mesh is computed using a step function with a cut-off distance of
+\f$ 0.1 \f$. The indicator field for the point zone is not filtered.
+
+~~~~~~~~~~~~~~~{.json}
+"source_terms": [
+   {
+      "type": "brinkman",
+      "objects": [
+         {
+            "type": "boundary_mesh",
+            "name": "some_mesh.stl",
+            "distance_transform": {
+               "type": "step",
+               "value": 0.1
+            },
+         },
+         {
+            "type": "point_zone",
+            "name": "cylinder_zone",
+            "filter": {
+               "type": "none"
+            }
+         }
+      ],
+      "brinkman": {
+         "limits": [0.0, 100.0],
+         "penalty": 1.0
+      }
+   }
+]
+~~~~~~~~~~~~~~~
+
+
+## Linear solver configuration
+The mandatory `velocity_solver` and `pressure_solver` objects are used to
+configure the solvers for the momentum and pressure-Poisson equation.
+The following keywords are used, with the corresponding options.
+
+* `type`, solver type.
+  - `cg`, a conjugate gradient solver.
+  - `pipecg`, a pipelined conjugate gradient solver.
+  - `bicgstab`, a bi-conjugate gradient stabilized solver.
+  - `cacg`, a communication-avoiding conjugate gradient solver.
+  - `gmres`, a GMRES solver. Typically used for pressure.
+  - `fusedcg`, a conjugate gradient solver optimised for accelerators using kernel fusion.
+* `preconditioner`, preconditioner type.
+  - `jacobi`, a Jacobi preconditioner. Typically used for velocity.
+  - `hsmg`, a hybrid-Schwarz multigrid preconditioner. Typically used for pressure.
+  - `ident`, an identity matrix (no preconditioner).
+* `absolute_tolerance`, tolerance criterion for convergence.
+* `max_iterations`, maximum number of iterations before giving up.
+* `projection_space_size`, size of the vector space used for accelerating the
+   solution procedure. If 0, then the projection space is not used.
+   More important for the pressure equation.
+* `projection_hold_steps`, steps for which the simulation does not use projection after starting
+   or time step changes. E.g. if 5, then the projection space will start to update at the 6th
+   time step and the space will be utilized at the 7th time step.
+
+### Flow rate forcing
+The optional `flow_rate_force` object can be used to force a particular flow
+rate through the domain.
+Useful for channel and pipe flows.
+The configuration uses the following parameters:
+
+* `direction`, the direction of the flow, defined as 0, 1, or 2, corresponding
+  to x, y or z, respectively.
+* `value`, the desired flow rate.
+* `use_averaged_flow`, whether `value` specifies the domain-averaged (bulk)
+   velocity or the volume flow rate.
+
+
+### Full parameter table
+All the parameters are summarized in the table below.
+This includes all the subobjects discussed above, as well as keyword parameters
+that can be described concisely directly in the table.
+
+| Name                                    | Description                                                                                       | Admissible values                                | Default value |
+| --------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------ | ------------- |
+| `scheme`                                | The fluid solve type.                                                                             | `pnpn`                                           | -             |
+| `Re`                                    | The Reynolds number.                                                                              | Positive real                                    | -             |
+| `rho`                                   | The density of the fluid.                                                                         | Positive real                                    | -             |
+| `mu`                                    | The dynamic viscosity of the fluid.                                                               | Positive real                                    | -             |
+| `output_control`                        | Defines the interpretation of `output_value` to define the frequency of writing checkpoint files. | `nsamples`, `simulationtime`, `tsteps`, `never`  | -             |
+| `output_value`                          | The frequency of sampling in terms of `output_control`.                                           | Positive real or integer                         | -             |
+| `inflow_condition.type`                 | Velocity inflow condition type.                                                                   | `user`, `uniform`, `blasius`                     | -             |
+| `inflow_condition.value`                | Value of the inflow velocity.                                                                     | Vector of 3 reals                                | -             |
+| `initial_condition.type`                | Initial condition type.                                                                           | `user`, `uniform`, `blasius`                     | -             |
+| `initial_condition.value`               | Value of the velocity initial condition.                                                          | Vector of 3 reals                                | -             |
+| `blasius.delta`                         | Boundary layer thickness in the Blasius profile.                                                  | Positive real                                    | -             |
+| `blasius.freestream_velocity`           | Free-stream velocity in the Blasius profile.                                                      | Vector of 3 reals                                | -             |
+| `blasius.approximation`                 | Numerical approximation of the Blasius profile.                                                   | `linear`, `quadratic`, `cubic`, `quartic`, `sin` | -             |
+| `source_terms`                          | Array of JSON objects, defining additional source terms.                                          | See list of source terms above                   | -             |
+| `boundary_types`                        | Boundary types/conditions labels.                                                                 | Array of strings                                 | -             |
+| `velocity_solver.type`                  | Linear solver for the momentum equation.                                                          | `cg`, `pipecg`, `bicgstab`, `cacg`, `gmres`      | -             |
+| `velocity_solver.preconditioner`        | Linear solver preconditioner for the momentum equation.                                           | `ident`, `hsmg`, `jacobi`                        | -             |
+| `velocity_solver.absolute_tolerance`    | Linear solver convergence criterion for the momentum equation.                                    | Positive real                                    | -             |
+| `velocity_solver.maxiter`               | Linear solver max iteration count for the momentum equation.                                      | Positive real                                    | 800           |
+| `velocity_solver.projection_space_size` | Projection space size for the momentum equation.                                                  | Positive integer                                 | 20            |
+| `velocity_solver.projection_hold_steps` | Holding steps of the projection for the momentum equation.                                        | Positive integer                                 | 5             |
+| `pressure_solver.type`                  | Linear solver for the momentum equation.                                                          | `cg`, `pipecg`, `bicgstab`, `cacg`, `gmres`      | -             |
+| `pressure_solver.preconditioner`        | Linear solver preconditioner for the momentum equation.                                           | `ident`, `hsmg`, `jacobi`                        | -             |
+| `pressure_solver.absolute_tolerance`    | Linear solver convergence criterion for the momentum equation.                                    | Positive real                                    | -             |
+| `pressure_solver.maxiter`               | Linear solver max iteration count for the momentum equation.                                      | Positive real                                    | 800           |
+| `pressure_solver.projection_space_size` | Projection space size for the momentum equation.                                                  | Positive integer                                 | 20            |
+| `pressure_solver.projection_hold_steps` | Holding steps of the projection for the momentum equation.                                        | Positive integer                                 | 5             |
+| `flow_rate_force.direction`             | Direction of the forced flow.                                                                     | 0, 1, 2                                          | -             |
+| `flow_rate_force.value`                 | Bulk velocity or volumetric flow rate.                                                            | Positive real                                    | -             |
+| `flow_rate_force.use_averaged_flow`     | Whether bulk velocity or volumetric flow rate is given by the `value` parameter.                  | `true` or `false`                                | -             |
+| `freeze`                                | Whether to fix the velocity field at initial conditions.                                          | `true` or `false`                                | `false`       |
+
+## Scalar {#case-file_scalar}
+The scalar object allows to add a scalar transport equation to the solution.
+The solution variable is called `s`, but saved as `temperature` in the fld
+ files.
+Some properties of the object are inherited from `fluid`: the properties of the
+linear solver, the value of the density, and the output
+control.
+
+The scalar equation requires defining additional material properties: the
+specific heat capacity and thermal conductivity. These are provided as `cp` and
+`lambda`. Similarly to the fluid, one can provide the Peclet number, `Pe`, as an
+alternative. In this case, `cp` is set to 1 and `lambda` to the inverse of `Pe`.
+
+The boundary conditions for the scalar are specified through the
+`boundary_types` keyword.
+It is possible to directly specify a uniform value for a Dirichlet boundary.
+The syntax is, e.g. `d=1`, to set the value to 1, see the Ryleigh-Benard
+example case.
+
+The configuration of source terms is the same as for the fluid. A demonstration
+of using source terms for the scalar can be found in the `scalar_mms` example.
+
+| Name                      | Description                                              | Admissible values              | Default value |
+| ------------------------- | -------------------------------------------------------- | ------------------------------ | ------------- |
+| `enabled`                 | Whether to enable the scalar computation.                | `true` or `false`              | `true`        |
+| `Pe`                      | The Peclet number.                                       | Positive real                  | -             |
+| `cp`                      | Specific heat cpacity.                                   | Positive real                  | -             |
+| `lambda`                  | Thermal conductivity.                                    | Positive real                  | -             |
+| `boundary_types`          | Boundary types/conditions labels.                        | Array of strings               | -             |
+| `initial_condition.type`  | Initial condition type.                                  | `user`, `uniform`              | -             |
+| `initial_condition.value` | Value of the velocity initial condition.                 | Real                           | -             |
+| `source_terms`            | Array of JSON objects, defining additional source terms. | See list of source terms above | -             |
+
+## Statistics
+
+This object adds the collection of statistics for the fluid fields. For
+additional details on the workflow, see the 
+[corresponding page](@ref statistics-guide) in the user manual.
+
+| Name                | Description                                                          | Admissible values | Default value |
+| ------------------- | -------------------------------------------------------------------- | ----------------- | ------------- |
+| `enabled`           | Whether to enable the statistics computation.                        | `true` or `false` | `true`        |
+| `start_time`        | Time at which to start gathering statistics.                         | Positive real     | 0             |
+| `sampling_interval` | Interval, in timesteps, for sampling the flow fields for statistics. | Positive integer  | 10            |
+
+## Simulation components
+Simulation components enable the user to perform various additional operations,
+which are not strictly necessary to run the solver. An example could be
+computing and output of additional fields, e.g. vorticity.
+
+A more detailed description as well as a  full list of available components and
+ their setup is provided in a [separate page of the manual](simcomps.md).
+
+## Point zones
+Point zones enable the user to select GLL points in the computational domain
+according to some geometric criterion. Two predefined geometric shapes are
+selectable from the case file, boxes and spheres.
+
+A point zone object defined in the case file can be retrieved from the point
+zone registry, `neko_point_zone_registry`, and can be used to perform any
+zone-specific operations (e.g. localized source term, probing...). User-specific
+point zones can also be added manually to the point zone registry from the user
+file.
+
+A more detailed description as well as a  full list of available components and
+ their setup is provided in a [separate page of the manual](point-zones.md).
diff --git a/doc/pages/installation.md b/doc/pages/user-guide/installation.md
similarity index 76%
rename from doc/pages/installation.md
rename to doc/pages/user-guide/installation.md
index da7d27f61d1..cb44a166b5d 100644
--- a/doc/pages/installation.md
+++ b/doc/pages/user-guide/installation.md
@@ -1,10 +1,12 @@
 # Installing Neko {#installation}
 
+\tableofcontents
+
 Neko can be installed in various ways, either building directly from source, manually compiling all dependencies and Neko or via Spack. Pre-built Docker images are also provided for each release of Neko.
 
 ## Building from source
 
-To build Neko, you will need a Fortran compiler supporting the Fortran-08 standard, autotools, a working MPI installation supporting the Fortran 2008 bindings (`mpi_f08`), BLAS/LAPACK and JSON-Fortran. Optional dependencies are gslib and ParMETIS. 
+To build Neko, you will need a Fortran compiler supporting the Fortran-08 standard, autotools, pkg-config, a working MPI installation supporting the Fortran 2008 bindings (`mpi_f08`), BLAS/LAPACK and JSON-Fortran. Optional dependencies are PFunit, gslib and ParMETIS. 
 
 Follow the steps below to install the less common dependencies (e.g. JSON-Fortran).
 
@@ -100,31 +102,31 @@ In the above command, `[options]` refers to either optional features or packages
 
 Features are enabled and disabled by passing either `--enable-FEATURE[=arg]` or `--disable-FEATURE` to `configure`. A list of currently supported features are given in the table below.
 
-Name                  |  Description
-----                  | ------------
-`--enable-real=Xp`    | Specify working precision of REAL types:<br>`sp` -- `REAL(kind=4)`<br>`dp` -- `REAL(kind=8)` (default)<br>`qp` -- `REAL(kind=16)`<br>
-`--enable-contrib`    | Compile various tools
-`--enable-device-mpi` | Enable device aware MPI
+| Name                  | Description                                                                                                                           |
+| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
+| `--enable-real=Xp`    | Specify working precision of REAL types:<br>`sp` -- `REAL(kind=4)`<br>`dp` -- `REAL(kind=8)` (default)<br>`qp` -- `REAL(kind=16)`<br> |
+| `--enable-contrib`    | Compile various tools                                                                                                                 |
+| `--enable-device-mpi` | Enable device aware MPI                                                                                                               |
 
 Optional packages are controlled by passing either `--with-PACKAGE[=ARG]` or `--without-PACKAGE` to `configure`. A list of all supported optional packages are given in the table below.
 
-Name                            | Description
-----                            | -----------
-`--with-blas=<lib>`             | Use BLAS library `<lib>`
-`--with-lapack=<lib>`           | Use LAPACK library `<lib>`
-`--with-metis=DIR`              | Directory for metis
-`--with-metis-libdir=LIBDIR`    | Directory for metis library (if different)
-`--with-parmetis=DIR`           | Compile with support for parmetis library
-`--with-parmetis-libdir=LIBDIR` | Directory for parmetis library (if different)
-`--with-adios2=DIR`             | Compile with support for ADIOS2
-`--with-gslib=DIR`              | Compile with support for gslib
-`--with-libxsmm`                | Compile with support for libxsmm
-`--with-hip=DIR`                | Compile with HIP backend
-`--with-cuda=DIR`               | Compile with CUDA backend
-`--with-opencl=DIR`             | Compile with OpenCL backend
-`--with-nvtx=DIR`               | Compile with support for NVTX
-`--with-roctx=DIR`              | Compile with support for ROCTX
-`--with-pfunit=DIR`             | Directory for pFUnit (see \subpage testing)   
+| Name                            | Description                                   |
+| ------------------------------- | --------------------------------------------- |
+| `--with-blas=<lib>`             | Use BLAS library `<lib>`                      |
+| `--with-lapack=<lib>`           | Use LAPACK library `<lib>`                    |
+| `--with-metis=DIR`              | Directory for metis                           |
+| `--with-metis-libdir=LIBDIR`    | Directory for metis library (if different)    |
+| `--with-parmetis=DIR`           | Compile with support for parmetis library     |
+| `--with-parmetis-libdir=LIBDIR` | Directory for parmetis library (if different) |
+| `--with-adios2=DIR`             | Compile with support for ADIOS2               |
+| `--with-gslib=DIR`              | Compile with support for gslib                |
+| `--with-libxsmm`                | Compile with support for libxsmm              |
+| `--with-hip=DIR`                | Compile with HIP backend                      |
+| `--with-cuda=DIR`               | Compile with CUDA backend                     |
+| `--with-opencl=DIR`             | Compile with OpenCL backend                   |
+| `--with-nvtx=DIR`               | Compile with support for NVTX                 |
+| `--with-roctx=DIR`              | Compile with support for ROCTX                |
+| `--with-pfunit=DIR`             | Directory for pFUnit (see \subpage testing)   |
 
 @note Accelerators backends are not enabled as a feature in Neko, but rather via optional packages.
 
diff --git a/doc/pages/user-guide/io.md b/doc/pages/user-guide/io.md
new file mode 100644
index 00000000000..568d3a589e9
--- /dev/null
+++ b/doc/pages/user-guide/io.md
@@ -0,0 +1,50 @@
+# Input-output {#io}
+
+\tableofcontents
+
+## Mesh
+
+Neko has it's own mesh format, `.nmsh`. All meshes should be 3D and consist of
+hexahedral elements. A 2D or 1D case can be mimicked by having a single element
+across selected axes and applying periodic boundary conditions.
+
+A native mesh generator for simple box meshes, called `genmeshbox`, is part of
+Neko, and is used in some of the examples, for example `advecting_cone`. The
+usage is straightforward, and is well described by the help string provided when
+running the utility.
+
+The main way of obtaining a `.nmsh` is converting a Nek5000 `.re2` mesh file
+using the `rea2nbin` utility. Nek5000, in turn, has several converters among its
+tools, such as `gmsh2nek`. The latter is also available under the `contrib`
+directory in Neko. The workflow is thus to export your mesh into a format, which
+can be converted to `.re2`, and then convert the `.re2` to `.nmsh`. Of course
+one can also use native Nek5000 tools to produce the `.re2`, such as `genbox`.
+In the future, native support for other formats than `.nmsh` will be added for
+convenience.
+
+It should be noted, that the `.re2` format allows to store boundary conditions.
+This is relevant for users that have old Nek5000 cases, who wish to convert them
+to Neko. Boundary condition is converted and used by Neko, and for these
+boundaries one does not need to provide information in the `boundary_types`
+keyword in the case file. This is why in some of the examples, `boundary_types`
+is only filled for some of the boundaries. However, since this feature
+complicates the code and leads to somewhat confusing case setups, it is planned
+to deprecate it at some point. Note that periodic boundaries are also directly
+encoded into the mesh file, and this will remain so in the future.
+
+## Three-dimensional field output
+Neko stores the 3D fields with results in the `.fld` format, which is the same
+as in Nek5000. The advantage of adopting this format, is that there is a reader
+in Paraview and Visit, which can be used to visualize them. Note that the latest
+version of Paraview actually has two reader for `.fld`. For now, Neko has only
+been tested with the older reader, which uses Visit under the hood.  A file with
+the `.nek5000` extension is used as the entry point for the readers and stores
+some metadata. Users may also find the Python package `pymech` useful for
+working with `.fld`s. Note that only the first output `.fld` file stores the
+mesh.
+
+## Checkpoint files
+Simulations cannot be restarted from `.fld` files. Instead, separate checkpoint
+files can be output for the purpose of restarts. These contain additional
+information allowing a clean restart, with, e.g., the correct time integration
+order. A separate file format, `.chkp` is adopted for the checkpoint files.
diff --git a/doc/pages/point-zones.md b/doc/pages/user-guide/point-zones.md
similarity index 77%
rename from doc/pages/point-zones.md
rename to doc/pages/user-guide/point-zones.md
index 2bc1b25813e..00e4c431b28 100644
--- a/doc/pages/point-zones.md
+++ b/doc/pages/user-guide/point-zones.md
@@ -1,5 +1,7 @@
-
 # Point zones {#point-zones}
+
+\tableofcontents
+
 ## What are point zones?
 
 Point zones are subsections of the computational domain which are 
@@ -12,10 +14,10 @@ being applying a localized source term or probing a particular zone of interest.
 
 ## Predefined geometrical shapes
 
-There are two predefined shapes from which to initialize a point zone in the case
-file: boxes and spheres. Each shape is described by its own subtype
-`box_point_zone_t` and `sphere_point_zone_t`, extending the abstract class 
-`point_zone_t`.
+There are three predefined shapes from which to initialize a point zone in the
+case file: boxes, spheres and cylinders. Each shape is described by its own
+subtype `box_point_zone_t`, `sphere_point_zone_t` and `cylinder_point_zone_t`,
+extending the abstract class `point_zone_t`.
 
 ### Box
 
@@ -47,13 +49,29 @@ A sphere is defined by its center and its radius.
 ]
 ~~~~~~~~~~~~~~~
 
+### Cylinder
+
+A cylinder is defined by its end points and its radius.
+
+~~~~~~~~~~~~~~~{.json}
+[
+    {
+        "name": "mycylinder",
+        "geometry": "cylinder",
+        "start": [0.0, 0.0, 0.0],
+        "end": [0.0, 0.0, 1.0],
+        "radius": 0.01
+    },
+]
+~~~~~~~~~~~~~~~
+
 ## User-defined geometrical shapes
 
 The current version of Neko does not support user-defined shapes from the case 
 file. That said, shapes can be defined manually into new types by extending 
 `point_zone_t` and implementing the abstract `criterion` interface.
 
-## Using point zones
+## Using point zones {#point-zones_using-point-zones}
 
 Point zones defined in the case file are stored in a point zone registry, 
 `neko_point_zone_registry`. The point zone registry allows for the retrieval of
diff --git a/doc/pages/user-guide/simcomps.md b/doc/pages/user-guide/simcomps.md
new file mode 100644
index 00000000000..1ba0dccd661
--- /dev/null
+++ b/doc/pages/user-guide/simcomps.md
@@ -0,0 +1,111 @@
+# Simulation components {#simcomps}
+
+\tableofcontents
+
+## What are simulation components?
+Simulation components, or simcomps fo short,  incapsulate additional
+functionality that may be useful for certain cases but not necessary to run the
+solver.
+This can include computation and output of additional fields, in-situ
+post-processing operations, data sampling, etc.
+
+By design, simulation components can tap into every aspect of the simulation,
+so they can be quite powerful.
+As the code grows, we expect to add more and more simcomps to the code.
+
+## Adding simulation components to the case
+Each simcomp is defined as a single JSON object at are added to an array
+of objects called `simulation_components`, which resides directly under the
+`case` object.
+
+## Controling execution and file output
+Each simulation component is, by default, executed once per time step to perform
+associated computations and output. However, this can be modified by using the
+`compute_control` and `compute_value` parameters for the computation and the
+`output_control and` and `output_value` for the output to disk. The parameters
+for the `_control` values are the same as for the fluid and checkpointing.
+Additionally, one can set `output_control` to `global` and `never`. The former
+will sync the `output_` parameter to that of the fluid. Choosing `never` will
+suppress output all together. If no parameters for the `output_` parameters are
+ provided, they are set to be the same as for `compute_`.
+
+For simcomps that compute 3D fields, the output can be either added to the main
+`.fld` file, containing velocity and pressure, or saved to a separate file. For
+the latter, the `output_filename` keyword should be provided. One can
+additionally provide the `precision` keyword, which can be set to either
+`single` or `double` to control the precision of the written data.
+
+For example, in the `tgv` example case the `vorticity` component is executed
+once per 50 time steps. The `output_` parameters are synced to that, and the
+vorticity fields will be added to the main `.fld` file.
+~~~~~~~~~~~~~~~{.json}
+{
+    "type": "vorticity",
+    "compute_control": "tsteps",
+    "compute_value": 50
+}
+~~~~~~~~~~~~~~~
+
+ ## List of simulation components
+
+ ### vorticity
+ Computes the vorticity field an stores in the field registry as `omega_x`,
+ `omega_y` and `omega_z`.
+ Currently produces no output.
+
+ ### lambda2
+ Computes \f$ \lambda_2 \f$ for the velocity field and stores it in the normal output files as the first unused field.
+ This means that \f$ \lambda_2 \f$ can be found in the temperature field in then fld files if running without a scalar
+ and s1 if neko is run with one scalar.
+
+ ### probes
+ Probes selected solution fields at the points given inside an input file.
+ Example usage:
+ ~~~~~~~~~~~~~~~{.json}
+ {
+   "type": "probes",
+   "compute_control": "simulationtime",
+   "compute_value"    : 1,
+   "points_file":  "probes.csv",
+   "output_file":  "output.csv",
+   "fields": ["w","s"]
+ }
+ ~~~~~~~~~~~~~~~
+This probes the fields 'w', and 's' in the points described by points.csv and
+outputs into output.csv every 1 time units.
+
+The probed information will be saved in the output file in the following format:
+
+~~~~~~~~~~~~~~~{.csv}
+N_p, N_f, fields[0], fields[1], ..., fields[N_f-1]
+p_0_x, p_0_y, p_0_z
+p_1_x, p_1_y, p_1_z
+...
+p_N_p_x, p_N_p_y, p_N_p_z
+time_0, p_0_field_0, p_0_field_1, ..., p_0_field_N_f-1
+time_0, p_1_field_0, p_1_field_1, ..., p_1_field_N_f-1
+...
+time_0, p_N_p_field_0, p_N_p_field_1, ..., p_N_p_field_N_f-1
+time_1, p_0_field_0, p_0_field_1, ..., p_0_field_N_f-1
+time_1, p_1_field_0, p_1_field_1, ..., p_1_field_N_f-1
+...
+time_N_p, p_N_p_field_0, p_N_p_field_1, ..., p_N_p_field_N_f-1
+~~~~~~~~~~~~~~~
+
+ ### field_writer
+ Outputs registered 3D fields to an `.fld` file. Requires a list of field names
+ in the `fields` keyword. Primarily to be used for outputting new fields defined
+ in the user file. The fields are added to then `neko_field_registry` object and
+ are expected to be updated in the user file, or, perhaps, by other simcomps.
+ Since this simcomp does not compute anything `compute_` configuration is
+ irrelevant.
+ ~~~~~~~~~~~~~~~{.json}
+ {
+   "type": "field_writer",
+   "fields": ["my_field1", "my_field2"],
+   "output_filename": "myfields",
+   "precision": "double",
+   "output_control" : "simulation_time",
+   "output_value" : 1.0
+ }
+ ~~~~~~~~~~~~~~~
diff --git a/doc/pages/user-guide/statistics-guide.md b/doc/pages/user-guide/statistics-guide.md
new file mode 100644
index 00000000000..b6ce8f92e89
--- /dev/null
+++ b/doc/pages/user-guide/statistics-guide.md
@@ -0,0 +1,171 @@
+# Statistics guide {#statistics-guide}
+
+\tableofcontents
+
+Under development, updated incrementally
+
+Statistics in the context of Neko, is the common name for fields that are averaged in time and possible also space.
+
+The statistics module in Neko computes the temporal average of a wide range of fields.
+
+In this page we use the following convention for a field
+- \f$ u \f$, the instantaneous field.
+- \f$ \langle u \rangle_t \f$, the temporal average of \f$ u \f$. 
+- \f$ u = \langle u \rangle + u' \f$, the Reynolds decomposition of \f$ u \f$, where \f$ u' \f$ is the fluctuation of \f$ u \f$ around the mean field.
+
+The temporal average of a field \f$u\f$ is the approximation of the integral
+
+$$
+\langle u \rangle_t = \int_{T_0}^{T_N} u dt
+$$
+
+In Neko, this is computed as
+
+$$
+\langle u \rangle_t = \sum_{i=0}^N u_i \Delta t_i
+$$
+where \f$ u_0 \f$ is the fields value at \f$ T_0 \f$ and \f$ N \f$ is the number of time steps needed to reach \f$ T_N \f$, \f$ T_N = T_0 + \sum_{i=0}^N \Delta t_i \f$.
+
+In the statistics in Neko, various averages of the the different velocity components, derivatives and pressure are computed. In total, 44 "raw statistics" are computed that are required to compute the Reynolds stress budgets, mean fields, and the different terms in the turbulent kinetic energy equation.
+
+## Using statistics
+Statistics are enable in the the case file as the following:
+
+| Name                | Description                                                          | Admissible values | Default value |
+| ------------------- | -------------------------------------------------------------------- | ----------------- | ------------- |
+| `enabled`           | Whether to enable the statistics computation.                        | `true` or `false` | `true`        |
+| `start_time`        | Time at which to start gathering statistics.                         | Positive real     | 0             |
+| `sampling_interval` | Interval, in timesteps, for sampling the flow fields for statistics. | Positive integer  | 10            |
+
+In addition to the usual controls for the output, which then outputs the averages computes from the last time the statistics were written to file.
+
+For example, if one wants to sample the fields every 4 time steps and compute the averages in time intervals of 20 and write the output every 20 time units, and start collecting statistics after an initial transient of 50 time units the following would work:
+
+~~~~~~~~~~~~~~~{.json}
+"statistics": {
+        "enabled": true,
+        "start_time": 50.0,
+        "sampling_interval": 4,
+        "output_control": "simulationtime",
+        "output_value": 20,
+  }
+~~~~~~~~~~~~~~~
+When the output is written one obtains two .fld files called mean_field and stats. 
+
+## List of fields in output files 
+In `mean_field` the following averages are stored. The stored in variable column is which field one finds the computed statistic if one opens the file in paraview or visit.
+
+| Number | Statistic | Stored in variable |
+| ------ | --------- | ------------------ |
+| 1 | \f$ \langle p \rangle \f$ | Pressure|
+| 2 | \f$ \langle u \rangle \f$ | X-Velocity|
+| 3 | \f$ \langle v \rangle \f$ | Y-Velocity|
+| 4 | \f$ \langle w \rangle \f$ | Z-Velocity|
+
+In `stats` several other statistics are stored, and while not all might be interesting to your specific use case, with them most different budgets and quantities of interest can be computed. They are stored as the following:
+
+
+| Number | Statistic | Stored in variable |
+| ------ | --------- | ------------------ |
+| 1 | \f$ \langle pp \rangle \f$ | Pressure|
+| 2 | \f$ \langle uu \rangle \f$ | X-Velocity|
+| 3 | \f$ \langle vv \rangle \f$ | Y-Velocity|
+| 4 | \f$ \langle ww \rangle \f$ | Z-Velocity|
+| 5 | \f$ \langle uv \rangle \f$ | Temperature|
+| 6 | \f$ \langle uw \rangle \f$ | Scalar 1 (s1)|
+| 7 | \f$ \langle vw \rangle \f$ | Scalar 2 (s2)|
+| 8 | \f$ \langle uuu \rangle \f$ | s3|
+| 9 | \f$ \langle vvv \rangle \f$ | s4|
+| 10 | \f$ \langle www \rangle \f$ | s5|
+| 11 | \f$ \langle  uuv   \rangle \f$ | s6 |
+| 12 | \f$ \langle  uuw   \rangle \f$ | s7 |
+| 13 | \f$ \langle  uvv   \rangle \f$ | s8 |
+| 14 | \f$ \langle  uvw   \rangle \f$ | s9 |
+| 15 | \f$ \langle  vvw   \rangle \f$ | s10 |
+| 16 | \f$ \langle  uww   \rangle \f$ | s11 |
+| 17 | \f$ \langle  vww   \rangle \f$ | s12 |
+| 18 | \f$ \langle  uuuu  \rangle \f$ | s13 |
+| 19 | \f$ \langle  vvvv  \rangle \f$ | s14 |
+| 20 | \f$ \langle wwww   \rangle \f$ | s15 |
+| 21 | \f$ \langle  ppp   \rangle \f$ | s16 |
+| 22 | \f$ \langle  pppp  \rangle \f$ | s17 |
+| 23 | \f$ \langle  pu    \rangle \f$ | s18 |
+| 24 | \f$ \langle  pv    \rangle \f$ | s19 |
+| 25 | \f$ \langle  pw    \rangle \f$ | s20 |
+| 26 | \f$ \langle  p \frac{\partial u} {\partial x} \rangle \f$ | s21 |
+| 27 | \f$ \langle  p \frac{\partial u} {\partial y}\rangle \f$ | s22 |
+| 28 | \f$ \langle  p \frac{\partial u} {\partial z}\rangle \f$ | s23 |
+| 29 | \f$ \langle  p \frac{\partial v} {\partial x}\rangle \f$ | s24 |
+| 30 | \f$ \langle  p \frac{\partial v} {\partial y}\rangle \f$ | s25 |
+| 31 | \f$ \langle  p \frac{\partial v} {\partial z}\rangle \f$ | s26 |
+| 32 | \f$ \langle  p \frac{\partial w} {\partial x}\rangle \f$ | s27 |
+| 33 | \f$ \langle  p \frac{\partial w} {\partial y}\rangle \f$ | s28 |
+| 34 | \f$ \langle  p \frac{\partial w} {\partial z}\rangle \f$ | s29 |
+| 35 | \f$ \langle  e11   \rangle \f$ | s30 |
+| 36 | \f$ \langle  e22   \rangle \f$ | s31 |
+| 37 | \f$ \langle  e33   \rangle \f$ | s32 |
+| 38 | \f$ \langle  e12   \rangle \f$ | s33 |
+| 39 | \f$ \langle  e13   \rangle \f$ | s34 |
+| 40 | \f$ \langle  e23   \rangle \f$ | s35 |
+
+where \f$e11,e22...\f$ is computed as:
+$$
+\begin{aligned}
+e11 &= \left(\frac{\partial u}{\partial x}\right)^2 + \left(\frac{\partial u}{\partial y}\right)^2 + \left(\frac{\partial u}{\partial z}\right)^2 \\\\
+e22 &= \left(\frac{\partial v}{\partial x}\right)^2 + \left(\frac{\partial v}{\partial y}\right)^2 + \left(\frac{\partial v}{\partial z}\right)^2 \\\\
+e33 &= \left(\frac{\partial w}{\partial x}\right)^2 + \left(\frac{\partial w}{\partial y}\right)^2 + \left(\frac{\partial w}{\partial z}\right)^2 \\\\
+e12 &= \frac{\partial u}{\partial x}  \frac{\partial v}{\partial x} + \frac{\partial u}{\partial y}\frac{\partial v}{\partial y}+ \frac{\partial u}{\partial z}\\frac{\partial v}{\partial z} \\\\
+e13 &= \frac{\partial u}{\partial x}  \frac{\partial w}{\partial x} + \frac{\partial u}{\partial y}\frac{\partial w}{\partial y}+ \frac{\partial u}{\partial z}\\frac{\partial w}{\partial z} \\\\
+e23 &= \frac{\partial v}{\partial x}  \frac{\partial w}{\partial x} + \frac{\partial v}{\partial y}\frac{\partial w}{\partial y}+ \frac{\partial v}{\partial z}\\frac{\partial w}{\partial z} \\\\
+\end{aligned}
+$$
+
+
+# Postprocessing
+Of course, these statistics are only the "raw statistics" in the sense that in general we are not interested in \f$ \langle uu\rangle \f$, but rather say the rms of the velocity fluctuation. FOr this we need to postprocess the statistics. 
+
+There is some rudimentary postprocessing to compute the spatial averages of fld filesa and also to combine the statistics collected from several runs (compute average in time) and also compute both the mean velocity gradient and the Reynolds stresses available among the contrib scripts. By running the contrib scripts without any arguments one gets a hint on their usage, and also the text below gives a guide on how to postprocess the raw statistics. The postprocessing part of Neko is expanding and changing quite a lot at the moment, where we currently envision primarily using python for the postprocessing of the final statistics.
+
+
+
+## Notes on the statistics calculation in Neko
+***Daniele Massaro, Martin Karp (KTH)***
+
+1) Run your simulations and collect mean_field* and stats* files by having the statistics object added to the case file,  and specifying the write interval to something suitable.
+
+2) For each RUN_i, you get a set of mean_field* and stats* files. You can average them for each single RUN_i, or average all of them only once (after re-ordering them properly). If you follow the second approach, go to step 4. 
+Here, for each RUN_i, we compute the averaged means with "average_fields_in_time":
+--mean
+`srun --unbuffered /your/location/neko/bin/average_fields_in_time meanXX.fld T0 mean_p.fld`
+where T0 is the initial time. To get some hints on the input for the script one can simply run `./average_fields_in_time` without any arguments. For RUN_1 the time T0 can be taken from the log of the first simulation, or from the header of the first mean_field* file; in this way you discard that file. For RUN_i, with i>1, it can be taken from header of the last file mean_field* of the previous simulation RUN_{i-1}. 
+In the command line, for the name "meanXX.fld", XX indicates the number of the nek5000 file. In mean_fieldXX.nek5000 you set the number of the first mean0* file to read and the number of steps corresponding to the number of files. In this way, the code generates a mean_p0.f00000 and mean_post0.nek5000. It is suggested to rename mean_p0.f00000 as mean_p0.f0000i and move it to a separate folder where you take the average with all the others. 
+--stats
+`srun --unbuffered /your/location/neko/bin/average_fields_in_time statXX.fld T0 stat_p.fld`
+T0 is the same as before. In stat0.nek5000 you set the number of the first stat0* file to read and the number of steps corresponds to the number of files. It is suggested to rename stat_p0.f00000 as stat_p0.f0000i and move it to a separate folder where you take the average with all the others. 
+Repeat this for each RUN_i folder. Eventually, given n RUN_i folders, you will get n mean_p* and stat_p* files.
+
+3) Take the average of the averaged runs. Now, the time average over all the n simulations is taken. The procedure is similar, but changing the output name is recommended to  avoid over-writing.
+-- mean
+`srun --unbuffered /your/location/neko/bin/average_fields mean_p0.fld T0 mean_post.fld`
+where T0 is the initial time which has been used to compute mean_p* for RUN_1.
+-- stats
+`srun --unbuffered /your/location/neko/bin/average_fields stat_p0.fld T0 stat_post.fld`
+where T0 is the initial time which has been used to compute mean_p* for RUN_1.
+
+
+
+
+
+4) Compute Reynolds stress tensors and other statistical moments (see the list).
+`srun --unbuffered /your/location/neko/bin/postprocess_fluid_stats mesh_file.nmsh mean_post0.fld stat_post0.fld`
+
+5) We also provide a tool to average the resulting field in a homogenous direction in `bin/average_field_in_space`. The required arguments are shown if one runs the program without any input. Currently it requires the number of elements in the homogenous direction as an input argument, e.g. 
+`./average_field_in_space mesh.nmsh field.fld x 18 outfield.fld`
+if we want to average a field in the x direction on a mesh with 18 elements in x and output the averaged field in outfield0.nek5000.
+
+
+
+
+
+
+
diff --git a/doc/pages/user-guide/user-file.md b/doc/pages/user-guide/user-file.md
new file mode 100644
index 00000000000..66ec1993a3f
--- /dev/null
+++ b/doc/pages/user-guide/user-file.md
@@ -0,0 +1,952 @@
+# User File {#user-file}
+
+\tableofcontents
+
+The user file is a fortran file where the user can implement their own functions
+to extend the capabilities of the default Neko executable. The user file can be
+used for setting advanced initial/boundary conditions, source terms, I/O
+operations, and interactions with the Neko framework.
+
+## Compiling and running
+
+The user file is a regular Fortran `.f90` file that needs to be compiled with
+`makeneko`, located in the `bin` folder of your neko installation. To compile a
+user file `user.f90`, run:
+
+```bash
+makeneko user.f90
+```
+
+If everything goes well, you should observe the following output:
+
+```bash
+N E K O build tool, Version 0.7.99
+(build: 2024-02-13 on x86_64-pc-linux-gnu using gnu)
+
+Building user NEKO ... done!
+```
+
+Compiling your user file with `makeneko` will create a `neko` executable, which
+you will need to execute with your case file as an argument. For example, if
+your case file is called `user.case`:
+
+```bash
+./neko user.case
+```
+
+Or in parallel using MPI:
+
+```bash
+mpirun -n 8 ./neko user.case
+```
+
+
+## High-level structure
+
+The current high-level structure of the user file is shown below.
+
+```fortran
+module user
+  use neko
+  implicit none
+
+contains
+
+  ! Register user defined functions here (see user_intf.f90)
+  subroutine user_setup(u)
+    type(user_t), intent(inout) :: u
+
+
+  end subroutine user_setup
+
+end module user
+
+```
+
+The user file implements the `user` module. The `user` modules contains a
+subroutine named `user_setup`, which we use to interface the internal procedures
+defined in `src/common/user_intf.f90` with the subroutines that you will
+implement in your user file. Each user subroutine should be implemented under
+the `contains` statement, below `user_setup`.
+
+@note The above code snippet is the most basic code structure for the user file.
+Compiling it and running it would be equivalent to running the "vanilla" neko
+executable `bin/neko` in your local neko installation folder.
+
+## Default user functions
+
+The following user functions, if defined in the user file, will always be
+executed, regardless of what is set in the case file:
+
+- [user_init_modules](@ref user-file_init-and-final): For initializing user
+  variables and objects
+- [user_finalize_modules](@ref user-file_init-and-final): For finalizing, e.g
+  freeing variables and terminating processes
+- [user_check](@ref user-file_user-check): Executed at the end of every time step,
+  for e.g. computing and/or outputting user defined quantities.
+- [material_properties](@ref user-file_mat-prop): For computing and setting material
+  properties such as `rho`, `mu`, `cp` and `lambda`.
+- [user_mesh_setup](@ref user-file_user-mesh-setup): For applying a deformation to
+  the mesh element nodes, before the simulation time loop.
+- [scalar_user_bc](@ref user-file_scalar-bc): For applying boundary conditions to
+  the scalar, on all zones that are not already specified with uniform dirichlet
+  values e.g. `d=1`. For more information on the scalar, see the [relevant section of the case file](@ref case-file_scalar).
+
+### Initializing and finalizing {#user-file_init-and-final}
+
+The two subroutines `user_init_modules` and `user_finalize_modules` may be used
+to initialize/finalize any user defined variables, external objects, or
+processes. They are respectively executed right before/after the simulation time
+loop.
+
+```.f90
+
+  ! Initialize user variables or external objects
+  subroutine initialize(t, u, v, w, p, coef, params)
+    real(kind=rp) :: t
+    type(field_t), intent(inout) :: u
+    type(field_t), intent(inout) :: v
+    type(field_t), intent(inout) :: w
+    type(field_t), intent(inout) :: p
+    type(coef_t), intent(inout) :: coef
+    type(json_file), intent(inout) :: params
+
+    ! insert your initialization code here
+
+  end subroutine initialize
+
+  ! Finalize user variables or external objects
+  subroutine finalize(t, params)
+    real(kind=rp) :: t
+    type(json_file), intent(inout) :: params
+
+    ! insert your code here
+
+  end subroutine initialize
+```
+
+In the example above, the subroutines `initialize` and `finalize` contain the
+actual implementations. They must also be interfaced to the internal procedures
+`user_init_modules` and `user_finalize_modules` in `user_setup`:
+
+```.f90
+
+  ! Register user defined functions (see user_intf.f90)
+  subroutine user_setup(u)
+    type(user_t), intent(inout) :: u
+
+    u%user_init_modules => initialize
+    u%user_finalize_modules => finalize
+
+  end subroutine user_setup
+
+```
+
+@note `user_init_modules` and `user_finalize_modules` are independent of each
+other. Using one does not require the use of the other.
+
+### Computing at every time step {#user-file_user-check}
+
+The subroutine `user_check` is executed at the end of every time step. It can be
+used for computing and/or outputting your own variables/quantities at every time
+step.
+```.f90
+  ! This is called at the end of every time step
+  subroutine usercheck(t, tstep, u, v, w, p, coef, param)
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+    type(coef_t), intent(inout) :: coef
+    type(field_t), intent(inout) :: u
+    type(field_t), intent(inout) :: v
+    type(field_t), intent(inout) :: w
+    type(field_t), intent(inout) :: p
+    type(json_file), intent(inout) :: param
+
+    ! insert code below
+
+  end subroutine usercheck
+
+```
+
+In the example above, the subroutine `usercheck` contains the actual
+implementation, and needs to be registered by adding:
+
+```.f90
+u%user_check => usercheck
+```
+
+to our `user_setup`.
+
+### Setting material properties {#user-file_mat-prop}
+
+`material_properties` allows for more complex computations and setting of
+various material properties, such as `rho`, `mu` for the fluid and `cp`,
+`lambda` for the scalar. The example below is taken from the
+[rayleigh-benard-cylinder
+example](https://github.com/ExtremeFLOW/neko/blob/564686b127ff75a362a06126c6b23e9b4e21879e/examples/rayleigh-benard-cylinder/rayleigh.f90#L22C1-L38C41).
+
+```.f90
+
+  subroutine set_material_properties(t, tstep, rho, mu, cp, lambda, params)
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+    real(kind=rp), intent(inout) :: rho, mu, cp, lambda
+    type(json_file), intent(inout) :: params
+    real(kind=rp) :: Re
+
+    call json_get(params, "case.fluid.Ra", Ra)
+    call json_get(params, "case.scalar.Pr", Pr)
+
+    Re = sqrt(Ra / Pr)
+    mu = 1.0_rp / Re
+    lambda = mu / Pr
+    rho = 1.0_rp
+    cp = 1.0_rp
+  end subroutine set_material_properties
+
+```
+
+And of course not forgetting to register our function in `user_setup` by adding
+the following line:
+
+```.f90
+u%material_properties => set_material_properties
+```
+
+### Runtime mesh deformation {#user-file_user-mesh-setup}
+
+This user function allows for the modification of the mesh at runtime, by acting
+on the element nodes of the mesh specified in the case file. This function is
+only called once before the simulation time loop. The example below is taken
+from the [tgv
+example](https://github.com/ExtremeFLOW/neko/blob/a0613606360240e5059e65d6d98f4a57cf73e237/examples/tgv/tgv.f90#L27-L42).
+
+```.f90
+  ! Rescale mesh
+  subroutine user_mesh_scale(msh)
+    type(mesh_t), intent(inout) :: msh
+    integer :: i, p, nvert
+    real(kind=rp) :: d
+    d = 4._rp
+
+    ! original mesh has size 0..8 to be mapped onto -pi..pi
+    ! will be updated later to a method giving back the vertices of the mesh
+    nvert = size(msh%points)
+    do i = 1, nvert
+       msh%points(i)%x(1) = (msh%points(i)%x(1) - d) / d * pi
+       msh%points(i)%x(2) = (msh%points(i)%x(2) - d) / d * pi
+       msh%points(i)%x(3) = (msh%points(i)%x(3) - d) / d * pi
+    end do
+
+  end subroutine user_mesh_scale
+
+```
+
+The registering of the above function in `user_setup` should then be done as follows:
+
+```.f90
+u%user_mesh_setup => user_mesh_scale
+```
+
+### Scalar boundary conditions {#user-file_scalar-bc}
+
+This user function can be used to specify the scalar boundary values, on all
+zones that are not already set to uniform Dirichlet or Neumann values e.g. `d=1`
+or `n=0`. For more information on the scalar, see the 
+[relevant section of the case file](@ref case-file_scalar). The example below 
+sets the scalar boundary condition values to be a linear function of the `z` 
+coordinate (taken from the 
+[rayleigh-benard example](https://github.com/ExtremeFLOW/neko/blob/aa72ad9bf34cbfbac0ee893c045639fdd095f80a/examples/rayleigh-benard-cylinder/rayleigh.f90#L41-L63)).
+
+```.f90
+
+  subroutine set_scalar_boundary_conditions(s, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep)
+    real(kind=rp), intent(inout) :: s
+    real(kind=rp), intent(in) :: x
+    real(kind=rp), intent(in) :: y
+    real(kind=rp), intent(in) :: z
+    real(kind=rp), intent(in) :: nx
+    real(kind=rp), intent(in) :: ny
+    real(kind=rp), intent(in) :: nz
+    integer, intent(in) :: ix
+    integer, intent(in) :: iy
+    integer, intent(in) :: iz
+    integer, intent(in) :: ie
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+
+    ! This will be used on all zones without labels
+    s = 1.0_rp - z
+
+  end subroutine set_scalar_boundary_conditions
+
+```
+
+This function will be called on all the points on the relevant boundaries. The
+registering of the above function in `user_setup` should be done as follows:
+
+```.f90
+u%scalar_user_bc => set_scalar_boundary_conditions
+```
+
+## Case-specific user functions
+
+As explained in the [case file](case-file.md) page, certain components of the
+simulation can be set to be user defined. These components and their associated
+user functions are:
+
+| Description                     | User function                                              | JSON Object in the case file     |
+|---------------------------------|------------------------------------------------------------|----------------------------------|
+| Fluid initial condition         | [fluid_user_ic](@ref user-file_user-ic)                        | `case.fluid.initial_condition`   |
+| Scalar initial condition        | [scalar_user_ic](@ref user-file_user-ic)                       | `case.scalar.initial_condition`  |
+| Fluid inflow boundary condition | [fluid_user_if](@ref user-file_fluid-user-if)                  | `case.fluid.inflow_condition`    |
+| Scalar boundary conditions      | [scalar_user_bc](@ref user-file_scalar-bc)                     | (user function is always called) |
+| Fluid source term               | [fluid_user_f_vector or fluid_user_f](@ref user-file_user-f)   | `case.fluid.source_terms`        |
+| Scalar source term              | [scalar_user_f_vector or scalar_user_f](@ref user-file_user-f) | `case.scalar.source_terms`       |
+| Fluid and Scalar boundary conditions | [field_dirichlet_update](@ref user-file_field-dirichlet-update) | `case.fluid.boundary_types` and/or `case.scalar.boundary_types`       |
+
+Note that `scalar_user_bc` is included for completeness but is technically not case-specific.
+
+### Fluid and Scalar initial conditions {#user-file_user-ic}
+
+Enabling user defined initial conditions for the fluid and/or scalar is done by
+setting the `initial_condition.type` to `"user"` in the relevant sections of the
+case file, `case.fluid` and/or `case.scalar`.
+
+```.json
+
+"case": {
+    "fluid": {
+        "initial_condition": {
+            "type": "user"
+        }
+    }
+}
+```
+
+See the relevant sections on the [fluid](@ref case-file_fluid-ic) and
+[scalar](@ref case-file_scalar) initial conditions in the 
+[case file page](@ref case-file) for more details.
+
+The associated user functions for the fluid and/or scalar initial conditions can
+then be added to the user file. An example for the fluid taken from the
+[advecting cone example](https://github.com/ExtremeFLOW/neko/blob/aa72ad9bf34cbfbac0ee893c045639fdd095f80a/examples/advecting_cone/advecting_cone.f90#L48-L75),
+is shown below.
+
+```.f90
+
+  !> Set the advecting velocity field.
+  subroutine set_velocity(u, v, w, p, params)
+    type(field_t), intent(inout) :: u
+    type(field_t), intent(inout) :: v
+    type(field_t), intent(inout) :: w
+    type(field_t), intent(inout) :: p
+    type(json_file), intent(inout) :: params
+    integer :: i, e, k, j
+    real(kind=rp) :: x, y
+
+    do i = 1, u%dof%size()
+       x = u%dof%x(i,1,1,1)
+       y = u%dof%y(i,1,1,1)
+
+       ! Angular velocity is pi, giving a full rotation in 2 sec
+       u%x(i,1,1,1) = -y*pi
+       v%x(i,1,1,1) = x*pi
+       w%x(i,1,1,1) = 0
+    end do
+
+     if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_memcpy(u%x, u%x_d, u%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(v%x, v%x_d, v%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(w%x, w%x_d, w%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+    end if
+
+  end subroutine set_velocity
+
+```
+
+@note Notice the use of the `NEKO_BCKND_DEVICE` flag, which will be set to 1 if
+running on GPUs, and the calls to `device_memcpy` to transfer data between the
+host and the device. See [Running on GPUs](@ref user-file_tips_running-on-gpus) for
+more information on how this works.
+
+The same can be done for the scalar, with the example below also inspired from
+the 
+[advecting cone example](https://github.com/ExtremeFLOW/neko/blob/aa72ad9bf34cbfbac0ee893c045639fdd095f80a/examples/advecting_cone/advecting_cone.f90#L14-L45):
+
+```.f90
+
+  !> User initial condition for the scalar
+  subroutine set_s_ic(s, params)
+    type(field_t), intent(inout) :: s
+    type(json_file), intent(inout) :: params
+    integer :: i, e, k, j
+    real(kind=rp) :: cone_radius, mux, muy, x, y, r, theta
+
+    ! Center of the cone
+    mux = 1
+    muy = 0
+
+    cone_radius = 0.5
+
+    do i = 1, s%dof%size()
+       x = s%dof%x(i,1,1,1) - mux
+       y = s%dof%y(i,1,1,1) - muy
+
+       r = sqrt(x**2 + y**2)
+       theta = atan2(y, x)
+
+       ! Check if the point is inside the cone's base
+       if (r > cone_radius) then
+         s%x(i,1,1,1) = 0.0
+       else
+         s%x(i,1,1,1) = 1.0 - r / cone_radius
+       endif
+    end do
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_memcpy(s%x, s%x_d, s%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+    end if
+
+  end subroutine set_s_ic
+
+```
+
+@note Notice the use of the `NEKO_BCKND_DEVICE` flag, which will be set to 1 if
+running on GPUs, and the calls to `device_memcpy` to transfer data between the
+host and the device. See [Running on GPUs](@ref user-file_tips_running-on-gpus) for
+more information on how this works.
+
+We should also add of the following lines in `user_setup`, registering our user
+functions `set_velocity` and `set_s_ic` to be used as the fluid and scalar
+initial conditions:
+
+```.f90
+u%fluid_user_ic => set_velocity
+u%scalar_user_ic => set_s_ic
+```
+
+### Fluid inflow condition {#user-file_fluid-user-if}
+
+Enabling user defined inflow condition for the fluid is done by setting
+the `case.fluid.inflow_condition.type` to `"user"`:
+
+```.json
+
+"case": {
+    "fluid": {
+        "inflow_condition": {
+            "type": "user"
+        }
+    }
+}
+```
+
+See the [the relevant section](@ref case-file_fluid-if) in the 
+[case file page](@ref case-file.md) for more details. The associated user 
+function for the fluid inflow condition can then be added to the user file.
+An example inspired from the 
+[lid-driven cavity example](https://github.com/ExtremeFLOW/neko/blob/aa72ad9bf34cbfbac0ee893c045639fdd095f80a/examples/lid/lid.f90#L29-L53)
+is shown below.
+
+```.f90
+ ! user-defined boundary condition
+  subroutine user_bc(u, v, w, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep)
+    real(kind=rp), intent(inout) :: u
+    real(kind=rp), intent(inout) :: v
+    real(kind=rp), intent(inout) :: w
+    real(kind=rp), intent(in) :: x
+    real(kind=rp), intent(in) :: y
+    real(kind=rp), intent(in) :: z
+    real(kind=rp), intent(in) :: nx
+    real(kind=rp), intent(in) :: ny
+    real(kind=rp), intent(in) :: nz
+    integer, intent(in) :: ix
+    integer, intent(in) :: iy
+    integer, intent(in) :: iz
+    integer, intent(in) :: ie
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+
+    real(kind=rp) lsmoothing
+    lsmoothing = 0.05_rp    ! length scale of smoothing at the edges
+
+    u = step( x/lsmoothing ) * step( (1._rp-x)/lsmoothing )
+    v = 0._rp
+    w = 0._rp
+
+  end subroutine user_bc
+```
+
+We should also add of the following line in `user_setup`, registering our user
+function `user_bc` to be used as the fluid inflow conditions:
+
+```.f90
+u%fluid_user_if => user_bc
+```
+
+### Fluid and scalar source terms {#user-file_user-f}
+
+Enabling user defined source terms for the fluid and/or scalar is done by adding
+JSON Objects to the `case.fluid.source_terms` and/or `case.scalar.source_terms`
+lists.
+
+```.json
+
+"case": {
+    "fluid": {
+        "source_terms":
+        [
+            {
+                "type": "user_vector"
+            }
+        ]
+    }
+}
+```
+
+See the relevant sections on the [fluid](@ref case-file_fluid-source-term) and
+[scalar](@ref case-file_scalar) source terms in the [case file page](@ref case-file) for
+more details.
+
+@attention There are two variants of the source term user functions: `_user_f`
+and `_user_f_vector`. The former is called when setting `"user_pointwise"` as
+the source term type, while the latter requires the use of the `"user_vector"`
+keyword in the case file. The pointwise variant, `fluid_user_f` is not supported
+on GPUs. In general, `fluid_user_f_vector` is the prefered variant, and is the
+one which will be use in our examples below. The same applies for the scalar
+source term user functions.
+
+The associated user functions for the fluid and/or scalar source terms can then
+be added to the user file. An example for the fluid, taken from the
+[rayleigh-benard-cylinder example](https://github.com/ExtremeFLOW/neko/blob/49925b7a04a638259db3b1ddd54349ca57f5d207/examples/rayleigh-benard-cylinder/rayleigh.f90#L101C1-L121C44),
+is shown below.
+
+```.f90
+  ! Sets the z-component of the fluid forcing term = scalar
+  subroutine set_bousinesq_forcing_term(f, t)
+    class(fluid_user_source_term_t), intent(inout) :: f
+    real(kind=rp), intent(in) :: t
+
+    ! Retrieve u,v,w,s fields from the field registry
+    type(field_t), pointer :: u, v, w, s
+    u => neko_field_registry%get_field('u')
+    v => neko_field_registry%get_field('v')
+    w => neko_field_registry%get_field('w')
+    s => neko_field_registry%get_field('s')
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_rzero(f%u_d,f%dm%size())
+       call device_rzero(f%v_d,f%dm%size())
+       call device_copy(f%w_d,s%x_d,f%dm%size())
+    else
+       call rzero(f%u,f%dm%size())
+       call rzero(f%v,f%dm%size())
+       call copy(f%w,s%x,f%dm%size())
+    end if
+  end subroutine set_bousinesq_forcing_term
+```
+
+@note Notice the use of the `neko_field_registry` to retrieve the velocity and
+scalar fields. See [Registries](@ref user-file_tips_registries) for more information
+about registries in neko. @note Notice the use of the `NEKO_BCKND_DEVICE` flag,
+which will be set to 1 if running on GPUs, and the use of `device_` functions.
+See [Running on GPUs](@ref user-file_tips_running-on-gpus) for more information on
+how this works.
+
+The same can be done for the scalar, with the example below also taken from the
+[scalar_mms example](https://github.com/ExtremeFLOW/neko/blob/49925b7a04a638259db3b1ddd54349ca57f5d207/examples/scalar_mms/scalar_mms.f90#L28-L47):
+
+```.f90
+
+  !> Set source term
+  subroutine set_source(f, t)
+    class(scalar_user_source_term_t), intent(inout) :: f
+    real(kind=rp), intent(in) :: t
+    real(kind=rp) :: x, y
+    integer :: i
+
+    do i = 1, f%dm%size()
+       x = f%dm%x(i,1,1,1)
+       y = f%dm%y(i,1,1,1)
+
+       ! 0.01 is the viscosity
+       f%s(i,1,1,1) = cos(x) - 0.01 * sin(x) - 1.0_rp
+    end do
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_memcpy(f%s, f%s_d, f%dm%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+    end if
+
+  end subroutine set_source
+
+```
+
+@note Notice the use of the `NEKO_BCKND_DEVICE` flag, which will be set to 1 if
+running on GPUs, and the call to `device_memcpy` to transfer data between the
+host and the device. See [Running on GPUs](@ref user-file_tips_running-on-gpus) for
+more information on how this works.
+
+We should also add of the following lines in `user_setup`, registering our user
+functions `set_boussinesq_forcing_term` and `set_source` to be used as the fluid
+and scalar source terms:
+
+```.f90
+u%fluid_user_f_vector => set_boussinesq_forcing_term
+u%scalar_user_f_vector => set_source
+```
+
+### Complex fluid and/or scalar boundary conditions {#user-file_field-dirichlet-update}
+
+This user function can be used to specify dirichlet boundary values for velocity
+components `u,v,w`, the pressure `p`, and/or the scalar `s`. This type of boundary 
+condition allows for time-dependent velocity profiles (currently
+not possible with a standard `user_inflow`) or non-uniform pressure profiles
+to e.g. impose an outlet pressure computed from another simulation.
+
+The selection of such boundary condition is done in the `case.fluid.boundary_types`
+array for the velocities and pressure, and in the `case.scalar.boundary_types` 
+array for the scalar. The [case file](@ref case-file_boundary-types) outlines which keywords can be used for such purpose:
+* `d_vel_u` for the `u` component of the velocity field
+* `d_vel_v` for the `v` component of the velocity field
+* `d_vel_w` for the `w` component of the velocity field
+* `d_pres` for the pressure field
+* `d_s` for the scalar field (cannot be combined with the above)
+
+The separator `"/"` can be used to combine the keywords related to `u,v,w` and `p`.
+For example, if one wants to only apply `u,v` and `p` values on a given boundary, one
+should use `"d_vel_u/d_vel_v/d_pres"`. In this case, the `w` component would be 
+left untouched (not zeroed!). An example of case file from the 
+[cyl-boundary-layer example](https://github.com/ExtremeFLOW/neko/blob/develop/examples/cyl_boundary_layer/cyl_bl_user_bc_test.case) is shown below.
+
+```.json
+
+"case": {
+    "fluid": {
+        "boundary_types": [
+          "d_vel_u/d_vel_v/d_vel_w", 
+          "d_vel_u/d_vel_v/d_vel_w/d_pres",
+ 	      "sym",
+          "w",
+          "on", 
+          "on", 
+          "w"
+        ]
+    }
+    "scalar": {
+        "boundary_types": [
+          "d_s",
+          "d_s",
+          "",
+          "",
+          "",
+          ""
+        ]
+    }
+}
+```
+
+In this example, we indicate in `case.fluid.boundary_types` that we would like
+to apply a velocity profile on all three components `u,v,w` on the boundary 
+number 1 (in this case, the inlet boundary). On boundary number 2 (the outlet 
+boundary), we also indicate the three velocity components, with the addition 
+of the pressure. In `case.scalar.boundary_types`, we indicate the same for the
+scalar on boundaries 1 and 2 (inlet and outlet).
+
+@attention Do not confuse the `d_s` and `d=x` boundary conditions for the scalar.
+The latter is to be used to specify a constant Dirichlet value `x` along the 
+relevant boundary.
+
+Once the appropriate boundaries have been identified and labeled,
+the user function `field_dirichlet_update` should be used to compute and
+apply the desired values to our velocity/pressure/scalar field(s). The prefix
+"field" in `field_dirichlet_update` refers to the fact that 
+a list of entire fields is passed down for the user to edit. 
+
+The fields that are passed down are tied to the `boundary_types` keywords passed in
+the case file. The function `field_dirichlet_update` is then called internally, 
+one time in the `fluid` solver and one time in the `scalar` solver (if enabled).
+
+Finally, depending on which boundary labels were input, the fields given to the user
+are copied onto the solution field boundaries. 
+
+The header of the user function is given in the code snippet below.
+
+```.f90
+  subroutine dirichlet_update(field_bc_list, bc_bc_list, coef, t, tstep, which_solver)
+    type(field_list_t), intent(inout) :: field_bc_list
+    type(bc_list_t), intent(inout) :: bc_bc_list
+    type(coef_t), intent(inout) :: coef
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+    character(len=*), intent(in) :: which_solver
+```
+
+The arguments and their purpose are as follows:
+
+* `field_bc_list` is the list of the field that can be edited. It is a list 
+of `field_t` objects.
+  * The field `i` contained in `field_bc_list` is accessed using 
+  `field_bc_list%fields(i)%f` and will refer to a `field_t` object. 
+  * If `which_solver = "fluid"`, it will contain the 4 fields `u,v,w,p`. They
+  are retrieved in that order in `field_bc_list`, i.e. `u` corresponds to
+  `field_bc_list%fields(1)%f`, etc.
+  * If `which_solver = "scalar"`, it will only contain the scalar field `s`. 
+ 
+* `bc_bc_list` contains a list of the `bc_t` objects to help access the 
+  boundary indices through the boundary `mask`.
+  * The boundary `i` contained in `bc_bc_list` is accessed with 
+  `bc_bc_list%bc(i)%bcp`.
+  * The boundary mask of the `i`-th `bc_t` object contained in `bc_bc_list` is accessed
+  with `bc_bc_list%bc(i)%bcp%msk`. It contains the linear indices of each GLL point on 
+  the `i`-th boundary facets.
+  @note `msk(0)` contains the size of the array. The first boundary index is `msk(1)`.
+  * If `which_solver = "fluid"`, it will contain the 4 `bc_t` objects 
+  corresponding to `d_vel_u`, `d_vel_v`, `d_vel_w`, and `d_pres`. They
+  can be retrieved in that order, in the same way as for `field_bc_list`.
+  * If `which_solver = "scalar"`, it will only the 1 `bc_t` object 
+  corresponding to `d_s`.
+ 
+* `coef` is a `coef_t` object containing various numerical parameters and 
+variables, such as the polynomial order `lx`, derivatives, facet normals...
+* `t`, `tstep` are self-explanatory.
+* `which_solver` takes the value `"fluid"` when the user function is called in
+the fluid solver. It takes the value `"scalar"` when it is called in the scalar
+solver.
+
+Links to the documentation to learn more about what the types mentioned above
+contain and how to use them: field::field_t, bc::bc_t, coefs::coef_t.
+
+The user function should be registered in `user_setup` with the following line:
+
+```.f90
+u%user_dirichlet_update => dirichlet_update
+```
+
+A very simple example illustrating the above is shown below, which is taken from the 
+[cyl_boundary_layer example](https://github.com/ExtremeFLOW/neko/blob/feature/field_bcs/examples/cyl_boundary_layer/cyl_bl.f90)
+
+```.f90
+  ! Initial example of using user specified dirichlet bcs
+  ! Note: This subroutine will be called two times, once in the fluid solver, and once
+  ! in the scalar solver (if enabled).
+  ! We apply u = (1,0,0) at the inlet/outlet, p = -1 at the outlet, and s(y,z) = sin(y)*sin(z)
+  ! at the inlet/outlet.
+  subroutine dirichlet_update(field_bc_list, bc_bc_list, coef, t, tstep, which_solver)
+    type(field_list_t), intent(inout) :: field_bc_list
+    type(bc_list_t), intent(inout) :: bc_bc_list
+    type(coef_t), intent(inout) :: coef
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+    character(len=*), intent(in) :: which_solver
+
+    integer :: i
+    real(kind=rp) :: y,z
+
+    ! Only do this at the first time step since our BCs are constants.
+    if (tstep .ne. 1) return
+
+    ! Check that we are being called by `fluid`
+    if (trim(which_solver) .eq. "fluid") then
+
+       associate(u => field_bc_list%fields(1)%f, &
+            v => field_bc_list%fields(2)%f, &
+            w => field_bc_list%fields(3)%f, &
+            p => field_bc_list%fields(4)%f)
+
+         !
+         ! Perform operations on u%x, v%x, w%x and p%x here
+         ! Note that we are checking if fields are allocated. If a
+         ! boundary type only contains e.g. "d_vel_u/d_pres", the fields
+         ! v%x and w%x will not be allocated.
+         !
+         ! Here we are applying very simple uniform boundaries (u,v,w) = (1,0,0)
+         ! and pressure outlet of p = -1
+         !
+         if (allocated(u%x)) u = 1.0_rp
+         if (allocated(v%x)) v = 0.0_rp
+         if (allocated(w%x)) w = 0.0_rp
+         if (allocated(p%x)) p = -1.0_rp
+
+       end associate
+
+    ! Check that we are being called by `scalar`
+    else if (trim(which_solver) .eq. "scalar") then
+
+       associate( s => field_bc_list%fields(1)%f, &
+            s_bc => bc_bc_list%bc(1)%bcp)
+
+         !
+         ! Perform operations on the scalar field here
+         ! Note that we are checking if the field is allocated, in
+         ! case the boundary is empty.
+         !
+         if (allocated(s%x)) then
+
+            do i = 1, s_bc%msk(0)
+               y = s_bc%dof%y(s_bc%msk(i), 1, 1, 1)
+               z = s_bc%dof%z(s_bc%msk(i), 1, 1, 1)
+               s%x(s_bc%msk(i), 1, 1, 1) = sin(y)*sin(z)
+            end do
+
+         end if
+       end associate
+
+    end if
+
+  end subroutine dirichlet_update
+```
+
+This example is applying constant dirichlet values at the selected
+boundaries for the velocity components and presure. The scalar is applied a 
+function `s(y,z) = sin(y)*sin(z)` to demonstrate the usage of boundary masks. 
+
+@attention The notation `u = 1.0_rp` is only possible because of the overloading of the 
+assignement operator `=` in `field_t`. In general, a field's array should be 
+accessed and modified with `u%%x`. 
+
+Note that we are only applying our boundary values at the first timestep,
+which is done simply with the line `if (tstep .ne. 1) return`. This is a trick
+that can be used for time independent boundary profiles that require 
+some kind of time consuming operation like interpolation or reading from a file,
+which would add overhead if executed at every time step.
+
+Observe that we always check if the fields are allocated before manipulating
+them. This is to prevent accidental memory access if only part of the velocity
+components or pressure are given in `case.fluid.boundary_types`. Fields in the 
+lists are only allocated if they are present in the case file.For
+example, if we removed the `d_pres` condition in the JSON case file code snippet
+above, the pressure field for our boundary condition would not be allocated (
+in the example above, `allocated(p%x)` would never be `true`). `"boundary_types": ["d_vel_u", "d_vel_v"]` will allocate the two first 
+fields in `field_bc_list`, which is the same behaviour as
+`"boundary_types": ["d_vel_u/d_vel_v", ""]`.
+
+@attention All the rules for [Running on GPUs](@ref user-file_tips_running-on-gpus)
+apply when working on field arrays. Use `device_memcpy` to make sure the device 
+arrays are also updated.
+
+## Additional remarks and tips
+
+### Running on GPUs {#user-file_tips_running-on-gpus}
+
+When running on GPUs, special care must be taken when using certain user
+functions. The short explanation is that the device (GPU) has its own memory and
+cannot directly access the memory on the host (CPU). This means that data and
+more specifically arrays must be copied manually from the host to the device
+(see device::device_memcpy).
+
+@attention In some cases, data transfer via `device_memcpy` is avoidable. Neko
+has some device math functions implemented that operate directly on device
+arrays. If you can decompose whatever operations you are performing in a user
+function into a set of instructions from the `math` module (e.g. `cadd`,
+`cfill`, `sub2`, ...), you may use the corresponding `device_math` functions to
+[offload work to the GPU](@ref accelerators_offload-work). See the 
+[fluid forcing code snippet](@ref user-file_user-f) for a simple example. For 
+more advanced examples, see the 
+[rayleigh-benard example](https://github.com/ExtremeFLOW/neko/blob/49925b7a04a638259db3b1ddd54349ca57f5d207/examples/rayleigh-benard/rayleigh.f90#L96-119)
+or the 
+[tgv example](https://github.com/ExtremeFLOW/neko/blob/49925b7a04a638259db3b1ddd54349ca57f5d207/examples/tgv/tgv.f90#L146-172).
+
+To illustrate this, let us have a look at the 
+[fluid initial condition code snippet](@ref user-file_user-ic):
+
+```.f90
+
+  !> Set the advecting velocity field.
+  subroutine set_velocity(u, v, w, p, params)
+    type(field_t), intent(inout) :: u
+    type(field_t), intent(inout) :: v
+    type(field_t), intent(inout) :: w
+    type(field_t), intent(inout) :: p
+    type(json_file), intent(inout) :: params
+    integer :: i, e, k, j
+    real(kind=rp) :: x, y
+
+    !
+    ! 1. Set the initial condition in fields u%x, v%x, w%x
+    !
+    do i = 1, u%dof%size()
+       x = u%dof%x(i,1,1,1)
+       y = u%dof%y(i,1,1,1)
+
+       ! Angular velocity is pi, giving a full rotation in 2 sec
+       u%x(i,1,1,1) = -y*pi
+       v%x(i,1,1,1) = x*pi
+       w%x(i,1,1,1) = 0
+    end do
+
+    !
+    ! 2. Copy the data set in u%x, v%x, w%x to the device arrays
+    ! u%x_d, v%x_d, w%x_d.
+    !
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_memcpy(u%x, u%x_d, u%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(v%x, v%x_d, v%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(w%x, w%x_d, w%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+    end if
+
+  end subroutine set_velocity
+
+```
+
+The code above is used to set the fluid initial condition, by specifying the
+values of fields `u,v,w` (and `p`) at all points in the domain. Notice that we
+have divided the above code into two parts.
+
+In the first part, we set the velocity components `u=-y*pi*`, `v=x*pi*`, and
+`w=0`, which updates the velocity field arrays `u%%x, v%%x, w%%x` allocated on
+the **host (CPU)**. If we were to run on GPUs, these lines of code would only
+act on the velocity arrays on the host (CPU), leaving the device (GPU) arrays
+untouched.
+
+We take care of this in the second part, for all three velocity arrays. To
+update the device (GPU) arrays, we use `device_memcpy` to copy the data
+contained in a host (CPU) array to a device (GPU) array. Looking at the details
+of the `device_memcpy` calls, we note the following:
+- Device arrays are refered to by appending the suffix `_d` to the host array
+  variable name (e.g. `u%%x` and `u%%x_d`). This is the standard in Neko.
+- We specify the direction of the data movement with the flag `HOST_TO_DEVICE`.
+  Other flags can also be used to move data from device to host
+  (`DEVICE_TO_HOST`) or device to device (`DEVICE_TO_DEVICE`). See the
+  [accelerators page](@ref accelerators_data-transfer) for more details on this.
+- The `sync` argument is a non-optional argument which dictates wether or not to
+  perform the data transfer synchronously.
+
+@attention Use asynchronous data transfers at your own risk! If you are unsure,
+use `sync = .true.` as a starting point.
+
+Finally, observe that we use the flag `NEKO_BCKND_DEVICE` to check if we are
+indeed running on GPUs. In that case, `NEKO_BCKND_DEVICE` would be equal to 1.
+
+### Registries {#user-file_tips_registries}
+
+Neko uses the concept of `registry` as a practical way to retrieve fields and
+point zones anywhere in the user file.
+
+The field registry `neko_field_registry` is often used in user functions where
+certain fields are not directly accessible as arguments. One can retrieve any
+field in the registry by its `name` with `neko_field_registry%%get_field(name)`.
+Default fields that are added to the registry are `u,v,w,p` and `s` if running
+with the scalar enabled. For a practical example of usage, see the 
+[rayleigh benard example](https://github.com/ExtremeFLOW/neko/blob/49925b7a04a638259db3b1ddd54349ca57f5d207/examples/rayleigh-benard/rayleigh.f90#L102-L105)
+
+Other fields may be added to the registry by various simulation components. For
+example:
+- If running with `simulation_components.vorticity` enabled, the fields
+  `omega_x, omega_y, omega_z` will be accessible in the registry.
+- If running with `simulation_components.lambda2` enabled, the field `lambda2`
+  will be accessible in the registry.
+
+@note You can add your own fields to the registry with `neko_field_registry%%add_field` (see field_registry::add_field).
+
+The point zone registry, `neko_point_zone_registry`, can be used to retrieve
+pointers to `point_zone_t` objects defined in the case file. See 
+[using point zones](#point-zones_using-point-zones) for detailed instructions.
diff --git a/examples/TS_channel/README.md b/examples/TS_channel/README.md
new file mode 100644
index 00000000000..634e5c0aa05
--- /dev/null
+++ b/examples/TS_channel/README.md
@@ -0,0 +1,10 @@
+## Tollmien-Schlichting (TS) wave test in channel flow.
+This test refers to [1].
+The temporal instability of TS wave in a channel flow at Re_c=5000 is tested in this example.
+Domain Size: (x, y, z) = (2pi/1.12, 2, 2pi/2.1), where x and z are two homogeneous derections.
+A recommended number of elements: (x, y, z) = (36, 36, 36) for decaying case and (16, 16, 16) for transitional case.
+A 2D (k_z=0) TS wave and two 3D (k_z=\pm 1) oblique TS waves are settled up as the initial condition in a channel with 1 period on streamwise and spanwise direction.
+One could use contrib/map_to_equidistant_1d twice to interpolate the fields onto a mesh whose grid points are equidistant in x- and z- directions. And then Fourier transform could be performed to get the amplitude of the TS wave.
+
+Reference:
+[1] Schlatter, P.C., 2005. Large-eddy simulation of transition and turbulence in wall-bounded shear flow (Doctoral dissertation, ETH Zurich).
diff --git a/examples/TS_channel/TS_channel.case b/examples/TS_channel/TS_channel.case
new file mode 100644
index 00000000000..10d35027374
--- /dev/null
+++ b/examples/TS_channel/TS_channel.case
@@ -0,0 +1,49 @@
+{
+"version": 1.0,
+"case": {
+    "mesh_file": "box.nmsh",
+    "output_at_end": false,
+    "output_boundary": false,
+    "output_checkpoints": false,
+    "output_precision": "double",
+    "checkpoint_control": "simulationtime",
+    "checkpoint_value": 100,
+    "end_time": 200,
+    "timestep": 0.02,
+    "constant_cfl": 0.4,
+    "numerics": {
+        "time_order": 3,
+        "polynomial_order": 7,
+        "dealias": true
+    },
+    "fluid": {
+        "scheme": "pnpn",
+        "Re": 5000,
+        "initial_condition": {
+            "type": "user"
+        },
+        "velocity_solver": {
+            "type": "cg",
+            "preconditioner": "jacobi",
+            "projection_space_size": 0,
+            "absolute_tolerance": 1e-7,
+            "max_iterations": 800
+        },
+        "pressure_solver": {
+            "type": "gmres",
+            "preconditioner": "hsmg",
+            "projection_space_size": 0,
+            "absolute_tolerance": 1e-5,
+            "max_iterations": 800
+        },
+        "flow_rate_force": {
+            "direction": 1,
+            "value": 0.66666666666666666666,
+            "use_averaged_flow": true
+        },
+        "boundary_types": ["", "", "w", "w"],
+        "output_control": "simulationtime",
+        "output_value": 1
+    }
+}
+}
\ No newline at end of file
diff --git a/examples/TS_channel/TS_channel.f90 b/examples/TS_channel/TS_channel.f90
new file mode 100644
index 00000000000..c5642468978
--- /dev/null
+++ b/examples/TS_channel/TS_channel.f90
@@ -0,0 +1,312 @@
+module user
+  use neko
+  use ieee_arithmetic, only: ieee_is_nan
+
+  implicit none
+
+  integer, parameter :: num_rows = 1500 ! resolution of TS wave in Cheb points
+  integer, parameter :: num_columns = 7 ! y, real and imag part of TS wave in Cheb points
+  integer, parameter :: num_ygll = 8*8 ! number of GLL points in the y direction
+  real (kind=rp) :: Re = 5000.0_rp
+  real (kind=rp), parameter :: pi_rp = 4.0_rp * atan (1.0_rp)
+contains
+
+  ! Register user defined functions (see user_intf.f90)
+  subroutine user_setup(usr)
+    type(user_t), intent(inout) :: usr
+
+    usr%user_mesh_setup => user_mesh_scale
+    usr%fluid_user_ic => user_ic
+    
+  end subroutine user_setup
+
+
+  ! Rescale mesh
+  ! Original mesh size: (2.0, 2.0, 1.0).
+  ! New mesh can easily be genreated with genmeshbox.
+  subroutine user_mesh_scale(msh)
+    type(mesh_t), intent(inout) :: msh
+    integer :: i
+
+    do i = 1, size(msh%points)
+       msh%points(i)%x(1) = pi_rp*msh%points(i)%x(1)/1.12_rp
+       msh%points(i)%x(3) = pi_rp*msh%points(i)%x(3)*2.0_rp/2.1_rp
+    end do
+
+  end subroutine user_mesh_scale
+
+  ! User defined initial condition
+  subroutine user_ic(u, v, w, p, params)
+    type(field_t), intent(inout) :: u
+    type(field_t), intent(inout) :: v
+    type(field_t), intent(inout) :: w
+    type(field_t), intent(inout) :: p
+    type(json_file), intent(inout) :: params
+    integer :: i, j, i_y
+    real(kind=rp) :: uvw(3)
+
+    real(kind=rp), dimension(num_rows, num_columns):: data_mode_cheb_2D
+    real(kind=rp), dimension(num_rows, num_columns):: data_mode_cheb_3D
+    real(kind=rp) :: y_GLC(num_rows)
+    real(kind=rp) :: TS2D_GLC(num_rows,num_columns-1)
+    real(kind=rp) :: TS3D_GLC(num_rows,num_columns-1)
+    integer :: ios
+
+    real(kind=rp) :: x_fixed, z_fixed
+    real(kind=rp), dimension(num_ygll) :: y_GLL
+    real(kind=rp) :: TS2D_GLL(num_ygll,num_columns-1)
+    real(kind=rp) :: TS3D_GLL(num_ygll,num_columns-1)
+
+    real(kind=rp) :: ur_2D, ui_2D, vr_2D, vi_2D
+    real(kind=rp) :: ur_3D, ui_3D, vr_3D, vi_3D, wr_3D, wi_3D
+
+    ! data reading
+    open(unit=10, file='TSwave_cheb_2D.csv', status='old', action='read', iostat=ios)
+       if (ios /= 0) then
+          print *, "2D TS wave: Error opening the file!"
+          stop
+       end if
+       do i = 1, num_rows
+          read(10,*) (data_mode_cheb_2D(i, j), j = 1, num_columns)
+       end do
+    close(10)
+    y_GLC = data_mode_cheb_2D(:,1)
+    TS2D_GLC = data_mode_cheb_2D(:,2:num_columns)
+
+    open(unit=10, file='TSwave_cheb_3D.csv', status='old', action='read', iostat=ios)
+       if (ios /= 0) then
+          print *, "3D TS wave: Error opening the file!"
+          stop
+       end if
+       do i = 1, num_rows
+          read(10,*) (data_mode_cheb_3D(i, j), j = 1, num_columns)
+       end do
+    close(10)
+    TS3D_GLC = data_mode_cheb_3D(:,2:num_columns)
+
+    ! alternative of point zone
+    i_y = 1
+    x_fixed = u%dof%x(1,1,1,1)
+    z_fixed = u%dof%z(1,1,1,1)
+    
+    ! initialize y_GLL
+    do i = 1, num_ygll
+       y_GLL(i) = 0.0
+    end do
+
+    do i = 1, u%dof%size()
+       if (.not. in_array(u%dof%y(i,1,1,1),y_GLL)) then
+          y_GLL(i_y) = u%dof%y(i,1,1,1)
+          i_y = i_y +1
+       end if
+    end do
+
+    do i = 1, num_columns-1
+       do j = 1, num_ygll
+          TS2D_GLL(j,i) = GLC_GLL_interp(TS2D_GLC(:,i),y_GLC,y_GLL(j))
+          TS3D_GLL(j,i) = GLC_GLL_interp(TS3D_GLC(:,i),y_GLC,y_GLL(j))
+       end do
+    end do
+
+    do i = 1, u%dof%size()
+       ur_2D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS2D_GLL(:,1))
+       ui_2D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS2D_GLL(:,2))
+       vr_2D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS2D_GLL(:,3))
+       vi_2D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS2D_GLL(:,4))
+       ur_3D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS3D_GLL(:,1))
+       ui_3D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS3D_GLL(:,2))
+       vr_3D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS3D_GLL(:,3))
+       vi_3D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS3D_GLL(:,4))
+       wr_3D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS3D_GLL(:,5))
+       wi_3D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS3D_GLL(:,6))
+       uvw = channel_ic(u%dof%x(i,1,1,1),u%dof%y(i,1,1,1),u%dof%z(i,1,1,1), &
+                        ur_2D,ui_2D,vr_2D,vi_2D, &
+                        ur_3D,ui_3D,vr_3D,vi_3D,wr_3D,wi_3D)
+       u%x(i,1,1,1) = uvw(1)
+       v%x(i,1,1,1) = uvw(2)
+       w%x(i,1,1,1) = uvw(3)
+    end do
+
+  end subroutine user_ic
+
+  function in_array(y,y_list) result(is_in)
+    logical :: is_in
+    real(kind=rp) :: y, y_list(num_ygll)
+    integer :: i
+    real(kind=rp) :: tol
+
+    tol = 1e-7_rp
+    is_in = .false.
+
+    do i = 1, num_ygll
+       if (abs(y-y_list(i)).le.tol) then
+          is_in = .true.
+          exit
+       end if
+    end do 
+
+  end function in_array
+
+  function pick_pt(y_target,y_source,Pt_source) result(Pt_target)
+    real(kind=rp) :: y_target, Pt_target
+    real(kind=rp), dimension(num_ygll) :: y_source, Pt_source
+    real(kind=rp) :: tol
+    integer :: i
+    logical :: found
+    
+    tol = 1e-14_rp
+    found = .false.
+    do i = 1, num_ygll
+       if (abs(y_target-y_source(i)) .le. tol) then
+          Pt_target = Pt_source(i)
+          found = .true.
+          exit
+       end if
+    end do
+    
+    if (.not. found) then
+       print *, 'tolerence too small for picking points!!!!!!'
+    end if
+
+  end function pick_pt
+
+  function channel_ic(x, y, z, &
+                      ur_2D,ui_2D,vr_2D,vi_2D, &
+                      ur_3D,ui_3D,vr_3D,vi_3D,wr_3D,wi_3D) result(uvw)
+    real(kind=rp) :: x, y, z
+    real(kind=rp) :: uvw(3)
+    real(kind=rp) :: ub
+
+    real(kind=rp) :: ur_2D, ui_2D, vr_2D, vi_2D
+    real(kind=rp) :: ur_3D, ui_3D, vr_3D, vi_3D, wr_3D, wi_3D
+    real(kind=rp) :: TS_amp_2D, TS_amp_3D, alpha, beta
+
+    complex(kind=rp) :: spa_osci_2D
+    complex(kind=rp) :: u_mode_2D, v_mode_2D
+    real(kind=rp) :: u_pert_TS_2D, v_pert_TS_2D
+
+    complex(kind=rp) :: u_mode_3D, v_mode_3D, w_mode_3D
+    complex(kind=rp) :: spa_osci_3D_p, spa_osci_3D_n
+    real(kind=rp) :: u_pert_TS_3D, v_pert_TS_3D, w_pert_TS_3D
+    
+    ! amplitude for decaying TS wave:
+    ! TS_amp_2D = 1e-6_rp
+    ! TS_amp_3D = 0.0_rp
+
+    ! amplitude for secondary instability TS wave:
+    TS_amp_2D = 3e-2_rp
+    TS_amp_3D = 1e-4_rp
+
+    alpha = 1.12_rp
+    beta = 2.1_rp
+
+    spa_osci_2D = exp((0.0_rp,1.0_rp)*alpha*x)
+    u_mode_2D = cmplx(ur_2D,ui_2D,rp)
+    v_mode_2D = cmplx(vr_2D,vi_2D,rp)
+
+    u_pert_TS_2D = TS_amp_2D*real(u_mode_2D*spa_osci_2D,rp)
+    v_pert_TS_2D = TS_amp_2D*real(v_mode_2D*spa_osci_2D,rp)
+
+    spa_osci_3D_p = exp((0.0_rp,1.0_rp)*alpha*x+(0.0_rp,1.0_rp)*beta*z)
+    spa_osci_3D_n = exp((0.0_rp,1.0_rp)*alpha*x-(0.0_rp,1.0_rp)*beta*z)
+    u_mode_3D = cmplx(ur_3D,ui_3D,rp)
+    v_mode_3D = cmplx(vr_3D,vi_3D,rp)
+    w_mode_3D = cmplx(wr_3D,wi_3D,rp)
+
+    u_pert_TS_3D = TS_amp_3D*real(u_mode_3D*spa_osci_3D_p,rp)
+    u_pert_TS_3D = u_pert_TS_3D + TS_amp_3D*real(u_mode_3D*spa_osci_3D_n,rp)
+    v_pert_TS_3D = TS_amp_3D*real(v_mode_3D*spa_osci_3D_p,rp)
+    v_pert_TS_3D = v_pert_TS_3D + TS_amp_3D*real(v_mode_3D*spa_osci_3D_n,rp)
+    w_pert_TS_3D = TS_amp_3D*real(w_mode_3D*spa_osci_3D_p,rp)
+    w_pert_TS_3D = w_pert_TS_3D + TS_amp_3D*real(-w_mode_3D*spa_osci_3D_n,rp)
+
+    ub = 1.0_rp-y*y
+
+    uvw(1)  = ub + u_pert_TS_2D + u_pert_TS_3D
+    uvw(2)  =      v_pert_TS_2D + v_pert_TS_3D
+    uvw(3)  =                     w_pert_TS_3D
+
+  end function channel_ic
+
+  ! The raw data in .csv file is on GLC points,
+  ! hence a interpolation from GLC points to GLL points is performed.
+  function GLC_GLL_interp(f_GLC,x_GLC,x_GLL) result(f_GLL)
+    real(kind=rp) :: x_GLL, f_GLL
+    real(kind=rp), dimension(num_rows) :: f_GLC, x_GLC
+    real(kind=rp), dimension(num_rows) :: chi, psi
+    real(kind=rp) :: xn, xs, xe, xnc, dchebyshev 
+    integer :: i, N, cj
+    logical :: is_nan
+
+    N = num_rows - 1
+    xn = x_GLL
+    xs = x_GLC(1)
+    xe = x_GLC(N+1)
+    xnc = ( 2.0_rp*xn - (xs+xe) )/ &
+          ( xe - xs)
+    dchebyshev = eval_dchebyshev(xnc,N)
+    do i = 1, N+1
+       chi(i) = ( 2.0_rp*x_GLC(i) - (xs+xe) )/ &
+                ( xe - xs)
+
+       if (i .eq. 1) then
+          cj = 2.0_rp
+       else if (i .eq. N+1) then
+          cj = 2.0_rp
+       else
+          cj = 1.0_rp
+       end if
+
+       psi(i) = ((-1.0_rp)**(i) * (1.0_rp-xnc*xnc) * dchebyshev)/ &
+                (cj * N*N * (xnc-chi(i)))
+
+       is_nan = ieee_is_nan(psi(i))
+       if (is_nan .or. psi(i) .gt. 1e+3) then
+          psi(i) = 1.0_rp
+       end if
+
+    end do
+
+    f_GLL = 0.0_rp
+    do i = 1, N+1
+       f_GLL = f_GLL + psi(i)*f_GLC(i)
+    end do
+
+  end function GLC_GLL_interp
+
+  ! Evaluate the derivative of the Chebyshev polynomials of the first kind
+  function eval_dchebyshev(x,N) result(dT_Nx)
+    real(kind=rp) :: x, dT_Nx, tmp
+    integer :: N, j, nn ! N should be num_rows - 1
+    real(kind=rp), dimension(N+1) :: dT
+
+    dT(1) = 0.0_rp
+    dT(2) = 1.0_rp
+
+    do j = 3, N+1
+       nn = j - 2
+       tmp = eval_chebyshev(x,nn)
+       if (nn .eq. 1) then
+          dT(j) = 2.0_rp*(nn+1.0_rp)*tmp
+       else
+          dT(j) = 2.0_rp*(nn+1.0_rp)*tmp + (nn+1.0_rp)/(nn-1.0_rp)*dT(j-2)
+       end if
+    end do
+
+    dT_Nx = dT(N+1)
+  end function eval_dchebyshev
+  
+  ! Evaluate the Chebyshev polynomials of the first kind
+  function eval_chebyshev(x,N) result(T_Nx)
+    real(kind=rp) :: x, T_Nx
+    integer :: N, j ! N should be num_rows - 1
+    real(kind=rp), dimension(N+1) :: T
+    T(1) = 1.0_rp
+    T(2) = x
+    do j = 3, N+1
+       T(j) = 2.0_rp*x*T(j-1) - T(j-2)
+    end do
+    T_Nx = T(N+1)
+  end function eval_chebyshev
+
+end module user
diff --git a/examples/TS_channel/TSwave_cheb_2D.csv b/examples/TS_channel/TSwave_cheb_2D.csv
new file mode 100644
index 00000000000..c6f3065c2d1
--- /dev/null
+++ b/examples/TS_channel/TSwave_cheb_2D.csv
@@ -0,0 +1,1500 @@
+-1.000000000000000000e+00,4.584758335742073160e-09,-6.605710750344088278e-09,-0.000000000000000000e+00,-0.000000000000000000e+00,-0.000000000000000000e+00,-0.000000000000000000e+00
+-9.999978038281271964e-01,-4.015715757417417654e-05,4.569989657188079451e-05,5.619723763318393232e-11,4.938177670084886477e-11,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.999912153221552913e-01,-1.606457918596924050e-04,1.827882689522557561e-04,8.992473093557475700e-10,7.902535884006655202e-10,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.999802345110231361e-01,-3.614719394510981679e-04,4.111652352468359657e-04,4.551750322530005835e-09,4.000872470589427502e-09,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.999648614429623716e-01,-6.426530441740362564e-04,7.306753622148200612e-04,1.438212443575207382e-08,1.264525925619368440e-08,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.999450961854965403e-01,-1.004213472097001576e-03,1.141101164334928746e-03,3.510073438960935335e-08,3.087358290910671271e-08,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.999209388254417519e-01,-1.446184279505558770e-03,1.642163282546146722e-03,7.275447708838917253e-08,6.402269644946420233e-08,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.998923894689052405e-01,-1.968603028368624917e-03,2.233520585655198072e-03,1.347198280147815615e-07,1.186168143886666890e-07,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.998594482412856976e-01,-2.571513590167696928e-03,2.914770424059085277e-03,2.296943538311653487e-07,2.023683297837774581e-07,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.998221152872722728e-01,-3.254965803044406868e-03,3.685448874605150912e-03,3.676867012547371414e-07,3.241787592500070484e-07,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.997803907708440185e-01,-4.019015250155398913e-03,4.545030984689757934e-03,5.600050670667448155e-07,4.941398079966697259e-07,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.997342748752695574e-01,-4.863722874374213669e-03,5.492931154011141022e-03,8.192441633677707300e-07,7.235345479748266059e-07,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.996837678031056384e-01,-5.789154538022578227e-03,6.528503416356815081e-03,1.159270685870362062e-06,1.024838687943126465e-06,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.996288697761966935e-01,-6.795380694260139999e-03,7.651041913961972862e-03,1.595207300688258317e-06,1.411721933535674662e-06,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.995695810356738376e-01,-7.882475804836575756e-03,8.859781267889960815e-03,2.143415162548993643e-06,1.899049411149232100e-06,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.995059018419534258e-01,-9.050517988285667823e-03,1.015389708960214542e-02,2.821474980010050348e-06,2.502883152689057148e-06,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.994378324747364983e-01,-1.029958830017454509e-02,1.153250644820765121e-02,3.648166643228454403e-06,3.240483609283829179e-06,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.993653732330070039e-01,-1.162977037689127621e-02,1.299466849679906978e-02,4.643447436725973307e-06,4.130311192300953368e-06,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.992885244350309115e-01,-1.304114956475402320e-02,1.453938493992837540e-02,5.828428851008756323e-06,5.192027803534321687e-06,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.992072864183545455e-01,-1.453381257942183181e-02,1.616560076609400640e-02,7.225352016233520099e-06,6.446498353873764760e-06,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.991216595398032529e-01,-1.610784643491034590e-02,1.787220480932491182e-02,8.857561781682559340e-06,7.915792225669524219e-06,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.990316441754796273e-01,-1.776333832410204386e-02,1.965803053563370401e-02,1.074947946040248018e-05,9.623184683296080429e-06,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.989372407207620652e-01,-1.950037429246448059e-02,2.152185671087786592e-02,1.292657427116070628e-05,1.159315819827774761e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.988384495903031013e-01,-2.131903883009049483e-02,2.346240814931219165e-02,1.541533348996891026e-05,1.385140363593812098e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.987352712180274095e-01,-2.321941403472903287e-02,2.547835659641024472e-02,1.824323135229658467e-05,1.642482133433034830e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.986277060571295827e-01,-2.520157880929160474e-02,2.756832152734582653e-02,2.143869673244703586e-05,1.934152200916413204e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.985157545800730228e-01,-2.726560783720866077e-02,2.973087096329946055e-02,2.503107961704672488e-05,2.263082745833955740e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.983994172785870536e-01,-2.941157105189550966e-02,3.196452251818065332e-02,2.905061641899702067e-05,2.632327105685760822e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.982786946636650338e-01,-3.163953246180171985e-02,3.426774429667543004e-02,3.352839416655188442e-05,3.045059801067757604e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.981535872655622477e-01,-3.394954960057282312e-02,3.663895576994501135e-02,3.849631357931886176e-05,3.504576533468219926e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.980240956337933511e-01,-3.634167222606450587e-02,3.907652894547419126e-02,4.398705107903873513e-05,4.014294153755993625e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.978902203371300406e-01,-3.881594170390346987e-02,4.157878942943630390e-02,5.003401979358865909e-05,4.577750595021980233e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.977519619635988324e-01,-4.137238998905035103e-02,4.414401729483752312e-02,5.667132954231383381e-05,5.198604773628320158e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.976093211204778433e-01,-4.401103862350966983e-02,4.677044843979915462e-02,6.393374587127469534e-05,5.880636448366306497e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.974622984342947918e-01,-4.673189772445730805e-02,4.945627575114016489e-02,7.185664819808252777e-05,6.627746038074531544e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.973108945508237788e-01,-4.953496528103580598e-02,5.219965013999435660e-02,8.047598707080369181e-05,7.443954395257284119e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.971551101350828450e-01,-5.242022596938809148e-02,5.499868178174252553e-02,8.982824057399080373e-05,8.333402534199321884e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.969949458713304180e-01,-5.538765022001540528e-02,5.785144152347981850e-02,9.995036997267215805e-05,9.300351304743627139e-05,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.968304024630633142e-01,-5.843719329740406504e-02,6.075596200935324404e-02,1.108797745994818669e-04,1.034918101573731021e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.966614806330126308e-01,-6.156879422721388273e-02,6.371023917151931448e-02,1.226542460476181065e-04,1.148439100017481073e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.964881811231409703e-01,-6.478237494204601432e-02,6.671223341532400708e-02,1.353119217092943027e-04,1.271059912209631347e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.963105046946395538e-01,-6.807783932561203843e-02,6.975987104507169745e-02,1.488912376911875684e-04,1.403254122313376162e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.961284521279242243e-01,-7.145507211111129520e-02,7.285104572099709852e-02,1.634308811763625889e-04,1.545507050284642282e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.959420242226324493e-01,-7.491393810197208292e-02,7.598362000669420246e-02,1.789697422992080902e-04,1.698315683201930785e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.957512217976196567e-01,-7.845428116555669407e-02,7.915542658579444169e-02,1.955468655357937271e-04,1.862188599836089409e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.955560456909556821e-01,-8.207592309034392186e-02,8.236426985397188316e-02,2.132014006658221827e-04,2.037645887616183120e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.953564967599211055e-01,-8.577866295579579525e-02,8.560792746733915382e-02,2.319725533812533856e-04,2.225219052053675783e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.951525758810034761e-01,-8.956227636818504811e-02,8.888415207032281107e-02,2.518995356229655042e-04,2.425450919338398162e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.949442839498933155e-01,-9.342651399065823870e-02,9.219067241228171006e-02,2.730215156161921856e-04,2.638895530093001438e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.947316218814804545e-01,-9.737110132830022191e-02,9.552519547648959108e-02,2.953775677005182555e-04,2.866118025191999492e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.945145906098497024e-01,-1.013957374318578131e-01,9.888540779287195714e-02,3.190066220755136475e-04,3.107694523657559844e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.942931910882769619e-01,-1.055000943230918337e-01,1.022689769401261828e-01,3.439474143428407371e-04,3.364211991378266678e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.940674242892252321e-01,-1.096838161361816666e-01,1.056735535357281458e-01,3.702384350650712646e-04,3.636268101790013385e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.938372912043398344e-01,-1.139465182837329227e-01,1.090967727844612128e-01,3.979178793506877709e-04,3.924471087114223835e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.936027928444443047e-01,-1.182877869443615254e-01,1.125362561899469760e-01,4.270235964901448827e-04,4.229439581050497307e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.933639302395360637e-01,-1.227071780735831535e-01,1.159896131785476125e-01,4.575930397122510195e-04,4.551802452230696767e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.931207044387818650e-01,-1.272042167891999642e-01,1.194544428253631607e-01,4.896632160889939113e-04,4.892198627830635733e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.928731165105130207e-01,-1.317783968969926112e-01,1.229283358444472479e-01,5.232706367262008945e-04,5.251276908533830884e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.926211675422206282e-01,-1.364291799805502914e-01,1.264088760940936251e-01,5.584512672383153088e-04,5.629695773374848635e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.923648586405513505e-01,-1.411559954026006536e-01,1.298936424203705753e-01,5.952404785394235006e-04,6.028123175807622398e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.921041909313020879e-01,-1.459582389859540030e-01,1.333802103630307434e-01,6.336729980400307742e-04,6.447236329761612183e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.918391655594147593e-01,-1.508352732500017612e-01,1.368661541640279200e-01,6.737828613214641785e-04,6.887721485638292462e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.915697836889719730e-01,-1.557864265573697671e-01,1.403490483366521269e-01,7.156033643362985294e-04,7.350273697909135697e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.912960465031916968e-01,-1.608109926998656791e-01,1.438264695112680680e-01,7.591670161269938825e-04,7.835596581533231519e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.910179552044214857e-01,-1.659082306899832826e-01,1.472959984203584261e-01,8.045054922207073994e-04,8.344402059715324595e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.907355110141340404e-01,-1.710773643945936728e-01,1.507552215886171498e-01,8.516495887259622989e-04,8.877410101750737251e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.904487151729213235e-01,-1.763175821772061946e-01,1.542017329057207009e-01,9.006291770727780028e-04,9.435348451488133423e-04,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.901575689404893410e-01,-1.816280367207549762e-01,1.576331359401051957e-01,9.514731596120402620e-04,1.001895234588507950e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.898620735956522587e-01,-1.870078448854993747e-01,1.610470452065719538e-01,1.004209426055420159e-03,1.062896422471532390e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.895622304363274058e-01,-1.924560875666547766e-01,1.644410883607708984e-01,1.058864810744415035e-03,1.126613343075429444e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.892580407795287245e-01,-1.979718094019875807e-01,1.678129078258102924e-01,1.115465050963754213e-03,1.193121590076127178e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.889495059613619965e-01,-2.035540190047723241e-01,1.711601626720931080e-01,1.174034746176056288e-03,1.262497384701687184e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.886366273370181812e-01,-2.092016891019843061e-01,1.744805303087682835e-01,1.234597318362810328e-03,1.334817543156801081e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.883194062807676428e-01,-2.149137560948944370e-01,1.777717083051130742e-01,1.297174973399350170e-03,1.410159443040083591e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.879978441859542659e-01,-2.206891205593774496e-01,1.810314161389475485e-01,1.361788663636867309e-03,1.488600988937754466e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.876719424649894608e-01,-2.265266470915031360e-01,1.842573968625139946e-01,1.428458051613333904e-03,1.570220577195734126e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.873417025493456123e-01,-2.324251650058182239e-01,1.874474189446555805e-01,1.497201475037026512e-03,1.655097059908537237e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.870071258895497524e-01,-2.383834683021361645e-01,1.905992781423443694e-01,1.568035913096781087e-03,1.743309708225850322e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.866682139551777864e-01,-2.444003159238797018e-01,1.937107986896731138e-01,1.640976954054720362e-03,1.834938174770612629e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.863249682348473879e-01,-2.504744322397058354e-01,1.967798354245550718e-01,1.716038764132165771e-03,1.930062455431996553e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.859773902362116704e-01,-2.566045072075279765e-01,1.998042752656677690e-01,1.793234057970977948e-03,2.028762850373290374e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.856254814859526370e-01,-2.627891975230010613e-01,2.027820388808080077e-01,1.872574070387421049e-03,2.131119924521994446e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.852692435297746298e-01,-2.690271263838397431e-01,2.057110822591804444e-01,1.954068529762595114e-03,2.237214467450008779e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.849086779323972252e-01,-2.753168843664817733e-01,2.085893980686171212e-01,2.037725632742173974e-03,2.347127452480931263e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.845437862775482385e-01,-2.816570304467751473e-01,2.114150178148847259e-01,2.123552020739971711e-03,2.460939995541131398e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.841745701679575076e-01,-2.880460917291755574e-01,2.141860125249895608e-01,2.211552757861768876e-03,2.578733313301258354e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.838010312253490097e-01,-2.944825651791651566e-01,2.169004949282093975e-01,2.301731310499186261e-03,2.700588680858049921e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.834231710904342005e-01,-3.009649178691596871e-01,2.195566201585230914e-01,2.394089528526981148e-03,2.826587389237281058e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.830409914229045754e-01,-3.074915875216546191e-01,2.221525880761713312e-01,2.488627628204762281e-03,2.956810702158182980e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.826544939014246749e-01,-3.140609839837957828e-01,2.246866439068208599e-01,2.585344176892092824e-03,3.091339812706609281e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.822636802236245357e-01,-3.206714897900287409e-01,2.271570798744191322e-01,2.684236079275773446e-03,3.230255799666643685e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.818685521060921406e-01,-3.273214616110995401e-01,2.295622363264834820e-01,2.785298565500161742e-03,3.373639583749555530e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.814691112843658694e-01,-3.340092302250968204e-01,2.319005030440808857e-01,2.888525180913495850e-03,3.521571883456135754e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.810653595129272819e-01,-3.407331022658136588e-01,2.341703205942564092e-01,2.993907777754947763e-03,3.674133170705736005e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.806572985651929031e-01,-3.474913612606537194e-01,2.363701811889792337e-01,3.101436508518272579e-03,3.831403626588281847e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.802449302335066728e-01,-3.542822690283070619e-01,2.384986299514322838e-01,3.211099821129807623e-03,3.993463097101128401e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.798282563291319525e-01,-3.611040661069046531e-01,2.405542660469523752e-01,3.322884455967361865e-03,4.160391048826990457e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.794072786822439758e-01,-3.679549734420433649e-01,2.425357437220989620e-01,3.436775444762498518e-03,4.332266524535395569e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.789819991419210776e-01,-3.748331933761483992e-01,2.444417729841460163e-01,3.552756111236366268e-03,4.509168099095075058e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.785524195761373667e-01,-3.817369109830798246e-01,2.462711205601711972e-01,3.670808073513449898e-03,4.691173835360985984e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.781185418717538438e-01,-3.886642955625760343e-01,2.480226110446334120e-01,3.790911248436659373e-03,4.878361240506193415e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.776803679345105191e-01,-3.956135014366701230e-01,2.496951275995739905e-01,3.913043857818200751e-03,5.070807222417976490e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.772378996890179748e-01,-4.025826701903341132e-01,2.512876123050816202e-01,4.037182436300036301e-03,5.268588046569175863e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.767911390787489267e-01,-4.095699311689335320e-01,2.527990675651267671e-01,4.163301841153165678e-03,5.471779293356328344e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.763400880660293435e-01,-4.165734028771165831e-01,2.542285562660634990e-01,4.291375263997976308e-03,5.680455815438764308e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.758847486320304521e-01,-4.235911951298248490e-01,2.555752022349048747e-01,4.421374244055587856e-03,5.894691695989352602e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.754251227767596344e-01,-4.306214093513427432e-01,2.568381913083817025e-01,4.553268683372937303e-03,6.114560207206808790e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.749612125190515455e-01,-4.376621415584195374e-01,2.580167714003854251e-01,4.687026863873674370e-03,6.340133769545601254e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.744930198965594537e-01,-4.447114826351561412e-01,2.591102527272519107e-01,4.822615465911802776e-03,6.571483911832372987e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.740205469657464699e-01,-4.517675200563890936e-01,2.601180089233641746e-01,4.959999588787148035e-03,6.808681231535170832e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.735437958018759996e-01,-4.588283395016230504e-01,2.610394765758605184e-01,5.099142773005719415e-03,7.051795356165829927e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.730627684990029724e-01,-4.658920261455591905e-01,2.618741557703765932e-01,5.240007023959882340e-03,7.300894905187480140e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.725774671699647378e-01,-4.729566666574972489e-01,2.626216104133639040e-01,5.382552837589853027e-03,7.556047453045303293e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.720878939463715174e-01,-4.800203500736582862e-01,2.632814679061648566e-01,5.526739227416144207e-03,7.817319492901290620e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.715940509785973012e-01,-4.870811696499460774e-01,2.638534198759184823e-01,5.672523753351691421e-03,8.084776401270189067e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.710959404357700775e-01,-4.941372243366404238e-01,2.643372215900573474e-01,5.819862552086398598e-03,8.358482403694609403e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.705935645057625072e-01,-5.011866200484255129e-01,2.647326921240029662e-01,5.968710368854074355e-03,8.638500541394186771e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.700869253951824867e-01,-5.082274715185780556e-01,2.650397143936237820e-01,6.119020590868619845e-03,8.924892638883557411e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.695760253293631559e-01,-5.152579038659700350e-01,2.652582348406193891e-01,6.270745282206197091e-03,9.217719273026057578e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.690608665523531284e-01,-5.222760532651549159e-01,2.653882632436194711e-01,6.423835220051444216e-03,9.517039742754548545e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.685414513269068326e-01,-5.292800696132888838e-01,2.654298726059474700e-01,6.578239932443621929e-03,9.822912040239724363e-03,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.680177819344745194e-01,-5.362681169858526387e-01,2.653831985442586250e-01,6.733907737298169480e-03,1.013539282330076265e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.674898606751921593e-01,-5.432383754198074621e-01,2.652484389464386805e-01,6.890785782677637944e-03,1.045453738877777372e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.669576898678714505e-01,-5.501890422648226808e-01,2.650258536243137453e-01,7.048820088319364754e-03,1.078039964731325982e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.664212718499893828e-01,-5.571183340424930019e-01,2.647157641608571810e-01,7.207955588590159515e-03,1.111303209954987367e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.658806089776785786e-01,-5.640244868867303474e-01,2.643185526248835737e-01,7.368136176424852982e-03,1.145248581350641684e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.653357036257157464e-01,-5.709057589804829025e-01,2.638346615784758487e-01,7.529304748307066170e-03,1.179881040324115386e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.647865581875129104e-01,-5.777604312189714708e-01,2.632645931977857234e-01,7.691403250529520175e-03,1.215205400926326618e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.642331750751055308e-01,-5.845868083927304726e-01,2.626089087570533387e-01,7.854372726343230443e-03,1.251226327979241583e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.636755567191421790e-01,-5.913832205889862914e-01,2.618682275891214273e-01,8.018153364144520179e-03,1.287948335367388071e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.631137055688746562e-01,-5.981480250143095256e-01,2.610432265332075508e-01,8.182684546412419616e-03,1.325375784474308941e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.625476240921463367e-01,-6.048796065098318575e-01,2.601346391111887812e-01,8.347904899750339947e-03,1.363512882792339981e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.619773147753815090e-01,-6.115763784829398864e-01,2.591432546765214950e-01,8.513752345562602269e-03,1.402363682599087370e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.614027801235747184e-01,-6.182367853219902232e-01,2.580699175359055797e-01,8.680164151719498705e-03,1.441932079867616623e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.608240226602797751e-01,-6.248593013696038811e-01,2.569155251835966713e-01,8.847076984438173933e-03,1.482221813257874471e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.602410449275984305e-01,-6.314424345207434452e-01,2.556810287005907290e-01,9.014426961095689875e-03,1.523236463277133704e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.596538494861691637e-01,-6.379847250419435900e-01,2.543674309267998646e-01,9.182149703953042252e-03,1.564979451634458674e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.590624389151560791e-01,-6.444847471572411335e-01,2.529757851820551018e-01,9.350180393827714834e-03,1.607454040620293068e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.584668158122380266e-01,-6.509411113552638906e-01,2.515071947045323664e-01,9.518453824537385921e-03,1.650663332779059647e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.578669827935962777e-01,-6.573524630231112331e-01,2.499628111368637218e-01,9.686904457767563092e-03,1.694610270649303529e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.572629424939035347e-01,-6.637174849251269437e-01,2.483438334147624327e-01,9.855466478140352191e-03,1.739297636640309633e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.566546975663126062e-01,-6.700348970136877913e-01,2.466515069542286054e-01,1.002407384880701913e-02,1.784728053097698122e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.560422506824440836e-01,-6.763034572563005886e-01,2.448871218835259878e-01,1.019266036724367740e-02,1.830903982449505474e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.554256045323753499e-01,-6.825219630261748494e-01,2.430520120385311900e-01,1.036115972103395302e-02,1.877827727551320547e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.548047618246282564e-01,-6.886892511136515349e-01,2.411475536784464524e-01,1.052950554402406515e-02,1.925501432165602558e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.541797252861573542e-01,-6.948041983130824883e-01,2.391751642765901908e-01,1.069763147243584477e-02,1.973927081553765450e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.535504976623380147e-01,-7.008657221557975658e-01,2.371363009638535468e-01,1.086547120115614537e-02,2.023106503221926145e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.529170817169542174e-01,-7.068727809090493697e-01,2.350324590497357147e-01,1.103295853958408644e-02,2.073041367787135242e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.522794802321865593e-01,-7.128243744602505982e-01,2.328651715433825709e-01,1.120002746797059585e-02,2.123733189970538476e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.516376960085998205e-01,-7.187195447699179507e-01,2.306360065492002454e-01,1.136661219331565158e-02,2.175183329752127318e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.509917318651310847e-01,-7.245573755973505614e-01,2.283465668887986466e-01,1.153264720499423690e-02,2.227392993626841830e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.503415906390768608e-01,-7.303369929263821003e-01,2.259984882616382640e-01,1.169806733051625275e-02,2.280363235960793447e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.496872751860808703e-01,-7.360575654395383749e-01,2.235934375558802090e-01,1.186280779058472915e-02,2.334094960500975036e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.490287883801215019e-01,-7.417183038408756701e-01,2.211331116990822576e-01,1.202680425381811094e-02,2.388588921964101414e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.483661331134992656e-01,-7.473184621160452457e-01,2.186192367932850988e-01,1.218999289155088886e-02,2.443845727752841715e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.476993122968236927e-01,-7.528573368323550064e-01,2.160535654559793128e-01,1.235231043183974281e-02,2.499865839813563903e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.470283288590011228e-01,-7.583342670581173506e-01,2.134378766006879702e-01,1.251369421293119059e-02,2.556649576541563176e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.463531857472214925e-01,-7.637486344059704058e-01,2.107739732198016247e-01,1.267408223663003992e-02,2.614197114809329453e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.456738859269452346e-01,-7.690998624851832499e-01,2.080636811615534376e-01,1.283341322056882547e-02,2.672508492069506844e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.449904323818909546e-01,-7.743874169339112212e-01,2.053088473949208070e-01,1.299162665014031749e-02,2.731583608529175589e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.443028281140214419e-01,-7.796108063023531320e-01,2.025113391786268480e-01,1.314866282959417654e-02,2.791422229484294415e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.436110761435310135e-01,-7.847695795125044782e-01,1.996730423259598652e-01,1.330446293297617155e-02,2.852023987685856771e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.429151795088319687e-01,-7.898633271788012555e-01,1.967958593828450786e-01,1.345896905360831688e-02,2.913388385720244375e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.422151412665417114e-01,-7.948916808338183371e-01,1.938817089718965214e-01,1.361212425333781055e-02,2.975514798582207243e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.415109644914688714e-01,-7.998543114172472768e-01,1.909325236561095762e-01,1.376387261079842217e-02,3.038402476182705189e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.408026522765996491e-01,-8.047509299206060396e-01,1.879502487006571576e-01,1.391415926869285502e-02,3.102050545960748085e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.400902077330849371e-01,-8.095812866858166368e-01,1.849368408482036930e-01,1.406293048027285747e-02,3.166458015576633506e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.393736339902262200e-01,-8.143451702802523284e-01,1.818942669409046031e-01,1.421013365515309997e-02,3.231623775629782064e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.386529341954613637e-01,-8.190424073689522855e-01,1.788245020895547766e-01,1.435571740381289853e-02,3.297546602404856364e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.379281115143518477e-01,-8.236728611300078162e-01,1.757295289530690641e-01,1.449963158134393720e-02,3.364225160665129666e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.371991691305681105e-01,-8.282364313272041478e-01,1.726113358850835389e-01,1.464182733027077910e-02,3.431658006427117102e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.364661102458753383e-01,-8.327330531195336061e-01,1.694719155595295623e-01,1.478225712203337823e-02,3.499843589841239444e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.357289380801203649e-01,-8.371626959884609365e-01,1.663132639197675011e-01,1.492087479759987047e-02,3.568780257985249699e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.349876558712167940e-01,-8.415253636381191393e-01,1.631373786515099567e-01,1.505763560701676301e-02,3.638466257770826134e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.342422668751307890e-01,-8.458210918488967556e-01,1.599462582424134582e-01,1.519249624786843215e-02,3.708899738765317894e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.334927743658669730e-01,-8.500499490760194954e-01,1.567419002233552960e-01,1.532541490270973583e-02,3.780078756081921287e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.327391816354543286e-01,-8.542120335774673334e-01,1.535262998693244230e-01,1.545635127487803712e-02,3.852001273217017102e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.319814919939308773e-01,-8.583074739833789213e-01,1.503014495495432756e-01,1.558526662362033285e-02,3.924665164866899397e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.312197087693302455e-01,-8.623364269410956906e-01,1.470693368878276397e-01,1.571212379793918146e-02,3.998068219760483877e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.304538353076660107e-01,-8.662990765900545620e-01,1.438319435911406818e-01,1.583688726890250878e-02,4.072208143394896035e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.296838749729178231e-01,-8.701956339062371093e-01,1.405912448825621108e-01,1.595952316111432998e-02,4.147082560828258685e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.289098311470160851e-01,-8.740263348570195268e-01,1.373492074649770023e-01,1.607999928290183150e-02,4.222689019402812299e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.281317072298272963e-01,-8.777914391341986589e-01,1.341077886541066078e-01,1.619828515471903943e-02,4.299024991396966505e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.273495066391393982e-01,-8.814912291191590032e-01,1.308689355452729852e-01,1.631435203685977983e-02,4.376087876625601308e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.265632328106458981e-01,-8.851260091805656094e-01,1.276345837212212730e-01,1.642817295569224192e-02,4.453875005034706180e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.257728891979321029e-01,-8.886961031265123712e-01,1.244066559735897554e-01,1.653972272861762433e-02,4.532383639192222313e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.249784792724585758e-01,-8.922018545428380687e-01,1.211870613920115608e-01,1.664897798747036473e-02,4.611610976710118354e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.241800065235469264e-01,-8.956436241429307499e-01,1.179776941975647364e-01,1.675591720092522996e-02,4.691554152668175881e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.233774744583637117e-01,-8.990217885121185581e-01,1.147804326902030908e-01,1.686052069510858564e-02,4.772210241806926212e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.225708866019060039e-01,-9.023367404959528049e-01,1.115971387476673293e-01,1.696277067353053197e-02,4.853576260818726062e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.217602464969846254e-01,-9.055888854147221556e-01,1.084296557524209864e-01,1.706265123503662370e-02,4.935649170466045732e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.209455577042096053e-01,-9.087786411461096225e-01,1.052798085459994565e-01,1.716014839046691581e-02,5.018425877551770803e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.201268238019744139e-01,-9.119064374212553226e-01,1.021494021364728749e-01,1.725525007825558579e-02,5.101903236938221120e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.193040483864395318e-01,-9.149727121934375651e-01,9.904022040433818619e-02,1.734794617826136004e-02,5.186078053367513402e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.184772350715175726e-01,-9.179779122657683033e-01,9.595402576785028170e-02,1.743822852417923133e-02,5.270947083121833160e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.176463874888565186e-01,-9.209224919648573149e-01,9.289255820731044666e-02,1.752609091516817846e-02,5.356507035771859715e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.168115092878250660e-01,-9.238069105094917077e-01,8.985753361440565468e-02,1.761152912518348240e-02,5.442754575651569809e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.159726041354947501e-01,-9.266316316175654544e-01,8.685064386687744997e-02,1.769454091137046339e-02,5.529686323282874405e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.151296757166257345e-01,-9.293971214663930835e-01,8.387355505072957573e-02,1.777512602078479276e-02,5.617298856634001664e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.142827277336491587e-01,-9.321038483526691287e-01,8.092790769112449822e-02,1.785328619585003704e-02,5.705588712330664225e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.134317639066518169e-01,-9.347522802895964489e-01,7.801531507067285820e-02,1.792902517853192657e-02,5.794552386685448975e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.125767879733592824e-01,-9.373428853395984195e-01,7.513736247738486695e-02,1.800234871253480334e-02,5.884186336674775836e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.117178036891198101e-01,-9.398761284947146910e-01,7.229560655024426519e-02,1.807326454414004413e-02,5.974486980760210925e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.108548148268875710e-01,-9.423524713195026159e-01,6.949157509408326883e-02,1.814178242209628714e-02,6.065450699530872819e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.099878251772063331e-01,-9.447723702628232356e-01,6.672676518059100781e-02,1.820791409564684327e-02,6.157073836250308296e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.091168385481928071e-01,-9.471362763550696950e-01,6.400264307917959139e-02,1.827167331081227739e-02,6.249352697288480807e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.082418587655194386e-01,-9.494446333596853194e-01,6.132064306908165119e-02,1.833307580542653525e-02,6.342283552451463691e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.073628896723984205e-01,-9.516978764493227994e-01,5.868216738137978228e-02,1.839213930264712096e-02,6.435862635117653441e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.064799351295638186e-01,-9.538964317027098794e-01,5.608858485267895211e-02,1.844888350319683068e-02,6.530086142271930572e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.055929990152560283e-01,-9.560407144463983764e-01,5.354123042140210981e-02,1.850333007559680151e-02,6.624950234437766705e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.047020852252026790e-01,-9.581311280318162149e-01,5.104140442801930605e-02,1.855550264525579821e-02,6.720451035385226202e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.038071976726038681e-01,-9.601680642640767482e-01,4.859037192183989634e-02,1.860542678182960236e-02,6.816584631837496744e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.029083402881132869e-01,-9.621519003585464747e-01,4.618936196158443530e-02,1.865312998517569196e-02,6.913347072987983954e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.020055170198213457e-01,-9.640829991350380324e-01,4.383956694029713524e-02,1.869864166965726909e-02,7.010734369821652878e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.010987318332384088e-01,-9.659617086674882058e-01,4.154214189869791934e-02,1.874199314694471874e-02,7.108742494442883697e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.001879887112764766e-01,-9.677883598434191548e-01,3.929820385469326438e-02,1.878321760722705819e-02,7.207367379209278502e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.992732916542326427e-01,-9.695632667487635858e-01,3.710883101883086926e-02,1.882235009873804710e-02,7.306604915680929568e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.983546446797705531e-01,-9.712867259562097955e-01,3.497506271436717079e-02,1.885942750596164216e-02,7.406450953610697885e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.974320518229038646e-01,-9.729590144880099212e-01,3.289789833574051359e-02,1.889448852641065213e-02,7.506901299643177483e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.965055171359771480e-01,-9.745803913309410982e-01,3.087829672907650355e-02,1.892757364548786764e-02,7.607951716031864653e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.955750446886495686e-01,-9.761510947561178320e-01,2.891717524058565192e-02,1.895872510967173963e-02,7.709597919214171424e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.946406385678755679e-01,-9.776713428048777743e-01,2.701540992225622689e-02,1.898798689818516697e-02,7.811835578231968169e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.937023028778884326e-01,-9.791413321579964801e-01,2.517383470863024386e-02,1.901540469377019024e-02,7.914660313073165754e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.927600417401808652e-01,-9.805612390689132374e-01,2.339324020343142019e-02,1.904102585091679151e-02,8.018067692963053161e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.918138592934876652e-01,-9.819312175209049531e-01,2.167437423794014648e-02,1.906489936309481437e-02,8.122053234574404257e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.908637596937675207e-01,-9.832513987832250946e-01,2.001794006133272505e-02,1.908707582832745719e-02,8.226612400037196238e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.899097471141842464e-01,-9.845218919421311332e-01,1.842459670003581199e-02,1.910760741258946127e-02,8.331740594908967901e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.889518257450892413e-01,-9.857427842409902663e-01,1.689495850110650857e-02,1.912654781281240488e-02,8.437433166186754374e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.879899997940023937e-01,-9.869141390153153282e-01,1.542959366481205276e-02,1.914395221716156531e-02,8.543685400128167573e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.870242734855938727e-01,-9.880359972001467872e-01,1.402902487972904760e-02,1.915987726424957932e-02,8.650492520004372798e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.860546510616654770e-01,-9.891083768242918683e-01,1.269372808718257327e-02,1.917438100082930960e-02,8.757849683874766034e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.850811367811323160e-01,-9.901312734254078540e-01,1.142413279401215120e-02,1.918752283794766464e-02,8.865751982298082645e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.841037349200038253e-01,-9.911046590975607362e-01,1.022062084070738189e-02,1.919936350566362263e-02,8.974194435964867600e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.831224497713646704e-01,-9.920284833292259963e-01,9.083526163385974642e-03,1.920996500568134327e-02,9.083171993268154831e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.821372856453569833e-01,-9.929026730872462236e-01,8.013134459657067499e-03,1.921939056275085919e-02,9.192679527882290091e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.811482468691598235e-01,-9.937271336590965953e-01,7.009683223445081941e-03,1.922770457481107545e-02,9.302711836353183550e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.801553377869717476e-01,-9.945017484298215305e-01,6.073360668783227452e-03,1.923497256151892112e-02,9.413263635660089335e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.791585627599906028e-01,-9.952263794867830526e-01,5.204305867327033326e-03,1.924126111099576722e-02,9.524329560780793458e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.781579261663950975e-01,-9.959008670133234764e-01,4.402608256257362247e-03,1.924663782554289493e-02,9.635904162161965103e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.771534324013248174e-01,-9.965250323610743699e-01,3.668307526806494252e-03,1.925117126568560641e-02,9.747981903326070896e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.761450858768620176e-01,-9.970986762475738674e-01,3.001393320079245863e-03,1.925493089310447575e-02,9.860557158516207454e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.751328910220110835e-01,-9.976215804311204804e-01,2.401804844826777984e-03,1.925798701195508347e-02,9.973624210238178633e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.741168522826797682e-01,-9.980935079451213054e-01,1.869430458793765399e-03,1.926041070864043664e-02,1.008717724692521078e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.730969741216594304e-01,-9.985142051766598970e-01,1.404108338983859044e-03,1.926227379061673170e-02,1.020121036065810560e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.720732610186056055e-01,-9.988834010229872273e-01,1.005625534215415836e-03,1.926364872417786825e-02,1.031571754497194876e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.710457174700181326e-01,-9.992008085733767864e-01,6.737179164589050763e-04,1.926460857014493377e-02,1.043069269259340043e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.700143479892213927e-01,-9.994661262498868437e-01,4.080703111188431595e-04,1.926522691888056180e-02,1.054612959339606276e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.689791571063447684e-01,-9.996790388905770763e-01,2.083171086507398177e-04,1.926557782434622570e-02,1.066202193238974666e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.679401493683025492e-01,-9.998392182680491125e-01,7.404146874101561165e-05,1.926573573748130083e-02,1.077836328782434960e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.668973293387736145e-01,-9.999463247469773108e-01,4.774925498419486896e-06,1.926577543699280540e-02,1.089514712934134960e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.658507015981822263e-01,-1.000000007751896458e+00,-1.230616374526293431e-09,1.926577196038540413e-02,1.101236681629405439e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.648002707436770464e-01,-9.999999077576271489e-01,5.914320235482091437e-05,1.926580053380841720e-02,1.113001559609651386e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.637460413891117073e-01,-9.999456578602164969e-01,1.815884074859663530e-04,1.926593650116099898e-02,1.124808660289342704e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.626880181650236068e-01,-9.998368832370252424e-01,3.666635693664287154e-04,1.926625525217290358e-02,1.136657285614096485e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.616262057186147016e-01,-9.996732054601517037e-01,6.136482335037950252e-04,1.926683214940606848e-02,1.148546725949101538e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.605606087137299687e-01,-9.994542406489989039e-01,9.217723086201802573e-04,1.926774245551599735e-02,1.160476259980227909e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.594912318308379762e-01,-9.991796039975705535e-01,1.290216455761201845e-03,1.926906125919281010e-02,1.172445154621612057e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.584180797670095675e-01,-9.988489094520610623e-01,1.718112412977101239e-03,1.927086340100733763e-02,1.184452664967527019e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.573411572358974331e-01,-9.984617699428306459e-01,2.204543109516168050e-03,1.927322339801359682e-02,1.196498034217825385e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.562604689677154601e-01,-9.980178030534325728e-01,2.748544230646353479e-03,1.927621536874077335e-02,1.208580493661883803e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.551760197092181937e-01,-9.975166275062458121e-01,3.349104117573471675e-03,1.927991295804295027e-02,1.220699262666944812e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.540878142236794091e-01,-9.969578684120418144e-01,4.005164644588799297e-03,1.928438926128141911e-02,1.232853548670468369e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.529958572908716841e-01,-9.963411586664225883e-01,4.715622021159760618e-03,1.928971674883107196e-02,1.245042547232717567e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.519001537070450825e-01,-9.956661381670158795e-01,5.479327269517704144e-03,1.929596719025768181e-02,1.257265442077525330e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.508007082849067260e-01,-9.949324585036238844e-01,6.295087738789324414e-03,1.930321157887383496e-02,1.269521405165587868e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.496975258535985898e-01,-9.941397820153069498e-01,7.161667154475618094e-03,1.931152005663006738e-02,1.281809596800629059e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.485906112586771854e-01,-9.932877833057115469e-01,8.077786470814659822e-03,1.932096183810244963e-02,1.294129165722828212e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.474799693620920227e-01,-9.923761547738875199e-01,9.042126604652265046e-03,1.933160513641128675e-02,1.306479249268487630e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.463656050421637378e-01,-9.914046037749569829e-01,1.005332821368983519e-02,1.934351708996057476e-02,1.318858973542559876e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.452475231935638877e-01,-9.903728560071820564e-01,1.110999232348694465e-02,1.935676368813891643e-02,1.331267453591768735e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.441257287272921905e-01,-9.892806572297572210e-01,1.221068279244752648e-02,1.937140969848639288e-02,1.343703793623525489e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.430002265706553199e-01,-9.881277753866988256e-01,1.335392723381806715e-02,1.938751859550820814e-02,1.356167087247312220e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.418710216672458113e-01,-9.869140009626686805e-01,1.453821798470181746e-02,1.940515248951473967e-02,1.368656417740365394e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.407381189769198571e-01,-9.856391500400372152e-01,1.576201420172653953e-02,1.942437205710749459e-02,1.381170858333378992e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.396015234757753243e-01,-9.843030645573324078e-01,1.702374274888314443e-02,1.944523647278042014e-02,1.393709472534334914e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.384612401561303274e-01,-9.829056133799581874e-01,1.832180002918303019e-02,1.946780334166492779e-02,1.406271314449631660e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.373172740265014680e-01,-9.814466966747416032e-01,1.965455340322291508e-02,1.949212863379403901e-02,1.418855429160976733e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.361696301115808527e-01,-9.799262438682587728e-01,2.102034330537992310e-02,1.951826662012428812e-02,1.431460853118929621e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.350183134522153328e-01,-9.783442162967832534e-01,2.241748451391179994e-02,1.954626981030391578e-02,1.444086614538807878e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.338633291053832997e-01,-9.767006098784356549e-01,2.384426805674418406e-02,1.957618889191561559e-02,1.456731733848717347e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.327046821441730362e-01,-9.749954552192263701e-01,2.529896294722294367e-02,1.960807267175321278e-02,1.469395224158006885e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.315423776577602899e-01,-9.732288183442767693e-01,2.677981860506994244e-02,1.964196801935648093e-02,1.482076091736508061e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.303764207513859574e-01,-9.714008029658010024e-01,2.828506632870409973e-02,1.967791981296948198e-02,1.494773336522601526e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.292068165463334362e-01,-9.695115505451069593e-01,2.981292126332967321e-02,1.971597088707952722e-02,1.507485952654664674e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.280335701799065307e-01,-9.675612435888468754e-01,3.136158499236728348e-02,1.975616198307054983e-02,1.520212929028613436e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.268566868054064711e-01,-9.655501033597828631e-01,3.292924674320334838e-02,1.979853170204524948e-02,1.532953249882050284e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.256761715921098199e-01,-9.634783929663753854e-01,3.451408620048031228e-02,1.984311645988331638e-02,1.545705895366239757e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.244920297252450458e-01,-9.613464190908393991e-01,3.611427571237239842e-02,1.988995044609672899e-02,1.558469842195285837e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.233042664059700977e-01,-9.591545282785313153e-01,3.772798148044380351e-02,1.993906558424590400e-02,1.571244064253184414e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.221128868513501997e-01,-9.569031140409796077e-01,3.935336728705522702e-02,1.999049149588177574e-02,1.584027533256428810e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.209178962943334268e-01,-9.545926121359171734e-01,4.098859579289861749e-02,2.004425546812808531e-02,1.596819219436653725e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.197192999837294991e-01,-9.522235032757799811e-01,4.263183077777563806e-02,2.010038242312025061e-02,1.609618092208801310e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.185171031841850242e-01,-9.497963139390591669e-01,4.428124035760573618e-02,2.015889489169354204e-02,1.622423120882191261e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.173113111761618477e-01,-9.473116164960707630e-01,4.593499831640522613e-02,2.021981299035236629e-02,1.635233275386392404e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.161019292559127392e-01,-9.447700272767494134e-01,4.759128617164574970e-02,2.028315440018896645e-02,1.648047526985863265e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.148889627354590770e-01,-9.421722099478916368e-01,4.924829713847155704e-02,2.034893435068201029e-02,1.660864849020126655e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.136724169425666453e-01,-9.395188728846677328e-01,5.090423693715315917e-02,2.041716560708436295e-02,1.673684217657046280e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.124522972207226523e-01,-9.368107716433748955e-01,5.255732653170795687e-02,2.048785845999582730e-02,1.686504612650547852e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.112286089291126379e-01,-9.340487064937004202e-01,5.420580489128593721e-02,2.056102071975195872e-02,1.699325018113280183e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.100013574425958263e-01,-9.312335237389656184e-01,5.584793086136314449e-02,2.063665771407866001e-02,1.712144423283272210e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.087705481516828110e-01,-9.283661133191839498e-01,5.748198533255594045e-02,2.071477228908808962e-02,1.724961823297455621e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.075361864625111297e-01,-9.254474117751082529e-01,5.910627421196944925e-02,2.079536481426553368e-02,1.737776219970832703e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.062982777968216164e-01,-9.224783978098908266e-01,6.071913010208872946e-02,2.087843319163600747e-02,1.750586622589166974e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.050568275919344208e-01,-9.194600930537157879e-01,6.231891439282129935e-02,2.096397286749744279e-02,1.763392048658471434e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.038118413007259155e-01,-9.163935633004959280e-01,6.390402049934268414e-02,2.105197684918722056e-02,1.776191524702839253e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.025633243916036053e-01,-9.132799157394344558e-01,6.547287448031949286e-02,2.114243572501600318e-02,1.788984087046310290e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.013112823484830338e-01,-9.101202967033260594e-01,6.702393796649475100e-02,2.123533768704068819e-02,1.801768782568538285e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.000557206707632485e-01,-9.069158932246959015e-01,6.855571069252136551e-02,2.133066855915519491e-02,1.814544669461152782e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.987966448733025970e-01,-9.036679299141320287e-01,7.006673165946307458e-02,2.142841182809394202e-02,1.827310817989490632e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.975340604863950800e-01,-9.003776694511471668e-01,7.155558156322808228e-02,2.152854867802210526e-02,1.840066311228601426e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.962679730557449265e-01,-8.970464089751061243e-01,7.302088413800121580e-02,2.163105802866392702e-02,1.852810245801951583e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.949983881424437238e-01,-8.936754806443794052e-01,7.446130888225015176e-02,2.173591657710552405e-02,1.865541732580154599e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.937253113229446599e-01,-8.902662493048369985e-01,7.587557205777212266e-02,2.184309884371984228e-02,1.878259897405573364e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.924487481890386542e-01,-8.868201095101287956e-01,7.726243779241777354e-02,2.195257721979942955e-02,1.890963881748408681e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.911687043478299319e-01,-8.833384871895550994e-01,7.862072120733101799e-02,2.206432201976016305e-02,1.903652843390892802e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.898851854217111557e-01,-8.798228342946174951e-01,7.994928889864247323e-02,2.217830153713889335e-02,1.916325957069203445e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.885981970483383341e-01,-8.762746292362256773e-01,8.124705999169899717e-02,2.229448210217636200e-02,1.928982415094899205e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.873077448806070633e-01,-8.726953732056423885e-01,8.251300844079204055e-02,2.241282814334131374e-02,1.941621427951529255e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.860138345866267695e-01,-8.690865905487592524e-01,8.374616387324726463e-02,2.253330225225881611e-02,1.954242224874800649e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.847164718496960623e-01,-8.654498240861682179e-01,8.494561219444442257e-02,2.265586525071740590e-02,1.966844054406917675e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.834156623682784204e-01,-8.617866341095896665e-01,8.611049650977718739e-02,2.278047625979897836e-02,1.979426184903096964e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.821114118559759909e-01,-8.580985975343069505e-01,8.724001897674965589e-02,2.290709277236523123e-02,1.991987905037970463e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.808037260415053860e-01,-8.543873032011726210e-01,8.833344111346977390e-02,2.303567072846439759e-02,2.004528524273023193e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.794926106686721479e-01,-8.506543499026170130e-01,8.939008408767351588e-02,2.316616459207563108e-02,2.017047373262083332e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.781780714963458800e-01,-8.469013478682355656e-01,9.040932918576667643e-02,2.329852742973028898e-02,2.029543804274055729e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.768601142984342678e-01,-8.431299110587157086e-01,9.139061886431298909e-02,2.343271099133075255e-02,2.042017191564737266e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.755387448638585424e-01,-8.393416579237135133e-01,9.233345720161266479e-02,2.356866579363400133e-02,2.054466931681732667e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.742139689965273908e-01,-8.355382099241154270e-01,9.323740916294387393e-02,2.370634120447655258e-02,2.066892443791729927e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.728857925153117536e-01,-8.317211854804420490e-01,9.410210102743117588e-02,2.384568552782879991e-02,2.079293169930510610e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.715542212540194011e-01,-8.278922005947046969e-01,9.492722086335200760e-02,2.398664609106828868e-02,2.091668575212649039e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.702192610613689538e-01,-8.240528666795141621e-01,9.571251780061863779e-02,2.412916933311531897e-02,2.104018148043299841e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.688809178009646805e-01,-8.202047854389188375e-01,9.645780190173385005e-02,2.427320089302944378e-02,2.116341400255946081e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.675391973512698529e-01,-8.163495506091974496e-01,9.716294376110376496e-02,2.441868569952608536e-02,2.128637867230358249e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.661941056055823207e-01,-8.124887411219486877e-01,9.782787391776788521e-02,2.456556806085164607e-02,2.140907107977664814e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.648456484720069781e-01,-8.086239214809168852e-01,9.845258231120913861e-02,2.471379175489065577e-02,2.153148705148317521e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.634938318734313389e-01,-8.047566392519180933e-01,9.903711785069856111e-02,2.486330011986896682e-02,2.165362265054808499e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.621386617474984471e-01,-8.008884218715165426e-01,9.958158640516863447e-02,2.501403614424184407e-02,2.177547417612275826e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.607801440465815634e-01,-7.970207761277511294e-01,1.000861508018400098e-01,2.516594255574763558e-02,2.189703816277078219e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.594182847377570766e-01,-7.931551835736769807e-01,1.005510299196201440e-01,2.531896191166226745e-02,2.201831137916583914e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.580530898027795228e-01,-7.892931007945316324e-01,1.009764965521968982e-01,2.547303668701360671e-02,2.213929082658256109e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.566845652380541631e-01,-7.854359564505605240e-01,1.013628769738697671e-01,2.562810936220611194e-02,2.225997373699689919e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.553127170546113378e-01,-7.815851493738897871e-01,1.017105488522143941e-01,2.578412250972434219e-02,2.238035757091573485e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.539375512780798205e-01,-7.777420445608128619e-01,1.020199400818929680e-01,2.594101887901249098e-02,2.250044001444983233e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.525590739486605063e-01,-7.739079757259634818e-01,1.022915273627505173e-01,2.609874148062464941e-02,2.262021897643887169e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.511772911210995440e-01,-7.700842392442609485e-01,1.025258341392472627e-01,2.625723366839950723e-02,2.273969258510675218e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.497922088646621352e-01,-7.662720957735662530e-01,1.027234289065677014e-01,2.641643921948337928e-02,2.285885918432162822e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.484038332631057777e-01,-7.624727664070709565e-01,1.028849238879466776e-01,2.657630241306029040e-02,2.297771732963149183e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.470121704146531760e-01,-7.586874328005167589e-01,1.030109719396845730e-01,2.673676810637555870e-02,2.309626578392406926e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.456172264319661513e-01,-7.549172328720453740e-01,1.031022659111988232e-01,2.689778180808429647e-02,2.321450351264854606e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.442190074421178858e-01,-7.511632643476540139e-01,1.031595360491185664e-01,2.705928975093609828e-02,2.333242967888643216e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.428175195865669433e-01,-7.474265803896148475e-01,1.031835471393047071e-01,2.722123895931887610e-02,2.345004363839531536e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.414127690211299582e-01,-7.437081889674573842e-01,1.031750981398528860e-01,2.738357731618707228e-02,2.356734493400854669e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.400047619159539902e-01,-7.400090505381236028e-01,1.031350180291184387e-01,2.754625362636280161e-02,2.368433328980169283e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.385935044554904350e-01,-7.363300810283561759e-01,1.030641648169126900e-01,2.770921767628778051e-02,2.380100860516933814e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.371790028384670457e-01,-7.326721467078289862e-01,1.029634225282525378e-01,2.787242029142705108e-02,2.391737094878755021e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.357612632778612882e-01,-7.290360660457668729e-01,1.028336995787676456e-01,2.803581339002247236e-02,2.403342055193024018e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.343402920008724744e-01,-7.254226097289149244e-01,1.026759261343370283e-01,2.819935003427101114e-02,2.414915780219940578e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.329160952488951164e-01,-7.218324977944733378e-01,1.024910519449426516e-01,2.836298447769454098e-02,2.426458323665131034e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.314886792774907276e-01,-7.182664017765537645e-01,1.022800436210659081e-01,2.852667220912395371e-02,2.437969753488551106e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.300580503563610657e-01,-7.147249436527243205e-01,1.020438826718157582e-01,2.869036999302273724e-02,2.449450151218128813e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.286242147693199334e-01,-7.112086940146365999e-01,1.017835627938658005e-01,2.885403590643787972e-02,2.460899611216629812e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.271871788142661996e-01,-7.077181757243921068e-01,1.015000883610661236e-01,2.901762937252176308e-02,2.472318239964537701e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.257469488031560445e-01,-7.042538611839558316e-01,1.011944713885734731e-01,2.918111119097716380e-02,2.483706155339817090e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.243035310619742040e-01,-7.008161752309324255e-01,1.008677292524104291e-01,2.934444356362753831e-02,2.495063485885781751e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.228569319307079910e-01,-6.974054912174872944e-01,1.005208828961727946e-01,2.950759011730741052e-02,2.506390370068804496e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.214071577633178745e-01,-6.940221363300544954e-01,1.001549540967373170e-01,2.967051592289337777e-02,2.517686955521087344e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.199542149277103897e-01,-6.906663885779597001e-01,9.977096342598275325e-02,2.983318751016401579e-02,2.528953398323914858e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.184981098057098281e-01,-6.873384797746127095e-01,9.936992917308079543e-02,2.999557288050028539e-02,2.540189862241988838e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.170388487930305921e-01,-6.840385950889971145e-01,9.895286367963083973e-02,3.015764151548222180e-02,2.551396518013094861e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.155764382992484407e-01,-6.807668747416311117e-01,9.852077235893005880e-02,3.031936438109781098e-02,2.562573542595741882e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.141108847477731780e-01,-6.775234143385543417e-01,9.807465100981893613e-02,3.048071392953570144e-02,2.573721118470642111e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.126421945758196763e-01,-6.743082652802415700e-01,9.761548480785159354e-02,3.064166409674248109e-02,2.584839432891409272e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.111703742343798984e-01,-6.711214386716500835e-01,9.714424602301696687e-02,3.080219029828750940e-02,2.595928677211855606e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.096954301881950311e-01,-6.679629017991975948e-01,9.666189206931867506e-02,3.096226941996182633e-02,2.606989046169030932e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.082173689157260643e-01,-6.648325852800189839e-01,9.616936489954823475e-02,3.112187980737659426e-02,2.618020737209040272e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.067361969091264795e-01,-6.617303807954519890e-01,9.566758783333274840e-02,3.128100125112018293e-02,2.629023949851016506e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.052519206742123847e-01,-6.586561415619610615e-01,9.515746533656986583e-02,3.143961496903825703e-02,2.639998885011133889e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.037645467304356472e-01,-6.556096868737077976e-01,9.463988101850256962e-02,3.159770358648075761e-02,2.650945744373270907e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.022740816108536954e-01,-6.525908007624051654e-01,9.411569635286827495e-02,3.175525111300243897e-02,2.661864729786229677e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.007805318621018742e-01,-6.495992359440913022e-01,9.358575000141730604e-02,3.191224291726099671e-02,2.672756042654131914e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.992839040443636911e-01,-6.466347147015658603e-01,9.305085609297154836e-02,3.206866569956517976e-02,2.683619883396898831e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.977842047313435048e-01,-6.436969292590907221e-01,9.251180319849894307e-02,3.222450746116162223e-02,2.694456450875556963e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.962814405102357718e-01,-6.407855458441260588e-01,9.196935407697527654e-02,3.237975747247739261e-02,2.705265941887610714e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.947756179816976241e-01,-6.379002030050242578e-01,9.142424418208458525e-02,3.253440623919649083e-02,2.716048550648288429e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.932667437598194482e-01,-6.350405166411439151e-01,9.087718095306353272e-02,3.268844546582369559e-02,2.726804468313495655e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.917548244720952422e-01,-6.322060806119566001e-01,9.032884373483709983e-02,3.284186801819578838e-02,2.737533882537236396e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.902398667593944159e-01,-6.293964685620763166e-01,8.977988252566837524e-02,3.299466788452961458e-02,2.748236977058508002e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.887218772759319263e-01,-6.266112336426069573e-01,8.923091791397205763e-02,3.314684013415569036e-02,2.758913931276731901e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.872008626892394112e-01,-6.238499140022012446e-01,8.868254113121080973e-02,3.329838087665502877e-02,2.769564919890554200e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.856768296801356577e-01,-6.211120303036034018e-01,8.813531241803232441e-02,3.344928721828452356e-02,2.780190112546183090e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.841497849426976252e-01,-6.183970916155819841e-01,8.758976227901682510e-02,3.359955721770993553e-02,2.790789673513878744e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.826197351842306915e-01,-6.157045939812332280e-01,8.704639125952925993e-02,3.374918984246998849e-02,2.801363761409839115e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.810866871252394539e-01,-6.130340239213566411e-01,8.650566933493640531e-02,3.389818492384057830e-02,2.811912528919476850e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.795506474993976420e-01,-6.103848583994224697e-01,8.596803607274140779e-02,3.404654311063635974e-02,2.822436122563318217e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.780116230535196964e-01,-6.077565689194891485e-01,8.543390149166286973e-02,3.419426582367104256e-02,2.832934682481573763e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.764696205475297930e-01,-6.051486209887717571e-01,8.490364554665842667e-02,3.434135520991904444e-02,2.843408342270264400e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.749246467544329775e-01,-6.025604753473445818e-01,8.437761856809977701e-02,3.448781409583153573e-02,2.853857228796015666e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.733767084602854114e-01,-5.999915905408259587e-01,8.385614214649934894e-02,3.463364594144592579e-02,2.864281462064890427e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.718258124641639517e-01,-5.974414247134796074e-01,8.333950962219778547e-02,3.477885479539689906e-02,2.874681155115534348e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.702719655781371744e-01,-5.949094362146349901e-01,8.282798567443168392e-02,3.492344524912981141e-02,2.885056413945679132e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.687151746272345099e-01,-5.923950859314701667e-01,8.232180818476904360e-02,3.506742239188893884e-02,2.895407337447724605e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.671554464494171555e-01,-5.898978370639275104e-01,8.182118822003446590e-02,3.521079176738046990e-02,2.905734017389757229e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.655927878955469890e-01,-5.874171552737835755e-01,8.132631089347572284e-02,3.535355932991138955e-02,2.916036538357598129e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.640272058293580359e-01,-5.849525157616038973e-01,8.083733586734394294e-02,3.549573140212489153e-02,2.926314977800149042e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.624587071274248284e-01,-5.825033961128966986e-01,8.035439822479899241e-02,3.563731463251913062e-02,2.936569406062093379e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.608872986791325399e-01,-5.800692825951205833e-01,7.987761022298026092e-02,3.577831595590904440e-02,2.946799886384001699e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.593129873866474533e-01,-5.776496724257135229e-01,7.940706083660764580e-02,3.591874255394970905e-02,2.957006475029327341e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.577357801648857638e-01,-5.752440693874317779e-01,7.894281735352540186e-02,3.605860181638990969e-02,2.967189221354770701e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.561556839414840470e-01,-5.728519890934480330e-01,7.848492666952994190e-02,3.619790130455240867e-02,2.977348167899772635e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.545727056567677282e-01,-5.704729566872124291e-01,7.803341585513715040e-02,3.633664871656686751e-02,2.987483350522511238e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.529868522637218842e-01,-5.681065081759942847e-01,7.758829213334605956e-02,3.647485185177969796e-02,2.997594798491797774e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.513981307279594901e-01,-5.657521952299100487e-01,7.714954614971321345e-02,3.661251857849505409e-02,3.007682534677897923e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.498065480276917771e-01,-5.634095794119139411e-01,7.671715108289162144e-02,3.674965680364587067e-02,3.017746575712528245e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.482121111536969238e-01,-5.610782370880019743e-01,7.629106479463611257e-02,3.688627444245855258e-02,3.027786932139809317e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.466148271092899691e-01,-5.587577581571101515e-01,7.587123021592064032e-02,3.702237939133704719e-02,3.037803608623910190e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.450147029102913931e-01,-5.564477459550981608e-01,7.545757621352849887e-02,3.715797950038121633e-02,3.047796604124490272e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.434117455849966971e-01,-5.541478178793545206e-01,7.505001919510093100e-02,3.729308254884872165e-02,3.057765912091858773e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.418059621741456500e-01,-5.518576064957830818e-01,7.464846392792379159e-02,3.742769622207473695e-02,3.067711520675733827e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.401973597308906472e-01,-5.495767596261521559e-01,7.425280458276643791e-02,3.756182809015162771e-02,3.077633412948591607e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.385859453207668457e-01,-5.473049403881424801e-01,7.386292570230212906e-02,3.769548558810988798e-02,3.087531567139001898e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.369717260216599675e-01,-5.450418244064755280e-01,7.347870288546776152e-02,3.782867599748214460e-02,3.097405956827572471e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.353547089237763235e-01,-5.427871057008817335e-01,7.310000432897115208e-02,3.796140642972024859e-02,3.107256551184313653e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.337349011296107282e-01,-5.405404897417875887e-01,7.272669061650106903e-02,3.809368381091868955e-02,3.117083315209384087e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.321123097539158575e-01,-5.383016995321004972e-01,7.235861718030470180e-02,3.822551486767823692e-02,3.126886209929134464e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.304869419236707184e-01,-5.360704723007315708e-01,7.199563475283764225e-02,3.835690611710883269e-02,3.136665192685179071e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.288588047780496737e-01,-5.338465586081656555e-01,7.163758885959183031e-02,3.848786385496966167e-02,3.146420217310955114e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.272279054683905786e-01,-5.316297249027758243e-01,7.128432198901676897e-02,3.861839414631496742e-02,3.156151234397336913e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.255942511581639165e-01,-5.294197493489199990e-01,7.093567402516466613e-02,3.874850281811718228e-02,3.165858191488649109e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.239578490229409358e-01,-5.272164261127109119e-01,7.059148315286857545e-02,3.887819545329602305e-02,3.175541033321496176e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.223187062503623412e-01,-5.250195621016096048e-01,7.025158630978745045e-02,3.900747738567715561e-02,3.185199702052598503e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.206768300401065419e-01,-5.228289774747290064e-01,6.991582004147967044e-02,3.913635369647002621e-02,3.194834137481267211e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.190322276038583427e-01,-5.206445031389658196e-01,6.958402069337726870e-02,3.926482921142183252e-02,3.204444277231790483e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.173849061652770809e-01,-5.184659852398556534e-01,6.925602540798975193e-02,3.939290849917479603e-02,3.214030056979516603e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.157348729599646520e-01,-5.162932799302174525e-01,6.893167313045317490e-02,3.952059587150936332e-02,3.223591410664694012e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.140821352354338680e-01,-5.141262554168125609e-01,6.861080432232199988e-02,3.964789538461128970e-02,3.233128270668736426e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.124267002510773716e-01,-5.119647912264099210e-01,6.829326144439776258e-02,3.977481083965924563e-02,3.242640568019387604e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.107685752781343291e-01,-5.098087772825076192e-01,6.797889059139285151e-02,3.990134578643349250e-02,3.252128232575807187e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.091077675996596774e-01,-5.076581118069304344e-01,6.766754035962162239e-02,4.002750352704449732e-02,3.261591193184805171e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.074442845104918165e-01,-5.055127031481462252e-01,6.735906250355164215e-02,4.015328711891791996e-02,3.271029377835245611e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.057781333172199689e-01,-5.033724696427420708e-01,6.705331310601915507e-02,4.027869937997067412e-02,3.280442713837419810e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.041093213381530935e-01,-5.012373376736231068e-01,6.675015231058489817e-02,4.040374289481950265e-02,3.289831127982991710e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.024378559032870228e-01,-4.991072405325754913e-01,6.644944473791054851e-02,4.052842002057339271e-02,3.299194546666070482e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.007637443542724887e-01,-4.969821207377011496e-01,6.615105987416018685e-02,4.065273289434647580e-02,3.308532896029127146e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.990869940443830366e-01,-4.948619274038358329e-01,6.585487106730457396e-02,4.077668343941298623e-02,3.317846102106196127e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.974076123384820525e-01,-4.927466157312824224e-01,6.556075736898334116e-02,4.090027337248236528e-02,3.327134090930485844e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.957256066129916761e-01,-4.906361472006815494e-01,6.526860303658503504e-02,4.102350421295779953e-02,3.336396788643581357e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.940409842558590503e-01,-4.885304882188235553e-01,6.497829648599093288e-02,4.114637728990370308e-02,3.345634121601558375e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.923537526665245689e-01,-4.864296098998260320e-01,6.468973218584164742e-02,4.126889375006290162e-02,3.354846016447499135e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.906639192558894580e-01,-4.843334897871974798e-01,6.440280977196688461e-02,4.139105456793519544e-02,3.364032400227963127e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.889714914462828022e-01,-4.822421067899030911e-01,6.411743345708999231e-02,4.151286055282038534e-02,3.373193200451841678e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.872764766714295703e-01,-4.801554467492751277e-01,6.383351373200535361e-02,4.163431235852396717e-02,3.382328345165202532e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.855788823764169759e-01,-4.780734968055855849e-01,6.355096557013885372e-02,4.175541049233861407e-02,3.391437763038386843e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.838787160176629465e-01,-4.759962480339235968e-01,6.326970922454290547e-02,4.187615532315891453e-02,3.400521383395362918e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.821759850628824839e-01,-4.739236931172002842e-01,6.298966974624052584e-02,4.199654709013603965e-02,3.409579136281858380e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.804706969910552461e-01,-4.718558286765781040e-01,6.271077771317037919e-02,4.211658591159655324e-02,3.418610952496663469e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.787628592923925730e-01,-4.697926519560427505e-01,6.243296820560564203e-02,4.223627179389408820e-02,3.427616763661271992e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.770524794683048464e-01,-4.677341606245665750e-01,6.215618130578815653e-02,4.235560463950543281e-02,3.436596502193567937e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.753395650313679610e-01,-4.656803567143563138e-01,6.188036120293406023e-02,4.247458425528482379e-02,3.445550101381587016e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.736241235052911280e-01,-4.636312409171929327e-01,6.160545672179083138e-02,4.259321035976049857e-02,3.454477495397222375e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.719061624248831244e-01,-4.615868153327273848e-01,6.133142129405498577e-02,4.271148259178945278e-02,3.463378619299818939e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.701856893360195411e-01,-4.595470821628273583e-01,6.105821232541942117e-02,4.282940051813396987e-02,3.472253409044960271e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.684627117956096987e-01,-4.575120454189410268e-01,6.078579042844726305e-02,4.294696363984598642e-02,3.481101801508945659e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.667372373715630074e-01,-4.554817074830540546e-01,6.051412079179769449e-02,4.306417139893478629e-02,3.489923734487389573e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.650092736427564377e-01,-4.534560700686992574e-01,6.024317223044282216e-02,4.318102318671960399e-02,3.498719146670036806e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.632788281990006585e-01,-4.514351358882814913e-01,5.997291613933652954e-02,4.329751834860348647e-02,3.507487977636906118e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.615459086410068412e-01,-4.494189077083461381e-01,5.970332766310237210e-02,4.341365619031864465e-02,3.516230167888117486e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.598105225803533536e-01,-4.474073852104987203e-01,5.943438472052219729e-02,4.352943598375497158e-02,3.524945658785487712e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.580726776394524524e-01,-4.454005725256106052e-01,5.916606837697329507e-02,4.364485697262252689e-02,3.533634392586545991e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.563323814515162002e-01,-4.433984667523676038e-01,5.889836197821211178e-02,4.375991837730100475e-02,3.542296312417216875e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.545896416605240464e-01,-4.414010681419641147e-01,5.863125179207296506e-02,4.387461939989382997e-02,3.550931362208452069e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.528444659211878554e-01,-4.394083735588171225e-01,5.836472540485384414e-02,4.398895922814825832e-02,3.559539486732065661e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.510968618989195988e-01,-4.374203808132056115e-01,5.809877307512693184e-02,4.410293703891658007e-02,3.568120631528172715e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.493468372697968283e-01,-4.354370869500221453e-01,5.783338688427713814e-02,4.421655200285980886e-02,3.576674742958120823e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.475943997205290348e-01,-4.334584849490663583e-01,5.756856095254390265e-02,4.432980328782831769e-02,3.585201768107385001e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.458395569484247867e-01,-4.314845691534400229e-01,5.730429011734135269e-02,4.444269006177721526e-02,3.593701654788981070e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.440823166613565354e-01,-4.295153318957159461e-01,5.704057144393088336e-02,4.455521149495036970e-02,3.602174351515865514e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.423226865777274197e-01,-4.275507642427991417e-01,5.677740270065723127e-02,4.466736676381678761e-02,3.610619807486978305e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.405606744264380703e-01,-4.255908551374085191e-01,5.651478253659104195e-02,4.477915505117652240e-02,3.619037972521075841e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.387962879468511934e-01,-4.236355968086172319e-01,5.625271165783107818e-02,4.489057555029635910e-02,3.627428797099350732e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.370295348887590414e-01,-4.216849739977034384e-01,5.599119062093656229e-02,4.500162746610861936e-02,3.635792232305640193e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.352604230123479967e-01,-4.197389744722112015e-01,5.573022080541369105e-02,4.511231001605208596e-02,3.644128229765876692e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.334889600881657090e-01,-4.177975847125744435e-01,5.546980448039159212e-02,4.522262243152602029e-02,3.652436741693155797e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.317151538970863456e-01,-4.158607885937666837e-01,5.520994432072243857e-02,4.533256395979444464e-02,3.660717720822338483e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.299390122302762851e-01,-4.139285696433678829e-01,5.495064309561120686e-02,4.544213386360421852e-02,3.668971120384528772e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.281605428891603671e-01,-4.120009111857466100e-01,5.469190420518504903e-02,4.555133142294664345e-02,3.677196894103475500e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.263797536853872527e-01,-4.100777928284069507e-01,5.443373034343575861e-02,4.566015593387513782e-02,3.685394996149223834e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.245966524407953413e-01,-4.081591971122570661e-01,5.417612619009429947e-02,4.576860671068579134e-02,3.693565381115320734e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.228112469873781309e-01,-4.062451048594815095e-01,5.391909470081110434e-02,4.587668308605141049e-02,3.701708004060934032e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.210235451672501350e-01,-4.043354932048924821e-01,5.366263946999479117e-02,4.598438440945731842e-02,3.709822820414297451e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.192335548326121319e-01,-4.024303427235654818e-01,5.340676385752014116e-02,4.609171004810640876e-02,3.717909785988850468e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.174412838457173036e-01,-4.005296310405179194e-01,5.315147182741732784e-02,4.619865938715038423e-02,3.725968856985972244e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.156467400788354860e-01,-3.986333355682882829e-01,5.289676591869962863e-02,4.630523182907556740e-02,3.733999989947173748e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.138499314142201957e-01,-3.967414329163614917e-01,5.264264965227701681e-02,4.641142679260483461e-02,3.742003141751297224e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.120508657440723255e-01,-3.948539001897661271e-01,5.238912576717864866e-02,4.651724371410505959e-02,3.749978269598918668e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.102495509705069487e-01,-3.929707127334657324e-01,5.213619617274797835e-02,4.662268204467931443e-02,3.757925331009411285e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.084459950055177924e-01,-3.910918456605667060e-01,5.188386332801234180e-02,4.672774125068039630e-02,3.765844283775607315e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.066402057709425977e-01,-3.892172748153540640e-01,5.163212916746877379e-02,4.683242081359414999e-02,3.773735085989796678e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.048321911984281485e-01,-3.873469743585175595e-01,5.138099506248661880e-02,4.693672022881861805e-02,3.781597696007463760e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.030219592293964093e-01,-3.854809212636427507e-01,5.113046231526584273e-02,4.704063900510570967e-02,3.789432072471765611e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.012095178150079988e-01,-3.836190893330909124e-01,5.088053153183096783e-02,4.714417666373861560e-02,3.797238174302438596e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.993948749161288836e-01,-3.817614526651256779e-01,5.063120372246188855e-02,4.724733273817201956e-02,3.805015960656292218e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.975780385032944064e-01,-3.799079852224910292e-01,5.038247879525301759e-02,4.735010677364143855e-02,3.812765390926879383e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.957590165566744811e-01,-3.780586635769292991e-01,5.013435670238558517e-02,4.745249832544583268e-02,3.820486424770819234e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.939378170660390643e-01,-3.762134606209793786e-01,4.988683685991725236e-02,4.755450695915177522e-02,3.828179022096801498e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.921144480307220181e-01,-3.743723511531212855e-01,4.963991842666405641e-02,4.765613224907044865e-02,3.835843143023617841e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.902889174595871369e-01,-3.725353099578139759e-01,4.939360027003560083e-02,4.775737377824840896e-02,3.843478747922701166e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.884612333709922316e-01,-3.707023116206288882e-01,4.914788115986466061e-02,4.785823113757894237e-02,3.851085797389904863e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.866314037927538805e-01,-3.688733304158525739e-01,4.890275902297765848e-02,4.795870392534354321e-02,3.858664252253038440e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.847994367621128453e-01,-3.670483413240622772e-01,4.865823184966003301e-02,4.805879174553584487e-02,3.866214073556357711e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.829653403256978783e-01,-3.652273188853800701e-01,4.841429759055439902e-02,4.815849420890241928e-02,3.873735222578482129e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.811291225394909166e-01,-3.634102382231814699e-01,4.817095346151315655e-02,4.825781093114445142e-02,3.881227660806790269e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.792907914687918880e-01,-3.615970754385930963e-01,4.792819692656456743e-02,4.835674153306809137e-02,3.888691349976450828e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.774503551881826846e-01,-3.597878060498483910e-01,4.768602512256923942e-02,4.845528563992256010e-02,3.896126252044235794e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.756078217814919684e-01,-3.579824075233697389e-01,4.744443510506242412e-02,4.855344288158469634e-02,3.903532329225871123e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.737631993417600329e-01,-3.561808529145552016e-01,4.720342284199229022e-02,4.865121289055700932e-02,3.910909543948259404e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.719164959712024987e-01,-3.543831195946279267e-01,4.696298545803551799e-02,4.874859530236066302e-02,3.918257858839901964e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.700677197811755081e-01,-3.525891854268833336e-01,4.672311925145212347e-02,4.884558975607899978e-02,3.925577236803319825e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.682168788921395874e-01,-3.507990254145236242e-01,4.648382047223853508e-02,4.894219589284833083e-02,3.932867640965979072e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.663639814336241196e-01,-3.490126169841267778e-01,4.624508514132854436e-02,4.903841335604523094e-02,3.940129034670264430e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.645090355441917063e-01,-3.472299378783992863e-01,4.600690911201409800e-02,4.913424179028345190e-02,3.947361381518706969e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.626520493714022519e-01,-3.454509646362655118e-01,4.576928881600580978e-02,4.922968084228003527e-02,3.954564645338288775e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.607930310717773814e-01,-3.436756764411154075e-01,4.553221976273404831e-02,4.932473016025289148e-02,3.961738790208024219e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.589319888107645240e-01,-3.419040502789995606e-01,4.529569830530461721e-02,4.941938939349722199e-02,3.968883780457603261e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.570689307627009978e-01,-3.401360640734931162e-01,4.505971998366299636e-02,4.951365819318564315e-02,3.975999580642168296e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.552038651107780942e-01,-3.383716971298446063e-01,4.482428028424298438e-02,4.960753621015020348e-02,3.983086155572526099e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.533368000470053838e-01,-3.366109281021211208e-01,4.458937552328430531e-02,4.970102309685948960e-02,3.990143470321336761e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.514677437721743569e-01,-3.348537356108577945e-01,4.435500124051357240e-02,4.979411850669099998e-02,3.997171490201572031e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.495967044958226744e-01,-3.331000985429223848e-01,4.412115296859285551e-02,4.988682209342246848e-02,4.004170180781885247e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.477236904361980296e-01,-3.313499952548600858e-01,4.388782651476653279e-02,4.997913351086186867e-02,4.011139507859103892e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.458487098202219556e-01,-3.296034077545161867e-01,4.365501837048674227e-02,5.007105241482045360e-02,4.018079437510966856e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.439717708834537979e-01,-3.278603139601054539e-01,4.342272339346896215e-02,5.016257846117637420e-02,4.024989936078507413e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.420928818700546881e-01,-3.261206947999337746e-01,4.319093797212356783e-02,5.025371130582704376e-02,4.031870970143970889e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.402120510327509062e-01,-3.243845293945762931e-01,4.295965786149214677e-02,5.034445060618272794e-02,4.038722506558002356e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.383292866327981319e-01,-3.226517990880932718e-01,4.272887881068129085e-02,5.043479601966745912e-02,4.045544512419585459e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.364445969399446956e-01,-3.209224841538562689e-01,4.249859692872211270e-02,5.052474720448318218e-02,4.052336955111563999e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.345579902323956079e-01,-3.191965653943290571e-01,4.226880839800947753e-02,5.061430382004499656e-02,4.059099802263360091e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.326694747967760879e-01,-3.174740238041954266e-01,4.203950872417175355e-02,5.070346552607175178e-02,4.065833021783991086e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.307790589280949822e-01,-3.157548394664520530e-01,4.181069452931148595e-02,5.079223198290432062e-02,4.072536581826661783e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.288867509297087377e-01,-3.140389940701710514e-01,4.158236196473711482e-02,5.088060285276760353e-02,4.079210450810824007e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.269925591132845422e-01,-3.123264688059790295e-01,4.135450714455513044e-02,5.096857779832794644e-02,4.085854597422459711e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.250964917987639091e-01,-3.106172450456022305e-01,4.112712630985529416e-02,5.105615648348862945e-02,4.092468990613829516e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.231985573143263735e-01,-3.089113033505569983e-01,4.090021560611258739e-02,5.114333857272319872e-02,4.099053599588311503e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.212987639963524100e-01,-3.072086261593881540e-01,4.067377186144514184e-02,5.123012373218313598e-02,4.105608393807438916e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.193971201893874623e-01,-3.055091968050932527e-01,4.044779131467417937e-02,5.131651162956400031e-02,4.112133343030896504e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.174936342461046945e-01,-3.038129950253450895e-01,4.022227047891458601e-02,5.140250193355578451e-02,4.118628417276518072e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.155883145272687984e-01,-3.021200031032469369e-01,3.999720587118586745e-02,5.148809431438212958e-02,4.125093586792796252e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.136811694016988561e-01,-3.004302047598121361e-01,3.977259419857349237e-02,5.157328844363685133e-02,4.131528822126334100e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.117722072462318139e-01,-2.987435816134337285e-01,3.954843244165851673e-02,5.165808399514321136e-02,4.137934094095662152e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.098614364456857340e-01,-2.970601142628586655e-01,3.932471691291709870e-02,5.174248064420303855e-02,4.144309373756757120e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.079488653928228237e-01,-2.953797860296122901e-01,3.910144453815816296e-02,5.182647806738070045e-02,4.150654632411227007e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.060345024883124099e-01,-2.937025796173415859e-01,3.887861215389811637e-02,5.191007594348912707e-02,4.156969841653598285e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.041183561406945790e-01,-2.920284753699232683e-01,3.865621647056965982e-02,5.199327395276098079e-02,4.163254973301040995e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.022004347663427071e-01,-2.903574580871810529e-01,3.843425469493789298e-02,5.207607177764341649e-02,4.169509999435803693e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.002807467894267113e-01,-2.886895096368481961e-01,3.821272337754814258e-02,5.215846910245060253e-02,4.175734892421805733e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.983593006418760796e-01,-2.870246129904621690e-01,3.799161976357332687e-02,5.224046561300459135e-02,4.181929624861588368e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.964361047633429003e-01,-2.853627520577611265e-01,3.777094138244370403e-02,5.232206099842372921e-02,4.188094169641654596e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.945111676011643920e-01,-2.837039077703729095e-01,3.755068472740191038e-02,5.240325494967859599e-02,4.194228499892125872e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.925844976103263773e-01,-2.820480630705451963e-01,3.733084689941720824e-02,5.248404715904445017e-02,4.200332588969816761e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.906561032534255906e-01,-2.803952013726989101e-01,3.711142515891788923e-02,5.256443732131733243e-02,4.206406410500159732e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.887259930006330966e-01,-2.787453049959661699e-01,3.689241656307880313e-02,5.264442513339075147e-02,4.212449938352126888e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.867941753296565421e-01,-2.770983578407213432e-01,3.667381858300441250e-02,5.272401029453961663e-02,4.218463146648452899e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.848606587257031308e-01,-2.754543426266470862e-01,3.645562830891448808e-02,5.280319250663997183e-02,4.224446009770073118e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.829254516814425413e-01,-2.738132428237172733e-01,3.623784295337748040e-02,5.288197147329540965e-02,4.230398502338418298e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.809885626969692352e-01,-2.721750415760257891e-01,3.602046016860861732e-02,5.296034690113549798e-02,4.236320599236014783e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.790500002797654311e-01,-2.705397200359498378e-01,3.580347678830345104e-02,5.303831849880012023e-02,4.242212275556668244e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.771097729446636349e-01,-2.689072637838904600e-01,3.558689051338531639e-02,5.311588597713320231e-02,4.248073506641251162e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.751678892138092802e-01,-2.672776550727771760e-01,3.537069843175431189e-02,5.319304904963925024e-02,4.253904268100536235e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.732243576166232035e-01,-2.656508778067487309e-01,3.515489814546209069e-02,5.326980743185445732e-02,4.259704535759036292e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.712791866897643955e-01,-2.640269168191012872e-01,3.493948718681581800e-02,5.334616084243781753e-02,4.265474285728457216e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.693323849770918654e-01,-2.624057515704821264e-01,3.472446260910766469e-02,5.342210900205553170e-02,4.271213494317327997e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.673839610296283364e-01,-2.607873680066410405e-01,3.450982166302115489e-02,5.349765163304009280e-02,4.276922138044007005e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.654339234055213326e-01,-2.591717516147429290e-01,3.429556247396046281e-02,5.357278846075334833e-02,4.282600193737639560e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.634822806700064302e-01,-2.575588830009680752e-01,3.408168223712938066e-02,5.364751921368279136e-02,4.288247638438603682e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.615290413953694548e-01,-2.559487457172385239e-01,3.386817847791396413e-02,5.372184362202286206e-02,4.293864449383007487e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.595742141609086784e-01,-2.543413234357141950e-01,3.365504867178033960e-02,5.379576141889539503e-02,4.299450604046566315e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.576178075528972378e-01,-2.527366004503950880e-01,3.344228983862282401e-02,5.386927233890054928e-02,4.305006080138746860e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.556598301645456095e-01,-2.511345603466804688e-01,3.322989972803354342e-02,5.394237611883241634e-02,4.310530855610543166e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.537002905959633070e-01,-2.495351868780310567e-01,3.301787636058167719e-02,5.401507249873295502e-02,4.316024908632367652e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.517391974541219102e-01,-2.479384640642217641e-01,3.280621699823875470e-02,5.408736122150886727e-02,4.321488217612828309e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.497765593528163741e-01,-2.463443746349964492e-01,3.259491904519199595e-02,5.415924203149822397e-02,4.326920761170457430e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.478123849126281697e-01,-2.447529037279323749e-01,3.238398038354890068e-02,5.423071467604905632e-02,4.332322518150482638e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.458466827608864258e-01,-2.431640345832005168e-01,3.217339821502858410e-02,5.430177890461603679e-02,4.337693467637944167e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.438794615316307368e-01,-2.415777500797698651e-01,3.196317006437204844e-02,5.437243446860173107e-02,4.343033588908270515e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.419107298655730265e-01,-2.399940347836647392e-01,3.175329363829797191e-02,5.444268112168899193e-02,4.348342861462378695e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.399404964100594673e-01,-2.384128720956989189e-01,3.154376673292508260e-02,5.451251862055871145e-02,4.353621265013566943e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.379687698190326217e-01,-2.368342476493640048e-01,3.133458657430254063e-02,5.458194672374643008e-02,4.358868779511614333e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.359955587529934729e-01,-2.352581436610858834e-01,3.112575121446605614e-02,5.465096519224434124e-02,4.364085385118365301e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.340208718789632880e-01,-2.336845462265491613e-01,3.091725812009240085e-02,5.471957378983226261e-02,4.369271062208846534e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.320447178704457047e-01,-2.321134366902992685e-01,3.070910461109397846e-02,5.478777228190175042e-02,4.374425791378676598e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.300671054073883171e-01,-2.305448001669923863e-01,3.050128863401264587e-02,5.485556043600735981e-02,4.379549553390184591e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.280880431761448723e-01,-2.289786208520001587e-01,3.029380810715975522e-02,5.492293802303171990e-02,4.384642329259025306e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.261075398694370242e-01,-2.274148832607233439e-01,3.008666017943820853e-02,5.498990481554315995e-02,4.389704100185874980e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.241256041863160853e-01,-2.258535720879291664e-01,2.987984276813863277e-02,5.505646058818317035e-02,4.394734847619538032e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.221422448321247800e-01,-2.242946698112092130e-01,2.967335334975788538e-02,5.512260511792135831e-02,4.399734553174280416e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.201574705184592196e-01,-2.227381615886918909e-01,2.946718984990668103e-02,5.518833818400794428e-02,4.404703198682196064e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.181712899631304881e-01,-2.211840308052820114e-01,2.926134983371223020e-02,5.525365956809925178e-02,4.409640766161550540e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.161837118901262844e-01,-2.196322631692371186e-01,2.905583119592191482e-02,5.531856905415503262e-02,4.414547237858754136e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.141947450295727307e-01,-2.180828413464480087e-01,2.885063145782363822e-02,5.538306642834930210e-02,4.419422596201603159e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.122043981176959027e-01,-2.165357521674018126e-01,2.864574875429666198e-02,5.544715147934440452e-02,4.424266823841414564e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.102126798967836940e-01,-2.149909770011372567e-01,2.844118020458268406e-02,5.551082399799425054e-02,4.429079903619868031e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.082195991151471803e-01,-2.134485009702106195e-01,2.823692363626470470e-02,5.557408377634905083e-02,4.433861818544097555e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.062251645270823164e-01,-2.119083085123749655e-01,2.803297680988084406e-02,5.563693060926123979e-02,4.438612551831789821e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.042293848928312450e-01,-2.103703850418109089e-01,2.782933741704336078e-02,5.569936429334960970e-02,4.443332086910218304e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.022322689785442162e-01,-2.088347132720558275e-01,2.762600348928348146e-02,5.576138462790763278e-02,4.448020407386729103e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.002338255562408409e-01,-2.073012799719763655e-01,2.742297265355748970e-02,5.582299141454128560e-02,4.452677497068658896e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.982340634037716764e-01,-2.057700665582694322e-01,2.722024230485868904e-02,5.588418445637013260e-02,4.457303339956003585e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.962329913047793695e-01,-2.042410588954778616e-01,2.701781046890668753e-02,5.594496355842987906e-02,4.461897920195037615e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.942306180486604639e-01,-2.027142422537059618e-01,2.681567488563320334e-02,5.600532852839856107e-02,4.466461222174009094e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.922269524305266541e-01,-2.011895995412427696e-01,2.661383322897988707e-02,5.606527917567207669e-02,4.470993230438357058e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.902220032511660386e-01,-1.996671153627049000e-01,2.641228334575971420e-02,5.612481531178513394e-02,4.475493929709080665e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.882157793170046500e-01,-1.981467750797328287e-01,2.621102332289485318e-02,5.618393675076923527e-02,4.479963304895746568e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.862082894400676536e-01,-1.966285630879540391e-01,2.601005043339884096e-02,5.624264330864365419e-02,4.484401341109501282e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.841995424379405444e-01,-1.951124632100794121e-01,2.580936282216150096e-02,5.630093480275954326e-02,4.488808023617233967e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.821895471337307892e-01,-1.935984621876825629e-01,2.560895860330727331e-02,5.635881105402898439e-02,4.493183337891250573e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.801783123560285804e-01,-1.920865429480611009e-01,2.540883486289282137e-02,5.641627188436292711e-02,4.497527269593218358e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.781658469388685329e-01,-1.905766878036706247e-01,2.520898975430542130e-02,5.647331711743162203e-02,4.501839804505479159e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.761521597216903823e-01,-1.890688830224739048e-01,2.500942089349093844e-02,5.652994657912091547e-02,4.506120928573885687e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.741372595493007380e-01,-1.875631161920616741e-01,2.481012649380036003e-02,5.658616009737706815e-02,4.510370627987671521e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.721211552718337257e-01,-1.860593684942971615e-01,2.461110428078539317e-02,5.664195750282323416e-02,4.514588889103831604e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.701038557447121846e-01,-1.845576241142625473e-01,2.441235163488455692e-02,5.669733862703271038e-02,4.518775698384284167e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.680853698286093101e-01,-1.830578691791947266e-01,2.421386681090191215e-02,5.675230330364224030e-02,4.522931042493445464e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.660657063894090180e-01,-1.815600886797041624e-01,2.401564756127596428e-02,5.680685136873430546e-02,4.527054908273340228e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.640448742981674202e-01,-1.800642671622897928e-01,2.381769177180428562e-02,5.686098266015291586e-02,4.531147282748982374e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.620228824310735782e-01,-1.785703883700370531e-01,2.361999707961173062e-02,5.691469701754744981e-02,4.535208153080649840e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.599997396694109231e-01,-1.770784385958131812e-01,2.342256172754215993e-02,5.696799428252500436e-02,4.539237506619213658e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.579754548995178420e-01,-1.755884001964949648e-01,2.322538344234151286e-02,5.702087429925957462e-02,4.543235330857878784e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.559500370127490432e-01,-1.741002595899645011e-01,2.302845973825099271e-02,5.707333691293405703e-02,4.547201613451030133e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.539234949054359203e-01,-1.726139980823635511e-01,2.283178825490756944e-02,5.712538197009878194e-02,4.551136342195298279e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.518958374788481391e-01,-1.711296037276920368e-01,2.263536736588295586e-02,5.717700931925706609e-02,4.555039505028627977e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.498670736391540859e-01,-1.696470614896304907e-01,2.243919505838475048e-02,5.722821881199047916e-02,4.558911090119163179e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.478372122973819258e-01,-1.681663548468704528e-01,2.224326896845690502e-02,5.727901030109465441e-02,4.562751085753935509e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.458062623693803850e-01,-1.666874683208811048e-01,2.204758692130679223e-02,5.732938364122118491e-02,4.566559480371873980e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.437742327757797534e-01,-1.652103871010052738e-01,2.185214663216566294e-02,5.737933868823724692e-02,4.570336262559021590e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.417411324419524721e-01,-1.637350954190754659e-01,2.165694621311077886e-02,5.742887530017841413e-02,4.574081421064441488e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.397069702979740813e-01,-1.622615795338143985e-01,2.146198377606614929e-02,5.747799333712829556e-02,4.577794944783317721e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.376717552785839183e-01,-1.607898235842844803e-01,2.126725722311151312e-02,5.752669266168103368e-02,4.581476822799553372e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.356354963231459820e-01,-1.593198097382980127e-01,2.107276393411300006e-02,5.757497313707032882e-02,4.585127044274908137e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.335982023756095483e-01,-1.578515262452400980e-01,2.087850231302905424e-02,5.762283462862888977e-02,4.588745598549832105e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.315598823844699505e-01,-1.563849558053212740e-01,2.068446989814960724e-02,5.767027700350901148e-02,4.592332475125480706e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.295205453027292508e-01,-1.549200848265611385e-01,2.049066490148236663e-02,5.771730013047198654e-02,4.595887663632939879e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.274802000878569097e-01,-1.534568969778068170e-01,2.029708518632964559e-02,5.776390388044456980e-02,4.599411153869753521e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.254388557017504569e-01,-1.519953765383946220e-01,2.010372848844975413e-02,5.781008812579001982e-02,4.602902935737563372e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.233965211106962168e-01,-1.505355089774873534e-01,1.991059254658266234e-02,5.785585274001278644e-02,4.606362999297087257e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.213532052853296739e-01,-1.490772784086391023e-01,1.971767555653507892e-02,5.790119759860391790e-02,4.609791334738552182e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.193089172005963650e-01,-1.476206705884352544e-01,1.952497524902672282e-02,5.794612257877363587e-02,4.613187932400925617e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.172636658357122441e-01,-1.461656687673354438e-01,1.933248967407266011e-02,5.799062755934604463e-02,4.616552782752834649e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.152174601741243254e-01,-1.447122590055159586e-01,1.914021673160931636e-02,5.803471242086216675e-02,4.619885876386500834e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.131703092034713254e-01,-1.432604271180794031e-01,1.894815460959788436e-02,5.807837704591425898e-02,4.623187204071342871e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.111222219155439450e-01,-1.418101558308799970e-01,1.875630077103604756e-02,5.812162131844875873e-02,4.626456756690022210e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.090732073062456231e-01,-1.403614300697661188e-01,1.856465330381697545e-02,5.816444512353410867e-02,4.629694525230498292e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.070232743755528182e-01,-1.389142349350321326e-01,1.837321006269787907e-02,5.820684834814183334e-02,4.632900500824016499e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.049724321274755956e-01,-1.374685558145125330e-01,1.818196908247006471e-02,5.824883088069000847e-02,4.636074674736522239e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.029206895700181590e-01,-1.360243777711538926e-01,1.799092839724995146e-02,5.829039261153351470e-02,4.639217038379672142e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.008680557151390489e-01,-1.345816857603361061e-01,1.780008604755458149e-02,5.833153343280347153e-02,4.642327583288804460e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.988145395787117298e-01,-1.331404641573453451e-01,1.760943963882382821e-02,5.837225323799757892e-02,4.645406301135759364e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.967601501804849828e-01,-1.317006958175198084e-01,1.741898698014578487e-02,5.841255192143000180e-02,4.648453183673164624e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.947048965440432433e-01,-1.302623671121353466e-01,1.722872601188265088e-02,5.845242937893968538e-02,4.651468222784926887e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.926487876967668822e-01,-1.288254619597506700e-01,1.703865493376442025e-02,5.849188550802610770e-02,4.654451410477745688e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.905918326697928489e-01,-1.273899685604782950e-01,1.684877187418076794e-02,5.853092020850012706e-02,4.657402738915559781e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.885340404979746198e-01,-1.259558675252432647e-01,1.665907436681268033e-02,5.856953338116930446e-02,4.660322200382414071e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.864754202198428135e-01,-1.245231457369719991e-01,1.646956064364839614e-02,5.860772492818651369e-02,4.663209787226038072e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.844159808775653053e-01,-1.230917881279543075e-01,1.628022828696635768e-02,5.864549475328109662e-02,4.666065491979681790e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.823557315169075932e-01,-1.216617785568563231e-01,1.609107545546428417e-02,5.868284276105321245e-02,4.668889307247218734e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.802946811871931343e-01,-1.202331026046514206e-01,1.590210025064894492e-02,5.871976885848301936e-02,4.671681225775596302e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.782328389412633773e-01,-1.188057441705780426e-01,1.571330044877443724e-02,5.875627295352636725e-02,4.674441240401654429e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.761702138354382385e-01,-1.173796887671384598e-01,1.552467398081067845e-02,5.879235495569548609e-02,4.677169344095864489e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.741068149294762724e-01,-1.159549202453131200e-01,1.533621878356730694e-02,5.882801477538730089e-02,4.679865529915039346e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.720426512865346202e-01,-1.145314266511411327e-01,1.514793340452116541e-02,5.886325232562934773e-02,4.682529791076426307e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.699777319731296532e-01,-1.131091889205937395e-01,1.495981496759856040e-02,5.889806752027934578e-02,4.685162120894478255e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.679120660590968095e-01,-1.116881933829294710e-01,1.477186158211494550e-02,5.893246027371225326e-02,4.687762512753539945e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.658456626175507931e-01,-1.102684245048640710e-01,1.458407108239008140e-02,5.896643050174991962e-02,4.690330960178776265e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.637785307248458277e-01,-1.088498682818584140e-01,1.439644208179179270e-02,5.899997812221940280e-02,4.692867456796440129e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.617106794605358555e-01,-1.074325088353507290e-01,1.420897188646247714e-02,5.903310305451996837e-02,4.695371996366781708e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.596421179073343188e-01,-1.060163294593241129e-01,1.402165876232334962e-02,5.906580521837908465e-02,4.697844572705692223e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.575728551510746644e-01,-1.046013176087567664e-01,1.383450080589294050e-02,5.909808453611651441e-02,4.700285179762209720e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.555029002806702365e-01,-1.031874557145315147e-01,1.364749527807913518e-02,5.912994093017510944e-02,4.702693811597853379e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.534322623880743364e-01,-1.017747276833983472e-01,1.346064045156428537e-02,5.916137432386921885e-02,4.705070462319538849e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.513609505682403933e-01,-1.003631208316762696e-01,1.327393460538608093e-02,5.919238464274181877e-02,4.707415126167044384e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.492889739190818854e-01,-9.895262000248444823e-02,1.308737563746422902e-02,5.922297181380964393e-02,4.709727797514952630e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.472163415414325383e-01,-9.754320996386572906e-02,1.290096175509167914e-02,5.925313576554522982e-02,4.712008470830252316e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.451430625390061624e-01,-9.613487505149494183e-02,1.271469050271165427e-02,5.928287642765662363e-02,4.714257140680948033e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.430691460183568520e-01,-9.472759965562742301e-02,1.252855985418199064e-02,5.931219373017952018e-02,4.716473801712377512e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.409946010888388224e-01,-9.332136941751681403e-02,1.234256795479539946e-02,5.934108760513322894e-02,4.718658448684597828e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.389194368625665810e-01,-9.191616723244502851e-02,1.215671226379707107e-02,5.936955798497015740e-02,4.720811076433655828e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.368436624543747093e-01,-9.051197964596299772e-02,1.197099133040905108e-02,5.939760480355390365e-02,4.722931679888878187e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.347672869817779229e-01,-8.910879067824366284e-02,1.178540284273728832e-02,5.942522799652708715e-02,4.725020254094040029e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.326903195649310474e-01,-8.770658561681854426e-02,1.159994482249006309e-02,5.945242750010194810e-02,4.727076794174975283e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.306127693265889400e-01,-8.630534820066620982e-02,1.141461536789783841e-02,5.947920325217641302e-02,4.729101295350442435e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.285346453920664100e-01,-8.490506541227128834e-02,1.122941255323259800e-02,5.950555519195306620e-02,4.731093752935008889e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.264559568891981123e-01,-8.350572041249139377e-02,1.104433398579714003e-02,5.953148325970109705e-02,4.733054162362284045e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.243767129482984818e-01,-8.210729820484592711e-02,1.085937761270789302e-02,5.955698739600694819e-02,4.734982519110320642e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.222969227021216548e-01,-8.070978374068409067e-02,1.067454164102685232e-02,5.958206754322754178e-02,4.736878818782257472e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.202165952858213066e-01,-7.931316151393753844e-02,1.048982419885434621e-02,5.960672364507956650e-02,4.738743057039594975e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.181357398369105166e-01,-7.791741684859289363e-02,1.030522291380164997e-02,5.963095564657291264e-02,4.740575229665173751e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.160543654952217590e-01,-7.652253284862645111e-02,1.012073564334158710e-02,5.965476349262582856e-02,4.742375332486066797e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.139724814028665600e-01,-7.512849686628721158e-02,9.936360934148586593e-03,5.967814713050676595e-02,4.744143361450237628e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.118900967041955297e-01,-7.373529192288270917e-02,9.752096335256373039e-03,5.970110650832294308e-02,4.745879312587843457e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.098072205457580330e-01,-7.234290453802724730e-02,9.567940105783732940e-03,5.972364157538648938e-02,4.747583182021369308e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.077238620762621668e-01,-7.095131754981724714e-02,9.383889809724385925e-03,5.974575228173921448e-02,4.749254965953928487e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.056400304465344858e-01,-6.956051594429164819e-02,9.199943670298206186e-03,5.976743857849501490e-02,4.750894660651194257e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.035557348094797714e-01,-6.817048399115202550e-02,9.016099330133571163e-03,5.978870041755176418e-02,4.752502262454190718e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.014709843200409523e-01,-6.678120914771626393e-02,8.832355306031828418e-03,5.980953775228917135e-02,4.754077767819993028e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.938578813515881738e-02,-6.539267301028423018e-02,8.648709067146979901e-03,5.982995053717623385e-02,4.755621173286787018e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.730015541373175647e-02,-6.400486132075781931e-02,8.465158889939521786e-03,5.984993872736596443e-02,4.757132475424907248e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.521409531657563929e-02,-6.261776126938640996e-02,8.281702587381900224e-03,5.986950227931035068e-02,4.758611670950391725e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.312761700638348683e-02,-6.123135495497440683e-02,8.098338654525269117e-03,5.988864115089614221e-02,4.760058756652465473e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.104072964768519793e-02,-5.984562749675664123e-02,7.915064482379484748e-03,5.990735530111510054e-02,4.761473729351760054e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.895344240680749803e-02,-5.846056597034326313e-02,7.731878300019603099e-03,5.992564468925173221e-02,4.762856586008280546e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.686576445183338824e-02,-5.707615090914089362e-02,7.548777777099825899e-03,5.994350927593718875e-02,4.764207323622930979e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.477770495256206629e-02,-5.569236846309981448e-02,7.365760977182808471e-03,5.996094902218439271e-02,4.765525939234598884e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.268927308046866709e-02,-5.430920368820824767e-02,7.182825825226212338e-03,5.997796389028652031e-02,4.766812429988216970e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-8.060047800866380896e-02,-5.292664449565083473e-02,6.999970984560577487e-03,5.999455384406729430e-02,4.768066793141906845e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.851132891185348683e-02,-5.154467307981271118e-02,6.817194107232070824e-03,6.001071884924681632e-02,4.769289026052945801e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.642183496629867401e-02,-5.016327358370398226e-02,6.634493001930581550e-03,6.002645887133928326e-02,4.770479126088049693e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.433200534977503493e-02,-4.878243170036152682e-02,6.451865610122209035e-03,6.004177387703182184e-02,4.771637090715595275e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.224184924153258247e-02,-4.740213239449219546e-02,6.269310039469570134e-03,6.005666383377069456e-02,4.772762917479015377e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.015137582225546009e-02,-4.602236041228577323e-02,6.086824184815839076e-03,6.007112871036798807e-02,4.773856604010554006e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.806059427402150197e-02,-4.464309900914002760e-02,5.904406048185510333e-03,6.008516847618132484e-02,4.774918147988019834e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.596951378026195967e-02,-4.326433353008848209e-02,5.722053626621020211e-03,6.009878310185295253e-02,4.775947547152858408e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.387814352572118715e-02,-4.188604938561812235e-02,5.539765058033714433e-03,6.011197255899879133e-02,4.776944799338673775e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-6.178649269641621478e-02,-4.050823045309633746e-02,5.357537731555265907e-03,6.012473681996895192e-02,4.777909902447747270e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.969457047959648988e-02,-3.913086222449792262e-02,5.175369941914104849e-03,6.013707585707685210e-02,4.778842854447249433e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.760238606370348541e-02,-3.775392975062330514e-02,4.993259968842729025e-03,6.014898964516567775e-02,4.779743653397108827e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.550994863833031562e-02,-3.637741724073631500e-02,4.811205394477899820e-03,6.016047815920838593e-02,4.780612297420187562e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.341726739418143494e-02,-3.500130875535285846e-02,4.629204296987086444e-03,6.017154137522244539e-02,4.781448784697523990e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.132435152303221199e-02,-3.362558935233434659e-02,4.447254393567797587e-03,6.018217926952569574e-02,4.782253113471432449e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.923121021768859379e-02,-3.225024491601384546e-02,4.265354177562559988e-03,6.019239181998582711e-02,4.783025282076290852e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.713785267194667977e-02,-3.087525957128391119e-02,4.083501350679577188e-03,6.020217900557436885e-02,4.783765288927019910e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.504428808055239986e-02,-2.950061637017211899e-02,3.901693885977874313e-03,6.021154080580166240e-02,4.784473132470151713e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.295052563916110233e-02,-2.812630209817643112e-02,3.719929946605185587e-03,6.022047720120710795e-02,4.785148811227473153e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-4.085657454429714869e-02,-2.675230115589279090e-02,3.538207604266534190e-03,6.022898817385696768e-02,4.785792323817930627e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.876244399331353618e-02,-2.537859920661539662e-02,3.356524568471612284e-03,6.023707370615414419e-02,4.786403668925880428e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.666814318435148573e-02,-2.400517929440811049e-02,3.174879053393636109e-03,6.024473378164006687e-02,4.786982845308644374e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.457368131630006447e-02,-2.263202660776864050e-02,2.993268687944358915e-03,6.025196838441280317e-02,4.787529851753242749e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.247906758875574590e-02,-2.125912661386868130e-02,2.811691717030550663e-03,6.025877749948756884e-02,4.788044687157500534e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.038431120198204285e-02,-1.988646170443703065e-02,2.630145744043349901e-03,6.026516111256791791e-02,4.788527350442914887e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.828942135686907111e-02,-1.851401953296162237e-02,2.448629299798411700e-03,6.027111921038853098e-02,4.788977840615179504e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.619440725489314420e-02,-1.714178296834829862e-02,2.267139864685898017e-03,6.027665178096636156e-02,4.789396156767789514e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.409927809807633700e-02,-1.576973700367740802e-02,2.085675595429236241e-03,6.028175881220539795e-02,4.789782298022020046e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-2.200404308894610833e-02,-1.439786737261817412e-02,1.904234631074033183e-03,6.028644029399456844e-02,4.790136263601682542e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.990871143049483677e-02,-1.302615738834221083e-02,1.722814650919555265e-03,6.029069621629656872e-02,4.790458052777324949e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.781329232613943978e-02,-1.165459384057771983e-02,1.541414388233565370e-03,6.029452657088119061e-02,4.790747664899173142e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.571779497968089911e-02,-1.028315901099415182e-02,1.360031189844050897e-03,6.029793135058881398e-02,4.791005099386639321e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.362222859526389211e-02,-8.911838273879051434e-03,1.178663074470144177e-03,6.030091054802686618e-02,4.791230355682438713e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.152660237733631014e-02,-7.540617353338227927e-03,9.973080024681805263e-04,6.030346415711960312e-02,4.791423433338795057e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-9.430925530608870802e-03,-6.169480691597984366e-03,8.159640322160908749e-04,6.030559217234030683e-02,4.791584331963795740e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-7.335207260014662417e-03,-4.798413746078192786e-03,6.346294372857755675e-04,6.030729458985728542e-02,4.791713051251573763e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-5.239456770668723257e-03,-3.427399823227861558e-03,4.533019028824298202e-04,6.030857140647433862e-02,4.791809590941950026e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-3.143683267827608162e-03,-2.056424469099807882e-03,2.719793771797988338e-04,6.030942261957650263e-02,4.791873950839450336e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+-1.047895956848953446e-03,-6.854712333548786032e-04,9.065978101642293576e-05,6.030984822713496979e-02,4.791906130808503694e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+1.047895956848953446e-03,6.854747609913691394e-04,-9.065866544552002890e-05,6.030984822848513283e-02,4.791906130767911165e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.143683267827608162e-03,2.056428449478961765e-03,-2.719785003395683103e-04,6.030942262325027919e-02,4.791873950710415775e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.239456770668723257e-03,3.427403443994493822e-03,-4.533008805037072315e-04,6.030857141229080398e-02,4.791809590721641809e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.335207260014662417e-03,4.798417281660659339e-03,-6.346284481651142144e-04,6.030729459817749799e-02,4.791713050950793251e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.430925530608870802e-03,6.169485654641921994e-03,-8.159633960109273455e-04,6.030559218249947895e-02,4.791584331563709109e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.152660237733631014e-02,7.540622598948598480e-03,-9.973071293973953706e-04,6.030346416895229766e-02,4.791423432815805072e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.362222859526389211e-02,8.911843003398844595e-03,-1.178661748546046013e-03,6.030091056243134379e-02,4.791230355041420363e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.571779497968089911e-02,1.028316228729233943e-02,-1.360029514163022717e-03,6.029793136857455188e-02,4.791005098650062410e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.781329232613943978e-02,1.165459635627093260e-02,-1.541412635191230377e-03,6.029452659290954891e-02,4.790747664098173875e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.990871143049483677e-02,1.302615996569733703e-02,-1.722812738744650737e-03,6.029069624259203453e-02,4.790458051915872928e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.200404308894610833e-02,1.439786981379585079e-02,-1.904232322297100526e-03,6.028644032518190587e-02,4.790136262681881640e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.409927809807633700e-02,1.576973926270058488e-02,-2.085672765948306303e-03,6.028175884945143059e-02,4.789782297045768744e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.619440725489314420e-02,1.714178460696852077e-02,-2.267136706467506348e-03,6.027665182525876464e-02,4.789396155746517558e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.828942135686907111e-02,1.851402190561390534e-02,-2.448626042676379112e-03,6.027111926229975630e-02,4.788977839550367377e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.038431120198204285e-02,1.988646500052378835e-02,-2.630142701762513795e-03,6.026516117184953225e-02,4.788527349308006609e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.247906758875574590e-02,2.125912881813637328e-02,-2.811688582411823317e-03,6.025877756597491225e-02,4.788044685956529545e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.457368131630006447e-02,2.263202883428868870e-02,-2.993265429452636808e-03,6.025196845838190485e-02,4.787529850503589590e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.666814318435148573e-02,2.400518130956883628e-02,-3.174875571334434803e-03,6.024473386354666643e-02,4.786982844006500426e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.876244399331353618e-02,2.537860155423338551e-02,-3.356521244286623379e-03,6.023707379608985585e-02,4.786403667577117660e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.085657454429714869e-02,2.675230446225179692e-02,-3.538204224383324509e-03,6.022898827158363472e-02,4.785792322399530785e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.295052563916110233e-02,2.812630480836791555e-02,-3.719926481041102486e-03,6.022047730698060208e-02,4.785148809737831965e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.504428808055239986e-02,2.950061849150538082e-02,-3.901690119280616393e-03,6.021154091998141833e-02,4.784473130922929918e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.713785267194667977e-02,3.087526092030303329e-02,-4.083497294704225601e-03,6.020217912899699514e-02,4.783765287341206207e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.923121021768859379e-02,3.225024762909983050e-02,-4.265350150322039756e-03,6.019239195292341166e-02,4.783025280446712713e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.132435152303221199e-02,3.362559277340006897e-02,-4.447250359840389071e-03,6.018217941184580283e-02,4.782253111763973830e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.341726739418143494e-02,3.500131064575360412e-02,-4.629199951469096727e-03,6.017154152737080874e-02,4.781448782927493757e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.550994863833031562e-02,3.637741917900112754e-02,-4.811201139551473001e-03,6.016047832150963476e-02,4.780612295609048545e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.760238606370348541e-02,3.775393176035016923e-02,-4.993255713898068528e-03,6.014898981738932976e-02,4.779743651537158855e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.969457047959648988e-02,3.913086390949568200e-02,-5.175365825669199157e-03,6.013707603916203931e-02,4.778842852544841202e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.178649269641621478e-02,4.050823135640630596e-02,-5.357533553397799200e-03,6.012473701168480344e-02,4.777909900512523067e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.387814352572118715e-02,4.188604961049436498e-02,-5.539760974775389223e-03,6.011197276051709337e-02,4.776944797393812281e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.596951378026195967e-02,4.326433459197555342e-02,-5.722049922363203929e-03,6.009878331247212740e-02,4.775947545193567345e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.806059427402150197e-02,4.464310048664835162e-02,-5.904402781410535671e-03,6.008516869500769159e-02,4.774918145998121588e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.015137582225546009e-02,4.602236281820842878e-02,-6.086821341424686338e-03,6.007112893630821293e-02,4.773856601978069270e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.224184924153258247e-02,4.740213706227016516e-02,-6.269307615891953761e-03,6.005666406589427880e-02,4.772762915364283098e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.433200534977503493e-02,4.878243696327538154e-02,-6.451863321515293077e-03,6.004177411457645164e-02,4.771637088479507849e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.642183496629867401e-02,5.016327897555730719e-02,-6.634490957387815559e-03,6.002645911407106932e-02,4.770479123730140825e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.851132891185348683e-02,5.154467883640012627e-02,-6.817192387139359329e-03,6.001071909629291518e-02,4.769289023562121033e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+8.060047800866380896e-02,5.292665060381950698e-02,-6.999969452851268620e-03,5.999455409496794872e-02,4.768066790515457232e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+8.268927308046866709e-02,5.430920996338164941e-02,-7.182824219572654309e-03,5.997796414468306581e-02,4.766812427211122305e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+8.477770495256206629e-02,5.569237252942919186e-02,-7.365758828565563410e-03,5.996094928101405180e-02,4.765525936336483803e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+8.686576445183338824e-02,5.707615367913236593e-02,-7.548775174824592059e-03,5.994350954025812889e-02,4.764207320645461041e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+8.895344240680749803e-02,5.846056801945608006e-02,-7.731875233204669150e-03,5.992564496032021237e-02,4.762856582978836073e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.104072964768519793e-02,5.984563106063635340e-02,-7.915061422113265494e-03,5.990735557933184879e-02,4.761473726257019479e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.312761700638348683e-02,6.123135831775226162e-02,-8.098335585819540355e-03,5.988864143636272314e-02,4.760058753474242232e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.521409531657563929e-02,6.261776555155813828e-02,-8.281699757242515225e-03,5.986950257157396676e-02,4.758611667686226676e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.730015541373175647e-02,6.400486539448353718e-02,-8.465155550198831189e-03,5.984993902677100686e-02,4.757132472057025163e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.938578813515881738e-02,6.539267549336494345e-02,-8.648705479563796247e-03,5.982995084474065917e-02,4.755621169843527452e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.014709843200409523e-01,6.678121103292931904e-02,-8.832351530160939893e-03,5.980953806846932980e-02,4.754077764327613864e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.035557348094797714e-01,6.817048589812216286e-02,-9.016095544571563203e-03,5.978870074254026556e-02,4.752502258917404854e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.056400304465344858e-01,6.956051677978937475e-02,-9.199939475686775420e-03,5.976743891275260873e-02,4.750894657079391958e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.077238620762621668e-01,7.095131723394508538e-02,-9.383885534661080829e-03,5.974575262597582592e-02,4.749254962379025335e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.098072205457580330e-01,7.234290502458867733e-02,-9.567935802927253833e-03,5.972364192960674667e-02,4.747583178447856156e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.118900967041955297e-01,7.373529335972714427e-02,-9.752092143236504654e-03,5.970110687247916909e-02,4.745879308988799061e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.139724814028665600e-01,7.512849768855255073e-02,-9.936356608610140070e-03,5.967814750453587919e-02,4.744143357823944473e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.160543654952217590e-01,7.652253382835033646e-02,-1.012073146441640724e-02,5.965476387668151165e-02,4.742375328840672810e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.181357398369105166e-01,7.791741785084078942e-02,-1.030521876097525644e-02,5.963095604020924451e-02,4.740575225995360964e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.202165952858213066e-01,7.931316242031656583e-02,-1.048982002198305813e-02,5.960672404855076739e-02,4.738743053347736489e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.222969227021216548e-01,8.070978470654550929e-02,-1.067453780453226676e-02,5.958206795599334943e-02,4.736878815070322268e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.243767129482984818e-01,8.210730087162364244e-02,-1.085937383922006179e-02,5.955698781762158722e-02,4.734982515358166499e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.264559568891981123e-01,8.350572361939204158e-02,-1.104433030182780037e-02,5.953148369000321738e-02,4.733054158537646772e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.285346453920664100e-01,8.490506951728006568e-02,-1.122940921558185194e-02,5.950555563053082364e-02,4.731093749029268158e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.306127693265889400e-01,8.630535268830184792e-02,-1.141461253812714287e-02,5.947920369785541367e-02,4.729101291338755342e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.326903195649310474e-01,8.770658942120304613e-02,-1.159994210815449775e-02,5.945242795221254706e-02,4.727076790070115497e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.347672869817779229e-01,8.910879465888911499e-02,-1.178540019169795235e-02,5.942522845482711008e-02,4.725020249896388913e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.368436624543747093e-01,9.051198335620923296e-02,-1.197098856783196386e-02,5.939760526818574360e-02,4.722931675604810087e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.389194368625665810e-01,9.191617150323443119e-02,-1.215670954310833773e-02,5.936955845592300357e-02,4.720811072053721635e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.409946010888388224e-01,9.332137252084969636e-02,-1.234256494580932874e-02,5.934108808277663294e-02,4.718658444219787640e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.430691460183568520e-01,9.472760358246333501e-02,-1.252855719337381615e-02,5.931219421449698131e-02,4.716473797168099225e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.451430625390061624e-01,9.613487868505735112e-02,-1.271468788043745787e-02,5.928287691801660542e-02,4.714257136046221541e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.472163415414325383e-01,9.754321435376631100e-02,-1.290095960130081647e-02,5.925313626163155462e-02,4.712008466106443749e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.492889739190818854e-01,9.895262492008678745e-02,-1.308737385571736439e-02,5.922297231425409370e-02,4.709727792679114788e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.513609505682403933e-01,1.003631250454115470e-01,-1.327393252509790657e-02,5.919238514767105608e-02,4.707415121224636789e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.534322623880743364e-01,1.017747308118250871e-01,-1.346063812147490005e-02,5.916137483387539503e-02,4.705070457291373187e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.555029002806702365e-01,1.031874581038856598e-01,-1.364749246452636421e-02,5.912994144613228226e-02,4.702693806507113883e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.575728551510746644e-01,1.046013199586123682e-01,-1.383449775002701908e-02,5.909808505898361125e-02,4.700285174618417661e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.596421179073343188e-01,1.060163322481383202e-01,-1.402165601595431639e-02,5.906580574801337247e-02,4.697844567500299995e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.617106794605358555e-01,1.074325101813600053e-01,-1.420896898287674764e-02,5.903310359055119472e-02,4.695371991112275434e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.637785307248458277e-01,1.088498707868761906e-01,-1.439643918809653186e-02,5.899997866515128281e-02,4.692867451504336151e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.658456626175507931e-01,1.102684272855219971e-01,-1.458406835881698345e-02,5.896643105100837084e-02,4.690330954817072961e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.679120660590968095e-01,1.116881954729014026e-01,-1.477185839880362783e-02,5.893246082984680834e-02,4.687762507341125540e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.699777319731296532e-01,1.131091913986687209e-01,-1.495981177825260119e-02,5.889806808381281567e-02,4.685162115425001716e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.720426512865346202e-01,1.145314283785666687e-01,-1.514793020815815086e-02,5.886325289660816751e-02,4.682529785561891367e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.741068149294762724e-01,1.159549232084002707e-01,-1.533621608078051396e-02,5.882801535321919434e-02,4.679865524345880878e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.761702138354382385e-01,1.173796911935669485e-01,-1.552467135425936694e-02,5.879235553957372351e-02,4.677169338461998338e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.782328389412633773e-01,1.188057465533184087e-01,-1.571329787967534752e-02,5.875627354347870424e-02,4.674441234714233340e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.802946811871931343e-01,1.202331054181298292e-01,-1.590209791843879769e-02,5.871976945408630133e-02,4.671681220029097470e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.823557315169075932e-01,1.216617817484327846e-01,-1.609107321293050050e-02,5.868284336188336892e-02,4.668889301428590932e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.844159808775653053e-01,1.230917900903400336e-01,-1.628022581030419413e-02,5.864549535951524978e-02,4.666065486101493853e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.864754202198428135e-01,1.245231484245078180e-01,-1.646955806402488134e-02,5.860772554032116916e-02,4.663209781298175982e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.885340404979746198e-01,1.259558701680327519e-01,-1.665907184648451195e-02,5.856953399917031594e-02,4.660322194388382133e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.905918326697928489e-01,1.273899708533593189e-01,-1.684876936213177898e-02,5.853092083230820863e-02,4.657402732867715889e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.926487876967668822e-01,1.288254638299667076e-01,-1.703865243341457295e-02,5.849188613758220939e-02,4.654451404377795698e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.947048965440432433e-01,1.302623676269376585e-01,-1.722872331741932828e-02,5.845243001445341730e-02,4.651468216660459842e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.967601501804849828e-01,1.317006971979113039e-01,-1.741898426942718042e-02,5.841255256323682477e-02,4.648453177529163205e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.988145395787117298e-01,1.331404656655899299e-01,-1.760943705989782143e-02,5.837225388587238828e-02,4.645406294955885529e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.008680557151390489e-01,1.345816880349100519e-01,-1.780008370370781898e-02,5.833153408641770371e-02,4.642327577069808586e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.029206895700181590e-01,1.360243817007422973e-01,-1.799092658613661230e-02,5.829039326991071468e-02,4.639217032087088466e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.049724321274755956e-01,1.374685596987106073e-01,-1.818196739685654095e-02,5.824883154299131949e-02,4.636074668352201944e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.070232743755528182e-01,1.389142391263301357e-01,-1.837320834405982775e-02,5.820684901437123138e-02,4.632900494348609066e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.090732073062456231e-01,1.403614338537499118e-01,-1.856465152058454568e-02,5.816444579374483587e-02,4.629694518659491220e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.111222219155439450e-01,1.418101584027957052e-01,-1.875629882861011566e-02,5.812162199296221077e-02,4.626456750048629774e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.131703092034713254e-01,1.432604306848545284e-01,-1.894815281214942693e-02,5.807837772477195126e-02,4.623187197362956802e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.152174601741243254e-01,1.447122635127089940e-01,-1.914021508442551728e-02,5.803471310362891267e-02,4.619885869582952553e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.172636658357122441e-01,1.461656730467849885e-01,-1.933248798246862618e-02,5.799062824590212745e-02,4.616552775846928802e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.193089172005963650e-01,1.476206743062795368e-01,-1.952497334548912444e-02,5.794612326944416358e-02,4.613187925404407808e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.213532052853296739e-01,1.490772823792799184e-01,-1.971767367511259236e-02,5.790119829365454607e-02,4.609791327654353954e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.233965211106962168e-01,1.505355119126157992e-01,-1.991059032179470084e-02,5.785585343960384513e-02,4.606362992130673129e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.254388557017504569e-01,1.519953793096984473e-01,-2.010372579015734518e-02,5.781008883115720809e-02,4.602902928511649616e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.274802000878569097e-01,1.534569008415848368e-01,-2.029708268956399025e-02,5.776390459176871522e-02,4.599411146565559605e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.295205453027292508e-01,1.549200882458502682e-01,-2.049066222071078969e-02,5.771730084762189544e-02,4.595887656244328490e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.315598823844699505e-01,1.563849589075267110e-01,-2.068446684286013443e-02,5.767027772720312306e-02,4.592332467662965656e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.335982023756095483e-01,1.578515295941025909e-01,-2.087849893061297438e-02,5.762283535970885023e-02,4.588745591015492842e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.356354963231459820e-01,1.593198130707392246e-01,-2.107276035148326898e-02,5.757497387607745515e-02,4.585127036660529565e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.376717552785839183e-01,1.607898254506195390e-01,-2.126725332120013232e-02,5.752669340924164432e-02,4.581476815126965252e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.397069702979740813e-01,1.622615819528470116e-01,-2.146197981427999668e-02,5.747799409367922163e-02,4.577794937065199909e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.417411324419524721e-01,1.637350978520245581e-01,-2.165694200207902292e-02,5.742887606597789352e-02,4.574081413287562348e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.437742327757797534e-01,1.652103891665768209e-01,-2.185214211471383020e-02,5.737933946400335106e-02,4.570336254732791370e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.458062623693803850e-01,1.666874705304931814e-01,-2.204758222964059780e-02,5.732938442748684166e-02,4.566559472496520833e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.478372122973819258e-01,1.681663570057046198e-01,-2.224326418454930857e-02,5.727901109814505232e-02,4.562751077829211299e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.498670736391540859e-01,1.696470640134664365e-01,-2.243919036500047551e-02,5.722821961989455614e-02,4.558911082142249049e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.518958374788481391e-01,1.711296069509967155e-01,-2.263536311998770936e-02,5.717701013731647797e-02,4.555039496986086345e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.539234949054359203e-01,1.726140007734825454e-01,-2.283178395835683966e-02,5.712538279772648442e-02,4.551136334081948287e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.559500370127490432e-01,1.741002611459073479e-01,-2.302845518476683007e-02,5.707333775065390191e-02,4.547201605291559257e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.579754548995178420e-01,1.755884016455049601e-01,-2.322537893398885292e-02,5.702087514731020434e-02,4.543235322664313514e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.599997396694109231e-01,1.770784394560060149e-01,-2.342255751405434086e-02,5.696799514049347146e-02,4.539237498399874005e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.620228824310735782e-01,1.785703902483214700e-01,-2.361999306783127070e-02,5.691469788477998559e-02,4.535208144833859367e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.640448742981674202e-01,1.800642699380346212e-01,-2.381768780869261692e-02,5.686098353642176501e-02,4.531147274446328810e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.660657063894090180e-01,1.815600917557494698e-01,-2.401564373875895997e-02,5.680685225383465958e-02,4.527054899905918472e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.680853698286093101e-01,1.830578731922526681e-01,-2.421386323019787332e-02,5.675230419712483376e-02,4.522931034045885590e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.701038557447121846e-01,1.845576285401729633e-01,-2.441234816440078850e-02,5.669733952843732105e-02,4.518775689839423793e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.721211552718337257e-01,1.860593720616441349e-01,-2.461110091150186391e-02,5.664195841201129233e-02,4.514588880466903764e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.741372595493007380e-01,1.875631197849217391e-01,-2.481012344274932668e-02,5.658616101379596725e-02,4.510370619274101101e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.761521597216903823e-01,1.890688870376776576e-01,-2.500941773853820188e-02,5.652994750244640099e-02,4.506120919770955635e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.781658469388685329e-01,1.905766906632992974e-01,-2.520898641842213989e-02,5.647331804813801592e-02,4.501839795624483775e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.801783123560285804e-01,1.920865450380199457e-01,-2.540883159248652837e-02,5.641627282252113101e-02,4.497527260657428472e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.821895471337307892e-01,1.935984650753732605e-01,-2.560895539201766755e-02,5.635881199950226045e-02,4.493183328904711282e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.841995424379405444e-01,1.951124678515837318e-01,-2.580935970080866851e-02,5.630093575532454930e-02,4.488808014541809666e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.862082894400676536e-01,1.966285667750598543e-01,-2.601004698313701471e-02,5.624264426852474547e-02,4.484401331938780988e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.882157793170046500e-01,1.981467794609510225e-01,-2.621101967440763597e-02,5.618393771874497650e-02,4.479963295638212384e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.902220032511660386e-01,1.996671197572329193e-01,-2.641227986265418104e-02,5.612481628775087272e-02,4.475493920348277976e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.922269524305266541e-01,2.011896031559197440e-01,-2.661382951655558854e-02,5.606528015964880318e-02,4.470993220989826766e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.942306180486604639e-01,2.027142464716726877e-01,-2.681567097833721311e-02,5.600532952097460326e-02,4.466461212639095124e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.962329913047793695e-01,2.042410626743020319e-01,-2.701780629629586816e-02,5.594496455999183049e-02,4.461897910565180148e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+2.982340634037716764e-01,2.057700680602219945e-01,-2.722023773886143166e-02,5.588418546777970675e-02,4.457303330267901598e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.002338255562408409e-01,2.073012815480589355e-01,-2.742296805515838029e-02,5.582299243625220980e-02,4.452677487350752417e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.022322689785442162e-01,2.088347153232777142e-01,-2.762599897870952545e-02,5.576138565984863765e-02,4.448020397626564759e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.042293848928312450e-01,2.103703871487425103e-01,-2.782933306819604727e-02,5.569936533514848603e-02,4.443332077102758459e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.062251645270823164e-01,2.119083095145637974e-01,-2.803297218636766758e-02,5.563693166104280441e-02,4.438612541987747018e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.082195991151471803e-01,2.134485018703365933e-01,-2.823691900299052718e-02,5.557408483855959258e-02,4.433861808682850736e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.102126798967836940e-01,2.149909784497472387e-01,-2.844117569559226344e-02,5.551082507039734742e-02,4.429079893731609485e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.122043981176959027e-01,2.165357553516893407e-01,-2.864574465325376434e-02,5.544715256144733956e-02,4.424266813906134188e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.141947450295727307e-01,2.180828463205686518e-01,-2.885062776522989703e-02,5.538306751903194353e-02,4.419422586170219658e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.161837118901262844e-01,2.196322679155873203e-01,-2.905582741483072468e-02,5.531857015312308112e-02,4.414547227718753075e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.181712899631304881e-01,2.211840347327033718e-01,-2.926134586396791221e-02,5.525366067567348549e-02,4.409640755921143129e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.201574705184592196e-01,2.227381634549834288e-01,-2.946718569108478306e-02,5.518833930065263765e-02,4.404703188379451295e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.221422448321247800e-01,2.242946715658937495e-01,-2.967334899066173190e-02,5.512260624398033354e-02,4.399734542832726691e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.241256041863160853e-01,2.258535729950193471e-01,-2.987983802558838442e-02,5.505646172437274333e-02,4.394734837247531445e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.261075398694370242e-01,2.274148845176577616e-01,-3.008665522962067698e-02,5.498990596248877483e-02,4.389704089792426656e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.280880431761448723e-01,2.289786216750558656e-01,-3.029380284429342499e-02,5.492293918131604946e-02,4.384642318838969377e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.300671054073883171e-01,2.305448006071216249e-01,-3.050128344425365526e-02,5.485556160596154890e-02,4.379549542959070285e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.320447178704457047e-01,2.321134371676186192e-01,-3.070909958222201031e-02,5.478777346313759566e-02,4.374425780936302410e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.340208718789632880e-01,2.336845479050756458e-01,-3.091725327042185029e-02,5.471957498205245324e-02,4.369271051748098156e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.359955587529934729e-01,2.352581473627587683e-01,-3.112574677782346116e-02,5.465096639473596835e-02,4.364085374593722477e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.379687698190326217e-01,2.368342511099247039e-01,-3.133458224315384399e-02,5.458194793583626137e-02,4.358868768907586677e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.399404964100594673e-01,2.384128763964959985e-01,-3.154376231425491672e-02,5.451251984233561332e-02,4.353621254324024914e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.419107298655730265e-01,2.399940375801838111e-01,-3.175328911793701830e-02,5.444268235326474303e-02,4.348342850689054795e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.438794615316307368e-01,2.415777513318667635e-01,-3.196316515661957319e-02,5.437243571059034025e-02,4.343033578094347424e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.458466827608864258e-01,2.431640355833207812e-01,-3.217339344653392247e-02,5.430178015738422459e-02,4.337693456800085778e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.478123849126281697e-01,2.447529062290572932e-01,-3.238397614272162106e-02,5.423071593877825530e-02,4.332322507279112722e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.497765593528163741e-01,2.463443786762334053e-01,-3.259491543159840493e-02,5.415924330282332472e-02,4.326920750221330825e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.517391974541219102e-01,2.479384675416138606e-01,-3.280621345778825942e-02,5.408736250060223322e-02,4.321488206582489444e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.537002905959633070e-01,2.495351916171516249e-01,-3.301787296575132286e-02,5.401507378552007632e-02,4.316024897512913405e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.556598301645456095e-01,2.511345647133128578e-01,-3.322989644248092872e-02,5.394237741286669885e-02,4.310530844387363003e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.576178075528972378e-01,2.527366046290835389e-01,-3.344228643901920456e-02,5.386927364028224613e-02,4.305006068824637211e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.595742141609086784e-01,2.543413274864233520e-01,-3.365504534451298907e-02,5.379576272769779482e-02,4.299450592640328139e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.615290413953694548e-01,2.559487496383298089e-01,-3.386817562897619094e-02,5.372184493764461249e-02,4.293864437891308228e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.634822806700064302e-01,2.575588867537079163e-01,-3.408167950818773856e-02,5.364752053524898118e-02,4.288247626861033113e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.654339234055213326e-01,2.591717558325584148e-01,-3.429555963231729898e-02,5.357278978849697865e-02,4.282600182078036277e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.673839610296283364e-01,2.607873740505967097e-01,-3.450981881419831754e-02,5.349765296688435007e-02,4.276922126268978830e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.693323849770918654e-01,2.624057557378959848e-01,-3.472445920728649965e-02,5.342211034273044307e-02,4.271213482425313401e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.712791866897643955e-01,2.640269205218550019e-01,-3.493948376002410650e-02,5.334616219065099058e-02,4.265474273759598534e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.732243576166232035e-01,2.656508839007367673e-01,-3.515489472246697417e-02,5.326980878748070747e-02,4.259704523682206756e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.751678892138092802e-01,2.672776605509011860e-01,-3.537069476714289201e-02,5.319305041293420633e-02,4.253904255891426400e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.771097729446636349e-01,2.689072679467470750e-01,-3.558688654707199001e-02,5.311588734877265711e-02,4.248073494330588118e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.790500002797654311e-01,2.705397237135994670e-01,-3.580347276956746122e-02,5.303831987911117318e-02,4.242212263158762764e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.809885626969692352e-01,2.721750436802548578e-01,-3.602045593280902430e-02,5.296034829041187941e-02,4.236320586776475827e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.829254516814425413e-01,2.738132456890615685e-01,-3.623783875729245235e-02,5.288197287173847932e-02,4.230398489828862130e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.848606587257031308e-01,2.754543453668589215e-01,-3.645562379336526188e-02,5.280319391443789173e-02,4.224445997194619107e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.867941753296565421e-01,2.770983595369576058e-01,-3.667381385851860992e-02,5.272401171242688256e-02,4.218463134026673722e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.887259930006330966e-01,2.787453054600792512e-01,-3.689241155115186022e-02,5.264442656170708473e-02,4.212449925704095932e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.906561032534255906e-01,2.803952005184612850e-01,-3.711141968689948140e-02,5.256443876102952911e-02,4.206406397860112389e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.925844976103263773e-01,2.820480625004357367e-01,-3.733084123853901842e-02,5.248404861078097977e-02,4.200332576344978364e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.945111676011643920e-01,2.837039071700639403e-01,-3.755067907888199469e-02,5.240325641371972737e-02,4.194228487281337903e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.964361047633429003e-01,2.853627527633899597e-01,-3.777093617589073055e-02,5.232206247416559819e-02,4.188094157030751719e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+3.983593006418760796e-01,2.870246142047925830e-01,-3.799161474975294023e-02,5.224046709971414543e-02,4.181929612227784365e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.002807467894267113e-01,2.886895107290988194e-01,-3.821271838687029243e-02,5.215847059987341328e-02,4.175734879762498797e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.022004347663427071e-01,2.903574590126544175e-01,-3.843424958853332990e-02,5.207607328597316926e-02,4.169509986756030351e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.041183561406945790e-01,2.920284763996272570e-01,-3.865621171308986775e-02,5.199327547171704589e-02,4.163254960598282706e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.060345024883124099e-01,2.937025792878419361e-01,-3.887860736517938232e-02,5.191007747260024952e-02,4.156969828943128942e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.079488653928228237e-01,2.953797866139650785e-01,-3.910143993469841761e-02,5.182647960668260861e-02,4.150654619703254555e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.098614364456857340e-01,2.970601156396797116e-01,-3.932471260791051137e-02,5.174248219296453954e-02,4.144309361023295057e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.117722072462318139e-01,2.987435831191847524e-01,-3.954842830088801942e-02,5.165808555300734767e-02,4.137934081334503911e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.136811694016988561e-01,3.004302080854056589e-01,-3.977259045749894500e-02,5.157329000989293449e-02,4.131528809314354289e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.155883145272687984e-01,3.021200072973992401e-01,-3.999720212632235788e-02,5.148809588857734126e-02,4.125093573897542498e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.174936342461046945e-01,3.038129991885418213e-01,-4.022226676820302854e-02,5.140250351575156862e-02,4.118628404292204448e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.193971201893874623e-01,3.055092014432720382e-01,-4.044778778045935580e-02,5.131651321948338396e-02,4.112133329953633898e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.212987639963524100e-01,3.072086309592993447e-01,-4.067376845019737147e-02,5.123012532948320596e-02,4.105608380627682741e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.231985573143263735e-01,3.089113067592595963e-01,-4.090021215871862681e-02,5.114334017727762555e-02,4.099053586318297526e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.250964917987639091e-01,3.106172470277812625e-01,-4.112712271576896700e-02,5.105615809555211215e-02,4.092468977290040777e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.269925591132845422e-01,3.123264711964581219e-01,-4.135450369551391808e-02,5.096857941790970681e-02,4.085854584052620586e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.288867509297087377e-01,3.140389956520988624e-01,-4.158235857898116145e-02,5.088060447956009591e-02,4.079210437397080002e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.307790589280949822e-01,3.157548410056558730e-01,-4.181069120346580376e-02,5.079223361682413257e-02,4.072536568381985855e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.326694747967760879e-01,3.174740249247599566e-01,-4.203950515987493058e-02,5.070346716718149294e-02,4.065833008309717722e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.345579902323956079e-01,3.191965674283254728e-01,-4.226880448887284508e-02,5.061430546914671286e-02,4.059099788760023308e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.364445969399446956e-01,3.209224868507172812e-01,-4.249859326307561108e-02,5.052474886166578860e-02,4.052336941553946748e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.383292866327981319e-01,3.226518018969287138e-01,-4.272887559021926185e-02,5.043479768411283926e-02,4.045544498806267208e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.402120510327509062e-01,3.243845318425761515e-01,-4.295965480911387302e-02,5.034445227717728594e-02,4.038722492884824766e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.420928818700546881e-01,3.261206968463702949e-01,-4.319093526355546886e-02,5.025371298300580075e-02,4.031870956429196018e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.439717708834537979e-01,3.278603171287119977e-01,-4.342272108265751024e-02,5.016258014349757322e-02,4.024989922305000078e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.458487098202219556e-01,3.296034095957653953e-01,-4.365501589907369584e-02,5.007105410220858560e-02,4.018079423684801643e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.477236904361980296e-01,3.313499975415669652e-01,-4.388782441407201862e-02,4.997913520307821489e-02,4.011139493991481286e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.495967044958226744e-01,3.331001007159369309e-01,-4.412115078919840588e-02,4.988682379004473477e-02,4.004170166867154212e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.514677437721743569e-01,3.348537393648177707e-01,-4.435499921106216686e-02,4.979412020782219422e-02,3.997171476227737164e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.533368000470053838e-01,3.366109321286235856e-01,-4.458937363741075077e-02,4.970102480201137041e-02,3.990143456260673571e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.552038651107780942e-01,3.383717008569456230e-01,-4.482427838975283629e-02,4.960753791927513534e-02,3.983086141434062921e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.570689307627009978e-01,3.401360683384228034e-01,-4.505971796398922091e-02,4.951365990634338382e-02,3.975999566418739750e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.589319888107645240e-01,3.419040547236328331e-01,-4.529569629513546108e-02,4.941939111097271714e-02,3.968883766145316350e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.607930310717773814e-01,3.436756826124021114e-01,-4.553221810349163817e-02,4.932473188149956445e-02,3.961738775786116662e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.626520493714022519e-01,3.454509710659170452e-01,-4.576928687764118819e-02,4.922968256718097957e-02,3.954564630780707524e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.645090355441917063e-01,3.472299435497775977e-01,-4.600690688451048588e-02,4.913424351956214436e-02,3.947361366836308894e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.663639814336241196e-01,3.490126219808183938e-01,-4.624508262772696748e-02,4.903841509022083961e-02,3.940129019875292515e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.682168788921395874e-01,3.507990288455532513e-01,-4.648381779215238901e-02,4.894219763247006222e-02,3.932867626083686452e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.700677197811755081e-01,3.525891883757684697e-01,-4.672311637250975425e-02,4.884559150139650835e-02,3.925577221857436405e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.719164959712024987e-01,3.543831229776815972e-01,-4.696298241935658452e-02,4.874859705390467701e-02,3.918257843829056064e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.737631993417600329e-01,3.561808561066139855e-01,-4.720341981155043443e-02,4.865121464827580622e-02,3.910909528866675089e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.756078217814919684e-01,3.579824097525655668e-01,-4.744443163138586939e-02,4.855344464607722066e-02,3.903532314089733224e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.774503551881826846e-01,3.597878087043267503e-01,-4.768602196533974769e-02,4.845528741133026812e-02,3.896126236858876712e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.792907914687918880e-01,3.615970778244534944e-01,-4.792819359315260430e-02,4.835674331102303436e-02,3.888691334737250926e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.811291225394909166e-01,3.634102399754942092e-01,-4.817094983205585934e-02,4.825781271632413605e-02,3.881227645524552572e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.829653403256978783e-01,3.652273192713189065e-01,-4.841429374217899595e-02,4.815849600177261880e-02,3.873735207273574788e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.847994367621128453e-01,3.670483413411999574e-01,-4.865822810674159254e-02,4.805879354626042821e-02,3.866214058249939356e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.866314037927538805e-01,3.688733304047613903e-01,-4.890275513045725525e-02,4.795870573381633140e-02,3.858664236946248716e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.884612333709922316e-01,3.707023128227257547e-01,-4.914787743843732071e-02,4.785823295401177985e-02,3.851085782074317176e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.902889174595871369e-01,3.725353120863876821e-01,-4.939359685389609761e-02,4.775737560186102404e-02,3.843478732569010070e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.921144480307220181e-01,3.743723531330298004e-01,-4.963991481018003443e-02,4.765613407986846184e-02,3.835843127628906779e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.939378170660390643e-01,3.762134621588666961e-01,-4.988683310629434681e-02,4.755450879748951420e-02,3.828179006663476325e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.957590165566744811e-01,3.780586646621101887e-01,-5.013435330861220479e-02,4.745250017120748731e-02,3.820486409314593490e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.975780385032944064e-01,3.799079869102094387e-01,-5.038247574654960287e-02,4.735010862581452706e-02,3.812765375439478577e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+4.993948749161288836e-01,3.817614527341446906e-01,-5.063120056223117255e-02,4.724733459671751762e-02,3.805015945150003742e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.012095178150079988e-01,3.836190893496582710e-01,-5.088052845022020959e-02,4.714417852856186975e-02,3.797238158799053354e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.030219592293964093e-01,3.854809224430219561e-01,-5.113045895685522368e-02,4.704064087648877901e-02,3.789432056957349748e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.048321911984281485e-01,3.873469761868061356e-01,-5.138099173606525821e-02,4.693672210698698888e-02,3.781597680458533839e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.066402057709425977e-01,3.892172752082220244e-01,-5.163212578532108687e-02,4.683242269857417622e-02,3.773735070418522408e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.084459950055177924e-01,3.910918466331282373e-01,-5.188386008406962319e-02,4.672774314235435267e-02,3.765844268193413447e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.102495509705069487e-01,3.929707134835868088e-01,-5.213619288055654916e-02,4.662268394294748208e-02,3.757925315407058542e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.120508657440723255e-01,3.948539012466351639e-01,-5.238912253973882632e-02,4.651724561895854287e-02,3.749978253980901233e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.138499314142201957e-01,3.967414337092276533e-01,-5.264264653579012188e-02,4.641142870388543368e-02,3.742003126111733136e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.156467400788354860e-01,3.986333369864577536e-01,-5.289676311094386463e-02,4.630523374631888289e-02,3.733999974290371782e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.174412838457173036e-01,4.005296334716465512e-01,-5.315146939487135763e-02,4.619866130970461116e-02,3.725968841286108058e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.192335548326121319e-01,4.024303445737815732e-01,-5.340676169230915149e-02,4.609171197515120333e-02,3.717909770246818346e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.210235451672501350e-01,4.043354945256011912e-01,-5.366263702063171770e-02,4.598438634115748630e-02,3.709822804638095439e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.228112469873781309e-01,4.062451052710731592e-01,-5.391909252231467942e-02,4.587668502243939794e-02,3.701707988270983019e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.245966524407953413e-01,4.081591992274264946e-01,-5.417612422131772715e-02,4.576860865121954586e-02,3.693565365301505476e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.263797536853872527e-01,4.100777947711624627e-01,-5.443372864061693517e-02,4.566015787802157888e-02,3.685394980290402356e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.281605428891603671e-01,4.120009128850969926e-01,-5.469190230735999220e-02,4.555133337069193489e-02,3.677196878211600461e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.299390122302762851e-01,4.139285720368154342e-01,-5.495064175001453111e-02,4.544213581470143981e-02,3.668971104451239085e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.317151538970863456e-01,4.158607913493004848e-01,-5.520994333185680336e-02,4.533256591310378530e-02,3.660717704838034048e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.334889600881657090e-01,4.177975879493989186e-01,-5.546980364786075618e-02,4.522262438664244322e-02,3.652436725649137461e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.352604230123479967e-01,4.197389778999250920e-01,-5.573021971155846244e-02,4.511231197299401391e-02,3.644128213654858062e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.370295348887590414e-01,4.216849765859852495e-01,-5.599118934437052192e-02,4.500162942547023676e-02,3.635792216133204580e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.387962879468511934e-01,4.236355993518910457e-01,-5.625271032989208331e-02,4.489057751223574089e-02,3.627428780880520565e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.405606744264380703e-01,4.255908585044723691e-01,-5.651478133451430519e-02,4.477915701561959588e-02,3.619037956240178655e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.423226865777274197e-01,4.275507654963051074e-01,-5.677740112727489019e-02,4.466736873091314147e-02,3.610619791158318215e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.440823166613565354e-01,4.295153327877318516e-01,-5.704056999550081997e-02,4.455521346520760484e-02,3.602174335171165476e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.458395569484247867e-01,4.314845705692794686e-01,-5.730428891136405550e-02,4.444269203450911671e-02,3.593701638419984912e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.475943997205290348e-01,4.334584862458090715e-01,-5.756855961156158441e-02,4.432980526310983244e-02,3.585201751711387663e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.493468372697968283e-01,4.354370880295653778e-01,-5.783338550921716253e-02,4.421655398076076993e-02,3.576674726539405547e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.510968618989195988e-01,4.374203824705562016e-01,-5.809877153584919185e-02,4.410293901969621760e-02,3.568120615083006375e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.528444659211878554e-01,4.394083742036398177e-01,-5.836472368901809288e-02,4.398896121208812487e-02,3.559539470260864036e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.545896416605240464e-01,4.414010686857537391e-01,-5.863125030859205750e-02,4.387462138711954729e-02,3.550931345731139777e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.563323814515162002e-01,4.433984680397362044e-01,-5.889836104269941669e-02,4.375992036677706781e-02,3.542296295918270221e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.580726776394524524e-01,4.454005734421970231e-01,-5.916606722216993103e-02,4.364485896408865084e-02,3.533634376068727212e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.598105225803533536e-01,4.474073870412204612e-01,-5.943438356479461421e-02,4.352943797752316379e-02,3.524945642238912491e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.615459086410068412e-01,4.494189075595399485e-01,-5.970332654270105177e-02,4.341365818627989509e-02,3.516230151321872999e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.632788281990006585e-01,4.514351350553761910e-01,-5.997291500996410063e-02,4.329752034674941297e-02,3.507487961085111183e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.650092736427564377e-01,4.534560691593650383e-01,-6.024317127103302749e-02,4.318102518695769027e-02,3.498719130132533772e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.667372373715630074e-01,4.554817062504090686e-01,-6.051412038513049457e-02,4.306417340051087173e-02,3.489923717973532069e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.684627117956096987e-01,4.575120454045062957e-01,-6.078579019849569831e-02,4.294696564193431487e-02,3.481101785007027494e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.701856893360195411e-01,4.595470817084197357e-01,-6.105821211364634293e-02,4.282940252071514714e-02,3.472253392544348283e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.719061624248831244e-01,4.615868142024618370e-01,-6.133142149361384132e-02,4.271148459438930262e-02,3.463378602816428176e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.736241235052911280e-01,4.636312400057323591e-01,-6.160545699223669847e-02,4.259321236184267223e-02,3.454477478934086521e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.753395650313679610e-01,4.656803565782285936e-01,-6.188036161088621839e-02,4.247458625678498384e-02,3.445550084929746570e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.770524794683048464e-01,4.677341609999856598e-01,-6.215618210349008188e-02,4.235560663984804658e-02,3.436596485736859163e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.787628592923925730e-01,4.697926515221416688e-01,-6.243296931918430881e-02,4.223627379239994900e-02,3.427616747205160519e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.804706969910552461e-01,4.718558290370136743e-01,-6.271077927356895343e-02,4.211658790758852217e-02,3.418610936044649828e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.821759850628824839e-01,4.739236945904936049e-01,-6.298967175751184200e-02,4.199654908265058884e-02,3.409579119810171588e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.838787160176629465e-01,4.759962494931453403e-01,-6.326971126371429277e-02,4.187615731180362177e-02,3.400521366894915798e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.855788823764169759e-01,4.780734977657087881e-01,-6.355096802417309343e-02,4.175541247676966411e-02,3.391437746513875084e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.872764766714295703e-01,4.801554472531168183e-01,-6.383351648750827667e-02,4.163431433794967385e-02,3.382328328628222969e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.889714914462828022e-01,4.822421070838862622e-01,-6.411743662744344430e-02,4.151286252668109217e-02,3.373193183905776049e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.906639192558894580e-01,4.843334894988757244e-01,-6.440281337742123635e-02,4.139105653534302931e-02,3.364032383683963068e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.923537526665245689e-01,4.864296109144067848e-01,-6.468973637798366982e-02,4.126889571014409619e-02,3.354845999897778652e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.940409842558590503e-01,4.885304892201637883e-01,-6.497830097363072044e-02,4.114637924165876620e-02,3.345634105029983152e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.957256066129916761e-01,4.906361487283490974e-01,-6.526860772426355817e-02,4.102350615617594148e-02,3.336396772051403170e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.974076123384820525e-01,4.927466181401373158e-01,-6.556076273915309449e-02,4.090027530618855350e-02,3.327134074298986888e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+5.990869940443830366e-01,4.948619295898656345e-01,-6.585487650211194199e-02,4.077668536287595152e-02,3.317846085430888881e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.007637443542724887e-01,4.969821222484950840e-01,-6.615106542330990991e-02,4.065273480756710633e-02,3.308532879317999664e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.024378559032870228e-01,4.991072409747891458e-01,-6.644945066730481575e-02,4.052842192302218149e-02,3.299194529936666509e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.041093213381530935e-01,5.012373372916789638e-01,-6.675015829670903622e-02,4.040374478605510827e-02,3.289831111253931906e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.057781333172199689e-01,5.033724694214705142e-01,-6.705331905171564577e-02,4.027870126007635637e-02,3.280442697115393269e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.074442845104918165e-01,5.055127028203741935e-01,-6.735906855558479456e-02,4.015328898786054562e-02,3.271029361116729595e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.091077675996596774e-01,5.076581115257730037e-01,-6.766754682460786319e-02,4.002750538436858352e-02,3.261591176474242237e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.107685752781343291e-01,5.098087782218558850e-01,-6.797889741465308611e-02,3.990134763135860102e-02,3.252128215859749760e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.124267002510773716e-01,5.119647933063201650e-01,-6.829326847903076370e-02,3.977481267169681589e-02,3.242640551274659777e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.140821352354338680e-01,5.141262575035050464e-01,-6.861081131832938440e-02,3.964789720362111441e-02,3.233128253882587844e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.157348729599646520e-01,5.162932808429946396e-01,-6.893168051572440280e-02,3.952059767731740114e-02,3.223591393850576137e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.173849061652770809e-01,5.184659854124719081e-01,-6.925603309477645908e-02,3.939291029093108798e-02,3.214030040156165557e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.190322276038583427e-01,5.206445027448431961e-01,-6.958402843530207826e-02,3.926483098903413294e-02,3.204444260410531653e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.206768300401065419e-01,5.228289769844582935e-01,-6.991582841161107786e-02,3.913635545926952286e-02,3.194834120669579614e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.223187062503623412e-01,5.250195621291970927e-01,-7.025159521030246990e-02,3.900747913258149208e-02,3.185199685245140855e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.239578490229409358e-01,5.272164264253427213e-01,-7.059149254955698893e-02,3.887819718339511482e-02,3.175541016510813885e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.255942511581639165e-01,5.294197496179207096e-01,-7.093568390803743628e-02,3.874850453056740557e-02,3.165858174670997949e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.272279054683905786e-01,5.316297245292809226e-01,-7.128433253710628192e-02,3.861839584010193227e-02,3.156151217582238155e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.288588047780496737e-01,5.338465597230808157e-01,-7.163760017584967776e-02,3.848786552878025874e-02,3.146420200491202857e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.304869419236707184e-01,5.360704737739510062e-01,-7.199564655685802839e-02,3.835690776978491201e-02,3.136665175838818653e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.321123097539158575e-01,5.383017020530858066e-01,-7.235862935439421606e-02,3.822551649852459765e-02,3.126886193049878138e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.337349011296107282e-01,5.405404926469538074e-01,-7.272670271254677576e-02,3.809368541960585913e-02,3.117083298275814540e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.353547089237763235e-01,5.427871080196632336e-01,-7.310001639118959837e-02,3.796140801661986031e-02,3.107256534206452314e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.369717260216599675e-01,5.450418273082359155e-01,-7.347871537781375251e-02,3.782867756210599869e-02,3.097405939800989549e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.385859453207668457e-01,5.473049431884848826e-01,-7.386293822611199322e-02,3.769548713011064422e-02,3.087531550062373453e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.401973597308906472e-01,5.495767637203776701e-01,-7.425281746712260322e-02,3.756182960928269171e-02,3.077633395810271955e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.418059621741456500e-01,5.518576107638960559e-01,-7.464847717128299254e-02,3.742769771762374797e-02,3.067711503459620292e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.434117455849966971e-01,5.541478216451929395e-01,-7.505003260501191820e-02,3.729308402043820386e-02,3.057765894803944340e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.450147029102913931e-01,5.564477501078528565e-01,-7.545759006306990146e-02,3.715798094754516057e-02,3.047796586767104743e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.466148271092899691e-01,5.587577626134198061e-01,-7.587124443141295138e-02,3.702238081330499336e-02,3.037803591187585583e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.482121111536969238e-01,5.610782411599096742e-01,-7.629107934078786790e-02,3.688627583873173299e-02,3.027786914627431103e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.498065480276917771e-01,5.634095829572111080e-01,-7.671716586075677524e-02,3.674965817367036036e-02,3.017746558130968704e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.513981307279594901e-01,5.657521979394031231e-01,-7.714956088727882577e-02,3.661251992220770912e-02,3.007682517041446180e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.529868522637218842e-01,5.681065103850876374e-01,-7.758830666223245909e-02,3.647485316940984185e-02,2.997594780810870496e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.545727056567677282e-01,5.704729578811196244e-01,-7.803343033693936615e-02,3.633665000853042448e-02,2.987483332811556314e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.561556839414840470e-01,5.728519899950996752e-01,-7.848494135434204433e-02,3.619790257060098021e-02,2.977348150171361119e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.577357801648857638e-01,5.752440700050738265e-01,-7.894283181392228177e-02,3.605860305663364379e-02,2.967189203611932946e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.593129873866474533e-01,5.776496721240030885e-01,-7.940707511454113410e-02,3.591874376880489078e-02,2.957006457283401502e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.608872986791325399e-01,5.800692816995922652e-01,-7.987762438410656030e-02,3.577831714574203348e-02,2.946799868649030985e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.624587071274248284e-01,5.825033941340636012e-01,-8.035441249508833150e-02,3.563731579731078958e-02,2.936569388351730758e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.640272058293580359e-01,5.849525141895652869e-01,-8.083734998665999261e-02,3.549573254197271877e-02,2.926314960124939413e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.655927878955469890e-01,5.874171540600549024e-01,-8.132632509702358847e-02,3.535356044497139916e-02,2.916036520702540158e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.671554464494171555e-01,5.898978351290398159e-01,-8.182120266363614169e-02,3.521079285738578507e-02,2.905733999765415243e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.687151746272345099e-01,5.923950856816394550e-01,-8.232182283434524850e-02,3.506742345644328629e-02,2.895407319843302796e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.702719655781371744e-01,5.949094363515259332e-01,-8.282800019394162550e-02,3.492344628822762614e-02,2.885056396339894524e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.718258124641639517e-01,5.974414248169126473e-01,-8.333952422360936529e-02,3.477885580922965292e-02,2.874681137507923423e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.733767084602854114e-01,5.999915907314075092e-01,-8.385615719598679685e-02,3.463364692953439794e-02,2.864281444455246128e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.749246467544329775e-01,6.025604762212254384e-01,-8.437763374137846606e-02,3.448781505764925842e-02,2.853857211177853737e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.764696205475297930e-01,6.051486226065502239e-01,-8.490366078294693530e-02,3.434135614548174120e-02,2.843408324630068984e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.780116230535196964e-01,6.077565704387106749e-01,-8.543391701123252691e-02,3.419426673266166838e-02,2.832934664812590264e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.795506474993976420e-01,6.103848594575920217e-01,-8.596805167746755794e-02,3.404654399277256410e-02,2.822436104872829143e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.810866871252394539e-01,6.130340243858507510e-01,-8.650568470238483021e-02,3.389818577927885407e-02,2.811912511214487709e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.826197351842306915e-01,6.157045938853850098e-01,-8.704640646714111030e-02,3.374919067174330517e-02,2.801363743703967901e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.841497849426976252e-01,6.183970922348999144e-01,-8.758977761154743136e-02,3.359955802077495973e-02,2.790789655803483926e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.856768296801356577e-01,6.211120311030335905e-01,-8.813532754548576709e-02,3.344928799530063473e-02,2.780190094823500879e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.872008626892394112e-01,6.238499152247789503e-01,-8.868255626943963299e-02,3.329838162786011302e-02,2.769564902150655206e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.887218772759319263e-01,6.266112354238076998e-01,-8.923093298054071698e-02,3.314684085961513288e-02,2.758913913512136551e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.902398667593944159e-01,6.293964708620514159e-01,-8.977989745344845673e-02,3.299466858447899081e-02,2.748236959257385204e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.917548244720952422e-01,6.322060824242952126e-01,-9.032885847484395381e-02,3.284186869298399286e-02,2.737533864701660047e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.932667437598194482e-01,6.350405186505813093e-01,-9.087719548065173458e-02,3.268844611580437598e-02,2.726804450445419192e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.947756179816976241e-01,6.379002046300789042e-01,-9.142425844422769066e-02,3.253440686488437428e-02,2.716048532749567590e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.962814405102357718e-01,6.407855476736032951e-01,-9.196936831623490904e-02,3.237975807413635138e-02,2.705265923959768171e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.977842047313435048e-01,6.436969305431599064e-01,-9.251181739183318709e-02,3.222450803890829218e-02,2.694456432921057409e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+6.992839040443636911e-01,6.466347162398403681e-01,-9.305087027912192776e-02,3.206866625344144406e-02,2.683619865419816786e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.007805318621018742e-01,6.495992371036047874e-01,-9.358576383703845480e-02,3.191224344763778781e-02,2.672756024652648277e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.022740816108536954e-01,6.525908012342157560e-01,-9.411571006947772045e-02,3.175525162037014237e-02,2.661864711771446679e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.037645467304356472e-01,6.556096864696080440e-01,-9.463989451587123680e-02,3.159770407109799978e-02,2.650945726357640253e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.052519206742123847e-01,6.586561408936743023e-01,-9.515747860147094617e-02,3.143961543138151793e-02,2.639998867005751149e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.067361969091264795e-01,6.617303800744970266e-01,-9.566760077047015143e-02,3.128100169165104472e-02,2.629023931857010776e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.082173689157260643e-01,6.648325850240789370e-01,-9.616937755732904436e-02,3.112188022672434523e-02,2.618020719224042892e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.096954301881950311e-01,6.679629012093780682e-01,-9.666190454420069134e-02,3.096226981846563961e-02,2.606989028188508306e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.111703742343798984e-01,6.711214376531613457e-01,-9.714425804060315695e-02,3.080219067656959908e-02,2.595928659247244696e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.126421945758196763e-01,6.743082649434755105e-01,-9.761549675632875955e-02,3.064166445532164398e-02,2.584839414936955793e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.141108847477731780e-01,6.775234136001371255e-01,-9.807466276670777361e-02,3.048071426856754571e-02,2.573721100525028227e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.155764382992484407e-01,6.807668748036541650e-01,-9.852078405318087650e-02,3.031936470095623892e-02,2.562573524657069113e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.170388487930305921e-01,6.840385950661254100e-01,-9.895287539864867921e-02,3.015764181610538358e-02,2.551396500071571039e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.184981098057098281e-01,6.873384793625200162e-01,-9.936994067688005583e-02,2.999557316217135380e-02,2.540189844305328348e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.199542149277103897e-01,6.906663878938401613e-01,-9.977097481603595941e-02,2.983318777313047343e-02,2.528953380395204675e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.214071577633178745e-01,6.940221354976774393e-01,-1.001549650439275962e-01,2.967051616768029421e-02,2.517686937606080644e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.228569319307079910e-01,6.974054902957468327e-01,-1.005208938427540055e-01,2.950759034440905704e-02,2.506390352166617541e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.243035310619742040e-01,7.008161739893316033e-01,-1.008677404065546762e-01,2.934444377278144431e-02,2.495063468002371998e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.257469488031560445e-01,7.042538597442451476e-01,-1.011944822819595691e-01,2.918111138226757786e-02,2.483706137476471565e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.271871788142661996e-01,7.077181735798332429e-01,-1.015000991126854130e-01,2.901762954639479017e-02,2.472318222131396626e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.286242147693199334e-01,7.112086918022164639e-01,-1.017835731462146953e-01,2.885403606323066680e-02,2.460899593417587572e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.300580503563610657e-01,7.147249406773320546e-01,-1.020438923980546281e-01,2.869037013375016348e-02,2.449450133461488766e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.314886792774907276e-01,7.182663992158943023e-01,-1.022800531182562478e-01,2.852667233446495865e-02,2.437969735777003322e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.329160952488951164e-01,7.218324953378624231e-01,-1.024910612822366451e-01,2.836298458800611683e-02,2.426458305993432207e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.343402920008724744e-01,7.254226077569050357e-01,-1.026759353772163930e-01,2.819935012973777064e-02,2.414915762583806635e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.357612632778612882e-01,7.290360640486789334e-01,-1.028337085542377904e-01,2.803581347098929241e-02,2.403342037587637425e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.371790028384670457e-01,7.326721443335779682e-01,-1.029634311652376094e-01,2.787242035838781093e-02,2.391737077307536097e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.385935044554904350e-01,7.363300783190246390e-01,-1.030641732016141787e-01,2.770921772981322154e-02,2.380100842987298015e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.400047619159539902e-01,7.400090483762525873e-01,-1.031350261634373100e-01,2.754625366677564829e-02,2.368433311488701842e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.414127690211299582e-01,7.437081861839328623e-01,-1.031751060598700698e-01,2.738357734403735685e-02,2.356734475947150353e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.428175195865669433e-01,7.474265772261855512e-01,-1.031835551296509113e-01,2.722123897456520114e-02,2.345004346433454123e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.442190074421178858e-01,7.511632608028170344e-01,-1.031595434025794206e-01,2.705928975414408771e-02,2.333242950534920035e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.456172264319661513e-01,7.549172288656820218e-01,-1.031022732564072786e-01,2.689778179984140399e-02,2.321450333969798940e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.470121704146531760e-01,7.586874280653286284e-01,-1.030109789788696967e-01,2.673676808683593531e-02,2.309626561166410186e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.484038332631057777e-01,7.624727623425942014e-01,-1.028849308769993337e-01,2.657630238266945819e-02,2.297771715807201964e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.497922088646621352e-01,7.662720919935934738e-01,-1.027234358789196889e-01,2.641643917817752124e-02,2.285885901335698300e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.511772911210995440e-01,7.700842355092928360e-01,-1.025258408570767360e-01,2.625723361648134302e-02,2.273969241473053349e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.525590739486605063e-01,7.739079721632565745e-01,-1.022915338911943167e-01,2.609874141846025425e-02,2.262021880662682116e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.539375512780798205e-01,7.777420411407867107e-01,-1.020199464047478743e-01,2.594101880691538145e-02,2.250043984517275109e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.553127170546113378e-01,7.815851457344857867e-01,-1.017105549083185401e-01,2.578412242810899924e-02,2.238035740218556613e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.566845652380541631e-01,7.854359535137794834e-01,-1.013628830776838047e-01,2.562810927128847741e-02,2.225997356877952860e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.580530898027795228e-01,7.892930974928649146e-01,-1.009765026153456452e-01,2.547303658672158896e-02,2.213929065881844738e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.594182847377570766e-01,7.931551793979473297e-01,-1.005510357831364554e-01,2.531896180227380042e-02,2.201831121198382368e-01,-0.000000000000000000e+00,-0.000000000000000000e+00
+7.607801440465815634e-01,7.970207716947490528e-01,-1.000861566825645282e-01,2.516594243741059750e-02,2.189703799624795055e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.621386617474984471e-01,8.008884171552363984e-01,-9.958159208779819582e-02,2.501403601706739949e-02,2.177547401029090168e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.634938318734313389e-01,8.047566338116259876e-01,-9.903712345900499148e-02,2.486329998419596621e-02,2.165362248548943769e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.648456484720069781e-01,8.086239164499903209e-01,-9.845258810006730921e-02,2.471379161055385631e-02,2.153148688723071913e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.661941056055823207e-01,8.124887357573871149e-01,-9.782787944092867505e-02,2.456556790794054562e-02,2.140907091628746761e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.675391973512698529e-01,8.163495452861698753e-01,-9.716294915360354500e-02,2.441868553843024731e-02,2.128637850964787692e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.688809178009646805e-01,8.202047807697686599e-01,-9.645780722003206853e-02,2.427320072386809566e-02,2.116341384063343545e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.702192610613689538e-01,8.240528612554008925e-01,-9.571252294660219373e-02,2.412916915610947149e-02,2.104018131926398416e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.715542212540194011e-01,8.278921952142621610e-01,-9.492722587060130335e-02,2.398664590647559686e-02,2.091668559177201347e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.728857925153117536e-01,8.317211800273783817e-01,-9.410210582387362743e-02,2.384568533591081146e-02,2.079293153975554087e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.742139689965273908e-01,8.355382045255229650e-01,-9.323741384990455461e-02,2.370634100554401200e-02,2.066892427917769171e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.755387448638585424e-01,8.393416525214556501e-01,-9.233346191752912768e-02,2.356866558770935247e-02,2.054466915887447342e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.768601142984342678e-01,8.431299050050713451e-01,-9.139062346989734742e-02,2.343271077850435369e-02,2.042017175854342892e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.781780714963458800e-01,8.469013415156376734e-01,-9.040933361921636713e-02,2.329852721021365147e-02,2.029543788657025283e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.794926106686721479e-01,8.506543438656616329e-01,-8.939008831495655338e-02,2.316616436619996916e-02,2.017047357735032020e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.808037260415053860e-01,8.543872961037932701e-01,-8.833344511648412445e-02,2.303567049651573592e-02,2.004528508841793288e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.821114118559759909e-01,8.580985903763292955e-01,-8.724002270775023382e-02,2.290709253478704147e-02,1.991987889712936999e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.834156623682784204e-01,8.617866271078226115e-01,-8.611050011115704839e-02,2.278047601684096701e-02,1.979426169680747749e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.847164718496960623e-01,8.654498174353639772e-01,-8.494561556611178454e-02,2.265586500271364601e-02,1.966844039285099155e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.860138345866267695e-01,8.690865845147446311e-01,-8.374616734998772960e-02,2.253330199928579164e-02,1.954242209844556100e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.873077448806070633e-01,8.726953673686587143e-01,-8.251301181681902630e-02,2.241282788537984558e-02,1.941621413007115216e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.885981970483383341e-01,8.762746234660607758e-01,-8.124706318510863112e-02,2.229448183945256443e-02,1.928982400234191263e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.898851854217111557e-01,8.798228286791766051e-01,-7.994929195628681928e-02,2.217830126996013160e-02,1.916325942290910411e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.911687043478299319e-01,8.833384819174915359e-01,-7.862072450477693153e-02,2.206432174800776816e-02,1.903652828690999832e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.924487481890386542e-01,8.868201046956466493e-01,-7.726244095145523572e-02,2.195257694338618309e-02,1.890963867120712683e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.937253113229446599e-01,8.902662446003122287e-01,-7.587557530599473021e-02,2.184309856278007084e-02,1.878259882845775830e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.949983881424437238e-01,8.936754769404373633e-01,-7.446131236397190989e-02,2.173591629135546155e-02,1.865541718081921430e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.962679730557449265e-01,8.970464060177898435e-01,-7.302088761037732467e-02,2.163105773791988012e-02,1.852810231348974712e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.975340604863950800e-01,9.003776664241633920e-01,-7.155558476072519658e-02,2.152854838256312159e-02,1.840066296818876901e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+7.987966448733025970e-01,9.036679274869553469e-01,-7.006673483709040251e-02,2.142841152812960392e-02,1.827310803618196200e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.000557206707632485e-01,9.069158910719848921e-01,-6.855571354767926728e-02,2.133066825490989582e-02,1.814544655122229400e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.013112823484830338e-01,9.101202949846104451e-01,-6.702394079631417145e-02,2.123533737886154626e-02,1.801768768256547526e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.025633243916036053e-01,9.132799140307144015e-01,-6.547287744248810049e-02,2.114243541276508362e-02,1.788984072758169064e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.038118413007259155e-01,9.163935616123491945e-01,-6.390402375326691142e-02,2.105197653262915955e-02,1.776191510438167864e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.050568275919344208e-01,9.194600908498824499e-01,-6.231891792029237848e-02,2.096397254616567604e-02,1.763392034420411814e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.062982777968216164e-01,9.224783954151157817e-01,-6.071913359266969956e-02,2.087843286541740673e-02,1.750586608384324949e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.075361864625111297e-01,9.254474102066732044e-01,-5.910627778714810238e-02,2.079536448316768663e-02,1.737776205793958584e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.087705481516828110e-01,9.283661118182801042e-01,-5.748198890271945943e-02,2.071477195301530891e-02,1.724961809140161950e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.100013574425958263e-01,9.312335219892210469e-01,-5.584793435117629823e-02,2.063665737317264132e-02,1.712144409149209956e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.112286089291126379e-01,9.340487044876554235e-01,-5.420580856720586677e-02,2.056102037392053619e-02,1.699325004004036410e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.124522972207226523e-01,9.368107694830801746e-01,-5.255733017817123404e-02,2.048785810912695862e-02,1.686504598571443025e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.136724169425666453e-01,9.395188711169134566e-01,-5.090424061094959290e-02,2.041716525122880266e-02,1.673684203603684750e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.148889627354590770e-01,9.421722079597700805e-01,-4.924830084028083282e-02,2.034893398979333717e-02,1.660864834993144024e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.161019292559127392e-01,9.447700258217968106e-01,-4.759128974480203444e-02,2.028315403432722624e-02,1.648047512982032392e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.173113111761618477e-01,9.473116152396394707e-01,-4.593500172355011180e-02,2.021981261980827088e-02,1.635233261401564941e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.185171031841850242e-01,9.497963136798592831e-01,-4.428124405682394571e-02,2.015889451636412519e-02,1.622423106907503187e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.197192999837294991e-01,9.522235031368122549e-01,-4.263183433777100600e-02,2.010038204283153690e-02,1.609618078236089156e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.209178962943334268e-01,9.545926122165051542e-01,-4.098859901811028467e-02,2.004425508330760428e-02,1.596819205464486691e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.221128868513501997e-01,9.569031144261247457e-01,-3.935337050636497636e-02,1.999049110679539110e-02,1.584027519281810403e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.233042664059700977e-01,9.591545293140031347e-01,-3.772798488027004349e-02,1.993906519071965075e-02,1.571244050268204295e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.244920297252450458e-01,9.613464196355895330e-01,-3.611427902699804182e-02,1.988995004810981820e-02,1.558469828199568474e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.256761715921098199e-01,9.634783938209041665e-01,-3.451408978456752680e-02,1.984311605734499426e-02,1.545705881361748579e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.268566868054064711e-01,9.655501037455226365e-01,-3.292925018650311869e-02,1.979853129479005810e-02,1.532953235868485253e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.280335701799065307e-01,9.675612441438147249e-01,-3.136158832797301260e-02,1.975616157140762733e-02,1.520212915010002719e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.292068165463334362e-01,9.695115506980487297e-01,-2.981292457308781091e-02,1.971597047100227817e-02,1.507485938629687106e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.303764207513859574e-01,9.714008024288285359e-01,-2.828506940082096965e-02,1.967791939273616161e-02,1.494773322501126156e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.315423776577602899e-01,9.732288177685716191e-01,-2.677982169962204659e-02,1.964196759510066642e-02,1.482076077722551677e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.327046821441730362e-01,9.749954547016222994e-01,-2.529896582459697715e-02,1.960807224357612871e-02,1.469395210151084874e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.338633291053832997e-01,9.767006093221006768e-01,-2.384427075840802115e-02,1.957618846016032088e-02,1.456731719848932960e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.350183134522153328e-01,9.783442157557986629e-01,-2.241748730900664302e-02,1.954626937499190364e-02,1.444086600545492760e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.361696301115808527e-01,9.799262425782738761e-01,-2.102034607111445685e-02,1.951826618122443763e-02,1.431460839137420338e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.373172740265014680e-01,9.814466957561227556e-01,-1.965455625660952541e-02,1.949212819129043234e-02,1.418855415194846536e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.384612401561303274e-01,9.829056122068932089e-01,-1.832180284011759169e-02,1.946780289551328785e-02,1.406271300495195720e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.396015234757753243e-01,9.843030628792531767e-01,-1.702374551186844678e-02,1.944523602307820370e-02,1.393709458599079742e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.407381189769198571e-01,9.856391482182258468e-01,-1.576201691496220691e-02,1.942437160391647516e-02,1.381170844420094301e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.418710216672458113e-01,9.869139988344564074e-01,-1.453822062075811779e-02,1.940515203291762539e-02,1.368656403852049896e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.430002265706553199e-01,9.881277724718008360e-01,-1.335392959653193198e-02,1.938751813573789951e-02,1.356167073390226741e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.441257287272921905e-01,9.892806536137056517e-01,-1.221068503541743525e-02,1.937140923584345850e-02,1.343703779808225474e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.452475231935638877e-01,9.903728519451465706e-01,-1.110999452866419032e-02,1.935676322269190217e-02,1.331267439824788401e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.463656050421637378e-01,9.914045999919394170e-01,-1.005333037105025223e-02,1.934351662179413858e-02,1.318858959825946753e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.474799693620920227e-01,9.923761516344690170e-01,-9.042128858582456838e-03,1.933160466550105580e-02,1.306479235594432686e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.485906112586771854e-01,9.932877798390985502e-01,-8.077788605279702036e-03,1.932096136441179002e-02,1.294129152088744072e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.496975258535985898e-01,9.941397776930420704e-01,-7.161669077954335542e-03,1.931151958048153011e-02,1.281809583215210435e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.508007082849067260e-01,9.949324544642846879e-01,-6.295090074850725158e-03,1.930321110010961919e-02,1.269521391633030016e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.519001537070450825e-01,9.956661340652838144e-01,-5.479329448136387171e-03,1.929596670863076638e-02,1.257265428593490331e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.529958572908716841e-01,9.963411537180969502e-01,-4.715623912816583121e-03,1.928971626474565437e-02,1.245042533804521512e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.540878142236794091e-01,9.969578629945350379e-01,-4.005166352393294341e-03,1.928438877497468465e-02,1.232853535305636489e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.551760197092181937e-01,9.975166213510142654e-01,-3.349105582121427241e-03,1.927991246981725776e-02,1.220699249372519252e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.562604689677154601e-01,9.980177968796165588e-01,-2.748545614890790870e-03,1.927621487880052525e-02,1.208580480443741251e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.573411572358974331e-01,9.984617642963503625e-01,-2.204544544422837229e-03,1.927322290637201868e-02,1.196498021070378087e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.584180797670095675e-01,9.988489032637625220e-01,-1.718113878810464349e-03,1.927086290762117238e-02,1.184452651891263475e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.594912318308379762e-01,9.991795982460821568e-01,-1.290218123069769747e-03,1.926906076393333778e-02,1.172445141618015385e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.605606087137299687e-01,9.994542342894839893e-01,-9.217739159798195260e-04,1.926774195825151428e-02,1.160476247047031034e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.616262057186147016e-01,9.996731983511577546e-01,-6.136496582662007366e-04,1.926683165034547701e-02,1.148546713098005378e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.626880181650236068e-01,9.998368760684646084e-01,-3.666648464766913648e-04,1.926625475150443856e-02,1.136657272847052397e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.637460413891117073e-01,9.999456502927994705e-01,-1.815897527083772751e-04,1.926593599897312090e-02,1.124808647610561763e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.648002707436770464e-01,9.999999006709333615e-01,-5.914459054924125770e-05,1.926580002996886726e-02,1.113001547017107989e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.658507015981822263e-01,1.000000000000000000e+00,-0.000000000000000000e+00,1.926577145501411417e-02,1.101236669123313167e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.668973293387736145e-01,9.999463171389191274e-01,-4.776073287077151232e-06,1.926577493022299178e-02,1.089514700519804563e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.679401493683025492e-01,9.998392111367156998e-01,-7.404257801837900357e-05,1.926573522941560368e-02,1.077836316452941839e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.689791571063447684e-01,9.996790312192816996e-01,-2.083183497540764021e-04,1.926557731490765105e-02,1.066202180995547144e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.700143479892213927e-01,9.994661183546836858e-01,-4.080713781027198997e-04,1.926522640806421424e-02,1.054612947186404498e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.710457174700181326e-01,9.992008001037426812e-01,-6.737186980437973689e-04,1.926460805828533393e-02,1.043069257200594235e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.720732610186056055e-01,9.988833925137882153e-01,-1.005626443851530532e-03,1.926364821138197916e-02,1.031571742536665920e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.730969741216594304e-01,9.985141964784909341e-01,-1.404109412041024954e-03,1.926227327665753358e-02,1.020121024203553828e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.741168522826797682e-01,9.980934992311518705e-01,-1.869431411827702540e-03,1.926041019350568581e-02,1.008717712930092825e-01,-0.000000000000000000e+00,0.000000000000000000e+00
+8.751328910220110835e-01,9.976215714429843562e-01,-2.401805671825128861e-03,1.925798649582474709e-02,9.973624093614823216e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.761450858768620176e-01,9.970986669408742342e-01,-3.001394241540672990e-03,1.925493037599641105e-02,9.860557042934503524e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.771534324013248174e-01,9.965250229409825522e-01,-3.668308443382305595e-03,1.925117074751385438e-02,9.747981788800723357e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.781579261663950975e-01,9.959008574560112015e-01,-4.402609034683878202e-03,1.924663730642485818e-02,9.635904048704428682e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.791585627599906028e-01,9.952263696291376860e-01,-5.204306721132236080e-03,1.924126059098706445e-02,9.524329448409390997e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.801553377869717476e-01,9.945017384965378904e-01,-6.073361689506399236e-03,1.923497204045769571e-02,9.413263524397669224e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.811482468691598235e-01,9.937271236730134127e-01,-7.009684463401542366e-03,1.922770405251602449e-02,9.302711726195327102e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.821372856453569833e-01,9.929026629424043682e-01,-8.013136038260097044e-03,1.921939003887503106e-02,9.192679418840014882e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.831224497713646704e-01,9.920284728804011909e-01,-9.083527676853714664e-03,1.920996448007147514e-02,9.083171885360163955e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.841037349200038253e-01,9.911046487333220467e-01,-1.022062233801185153e-02,1.919936297841837863e-02,8.974194329208426413e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.850811367811323160e-01,9.901312634563490533e-01,-1.142413428825472116e-02,1.918752230905704584e-02,8.865751876651234431e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.860546510616654770e-01,9.891083668174416310e-01,-1.269372958274587147e-02,1.917438047030746787e-02,8.757849579314265498e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.870242734855938727e-01,9.880359869150859842e-01,-1.402902618191949162e-02,1.915987673218805948e-02,8.650492416543427432e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.879899997940023937e-01,9.869141283299138756e-01,-1.542959484733634132e-02,1.914395168378076398e-02,8.543685297801620337e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.889518257450892413e-01,9.857427733102797074e-01,-1.689495969326294811e-02,1.912654727815681424e-02,8.437433065027093171e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.899097471141842464e-01,9.845218809813742755e-01,-1.842459807017562262e-02,1.910760687657425683e-02,8.331740494923077500e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.908637596937675207e-01,9.832513875273138915e-01,-2.001794144293862418e-02,1.908707529081083570e-02,8.226612301237407054e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.918138592934876652e-01,9.819312064266144668e-01,-2.167437569575871720e-02,1.906489882410446202e-02,8.122053136969183129e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.927600417401808652e-01,9.805612278975907037e-01,-2.339324169679227183e-02,1.904102531030902809e-02,8.018067596529614149e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.937023028778884326e-01,9.791413205959619281e-01,-2.517383583926234916e-02,1.901540415178818236e-02,7.914660217842954537e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.946406385678755679e-01,9.776713311456217692e-01,-2.701541098301853128e-02,1.898798635506162777e-02,7.811835484220777670e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.955750446886495686e-01,9.761510826840030797e-01,-2.891717605355393142e-02,1.895872456554883317e-02,7.709597826442719792e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.965055171359771480e-01,9.745803789256104466e-01,-3.087829739564015158e-02,1.892757310062098339e-02,7.607951624538544788e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.974320518229038646e-01,9.729590017772963284e-01,-3.289789896237604178e-02,1.889448798085950620e-02,7.506901209449207135e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.983546446797705531e-01,9.712867124071563607e-01,-3.497506307180424379e-02,1.885942695988633219e-02,7.406450864771811637e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+8.992732916542326427e-01,9.695632527595571259e-01,-3.710883111410048785e-02,1.882234955244188254e-02,7.306604828263167650e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.001879887112764766e-01,9.677883453177524853e-01,-3.929820378752852533e-02,1.878321706092240390e-02,7.207367293248825857e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.010987318332384088e-01,9.659616938947968023e-01,-4.154214185651876334e-02,1.874199260072034745e-02,7.108742409983577482e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.020055170198213457e-01,9.640829843783419451e-01,-4.383956698354311149e-02,1.869864112341130050e-02,7.010734286859446329e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.029083402881132869e-01,9.621518857061663876e-01,-4.618936180137834385e-02,1.865312943897530495e-02,6.913346991517377305e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.038071976726038681e-01,9.601680499707101468e-01,-4.859037153119333957e-02,1.860542623590432862e-02,6.816584551821680549e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.047020852252026790e-01,9.581311136688172025e-01,-5.104140373359482979e-02,1.855550209987363863e-02,6.720450956801905795e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.055929990152560283e-01,9.560406995763781612e-01,-5.354122956249111193e-02,1.850332953100309805e-02,6.624950157309723864e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.064799351295638186e-01,9.538964163924987805e-01,-5.608858382919922059e-02,1.844888295953313676e-02,6.530086066646914600e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.073628896723984205e-01,9.516978609713606652e-01,-5.868216635802124548e-02,1.839213876002196088e-02,6.435862561014502048e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.082418587655194386e-01,9.494446177695128330e-01,-6.132064211514817731e-02,1.833307526375772128e-02,6.342283479880188313e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.091168385481928071e-01,9.471362610201509158e-01,-6.400264209531325577e-02,1.827167277010159629e-02,6.249352626234434133e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.099878251772063331e-01,9.447723550692912475e-01,-6.672676421770878941e-02,1.820791355587520616e-02,6.157073766681225857e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.108548148268875710e-01,9.423524557102829213e-01,-6.949157402398371819e-02,1.814178188331203034e-02,6.065450631454690628e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.117178036891198101e-01,9.398761127164926288e-01,-7.229560535929541176e-02,1.807326400643329695e-02,5.974486914205406202e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.125767879733592824e-01,9.373428694711127829e-01,-7.513736092084370710e-02,1.800234817614286553e-02,5.884186271639354343e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.134317639066518169e-01,9.347522641593823955e-01,-7.801531342562058324e-02,1.792902464371255722e-02,5.794552323182786124e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.142827277336491587e-01,9.321038319599610489e-01,-8.092790623958588669e-02,1.785328566250543939e-02,5.705588650376981763e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.151296757166257345e-01,9.293971048480033081e-01,-8.387355350548972299e-02,1.777512548882535445e-02,5.617298796247331000e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.159726041354947501e-01,9.266316147725572883e-01,-8.685064207695379623e-02,1.769454038100604781e-02,5.529686264474963087e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.168115092878250660e-01,9.238068936161748512e-01,-8.985753187912497164e-02,1.761152859648202479e-02,5.442754518431822119e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.176463874888565186e-01,9.209224750864097864e-01,-9.289255643943870877e-02,1.752609038810231876e-02,5.356506980128668272e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.184772350715175726e-01,9.179778952440973416e-01,-9.595402408487881118e-02,1.743822799872245949e-02,5.270947029056696764e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.193040483864395318e-01,9.149726951887851012e-01,-9.904021858870698525e-02,1.734794565440300751e-02,5.186078000879148359e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.201268238019744139e-01,9.119064206573402886e-01,-1.021494002248568428e-01,1.725524955613263328e-02,5.101903186007123586e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.209455577042096053e-01,9.087786243711833967e-01,-1.052798065741077094e-01,1.716014787011402309e-02,5.018425828153764284e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.217602464969846254e-01,9.055888683232087288e-01,-1.084296537012590517e-01,1.706265071652764143e-02,4.935649122613924855e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.225708866019060039e-01,9.023367232100459878e-01,-1.115971366501387479e-01,1.696277015689769477e-02,4.853576214527022953e-02,-0.000000000000000000e+00,0.000000000000000000e+00
+9.233774744583637117e-01,8.990217708265549890e-01,-1.147804303693413103e-01,1.686052018045527609e-02,4.772210197092283113e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.241800065235469264e-01,8.956436059437988817e-01,-1.179776915973768242e-01,1.675591668849609264e-02,4.691554109568871894e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.249784792724585758e-01,8.922018365457452527e-01,-1.211870587099932450e-01,1.664897747740705369e-02,4.611610935233899367e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.257728891979321029e-01,8.886960850233658338e-01,-1.244066531139852716e-01,1.653972222101623285e-02,4.532383599315210204e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.265632328106458981e-01,8.851259908284025979e-01,-1.276345808829380812e-01,1.642817245063241463e-02,4.453874966776041888e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.273495066391393982e-01,8.814912108199702834e-01,-1.308689326978724754e-01,1.631435153428568985e-02,4.376087839978604760e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.281317072298272963e-01,8.777914207688606307e-01,-1.341077857116945993e-01,1.619828465468597239e-02,4.299024956358142580e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.289098311470160851e-01,8.740263165763200170e-01,-1.373492045271957063e-01,1.607999878544369085e-02,4.222688985958960445e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.296838749729178231e-01,8.701956154278288613e-01,-1.405912422122561067e-01,1.595952266610126125e-02,4.147082528977363153e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.304538353076660107e-01,8.662990578716871504e-01,-1.438319409762223722e-01,1.583688677613149381e-02,4.072208113146511682e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.312197087693302455e-01,8.623364079848374608e-01,-1.470693340796929427e-01,1.571212330750039812e-02,3.998068191130759591e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.319814919939308773e-01,8.583074552447698125e-01,-1.503014466985738429e-01,1.558526613560271695e-02,3.924665137846689023e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.327391816354543286e-01,8.542120149591384548e-01,-1.535262969509309239e-01,1.545635078929719769e-02,3.852001247780679344e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.334927743658669730e-01,8.500499306168413094e-01,-1.567418970214980167e-01,1.532541441970079396e-02,3.780078732211502451e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.342422668751307890e-01,8.458210735931714908e-01,-1.599462548562944897e-01,1.519249576764177163e-02,3.708899716434908150e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.349876558712167940e-01,8.415253453201917422e-01,-1.631373751056079124e-01,1.505763512967114338e-02,3.638466236965120365e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.357289380801203649e-01,8.371626775592521108e-01,-1.663132601630356311e-01,1.492087432329312902e-02,3.568780238706020791e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.364661102458753383e-01,8.327330347964900792e-01,-1.694719116315850371e-01,1.478225665089440664e-02,3.499843572081289683e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.371991691305681105e-01,8.282364134120293597e-01,-1.726113318132085073e-01,1.464182686242693557e-02,3.431657990155521920e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.379281115143518477e-01,8.236728432854422133e-01,-1.757295248452885317e-01,1.449963111683772073e-02,3.364225145849762677e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.386529341954613637e-01,8.190423896621102928e-01,-1.788244978482302905e-01,1.435571694268690524e-02,3.297546589036323694e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.393736339902262200e-01,8.143451528624557900e-01,-1.818942624696848476e-01,1.421013319754125745e-02,3.231623763676478439e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.400902077330849371e-01,8.095812694321929959e-01,-1.849368362373129404e-01,1.406293002631577548e-02,3.166458005016530020e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.408026522765996491e-01,8.047509129181580345e-01,-1.879502439622230681e-01,1.391415881845770031e-02,3.102050536764965061e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.415109644914688714e-01,7.998542943062041388e-01,-1.909325187519576028e-01,1.376387216439362189e-02,3.038402468339861801e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.422151412665417114e-01,7.948916639451082400e-01,-1.938817039652303564e-01,1.361212381083958672e-02,2.975514792083217702e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.429151795088319687e-01,7.898633106976153950e-01,-1.967958541573104070e-01,1.345896861511061866e-02,2.913388380528327687e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.436110761435310135e-01,7.847695629992565980e-01,-1.996730368571377201e-01,1.330446249865762168e-02,2.852023983776654989e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.443028281140214419e-01,7.796107898460621355e-01,-2.025113337000755687e-01,1.314866239952408068e-02,2.791422226855918426e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.449904323818909546e-01,7.743874006679237754e-01,-2.053088417999410775e-01,1.299162622432421636e-02,2.731583607157910334e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.456738859269452346e-01,7.690998461006670617e-01,-2.080636755792538706e-01,1.283341279904895611e-02,2.672508491948669129e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.463531857472214925e-01,7.637486182444631622e-01,-2.107739676297410714e-01,1.267408181934180991e-02,2.614197115928674695e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.470283288590011228e-01,7.583342513932864737e-01,-2.134378708943901748e-01,1.251369379992015030e-02,2.556649578865853589e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.476993122968236927e-01,7.528573216193672479e-01,-2.160535595856034607e-01,1.235231002316428996e-02,2.499865843295575804e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.483661331134992656e-01,7.473184470748575059e-01,-2.186192307453441730e-01,1.218999248734308101e-02,2.443845732364670342e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.490287883801215019e-01,7.417182889535380852e-01,-2.211331056294966457e-01,1.202680385409988888e-02,2.388588927685350929e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.496872751860808703e-01,7.360575506665816281e-01,-2.235934313105904236e-01,1.186280739540528789e-02,2.334094967317505362e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.503415906390768608e-01,7.303369784055709912e-01,-2.259984818994265621e-01,1.169806693996142816e-02,2.280363243849667029e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.509917318651310847e-01,7.245573612936184738e-01,-2.283465605642405749e-01,1.153264681907175196e-02,2.227393002567330110e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.516376960085998205e-01,7.187195310141859528e-01,-2.306360002144922650e-01,1.136661181195016378e-02,2.175183339707282074e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.522794802321865593e-01,7.128243609121449609e-01,-2.328651650078394919e-01,1.120002709124164574e-02,2.123733200905185761e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.529170817169542174e-01,7.068727674437458308e-01,-2.350324525053984481e-01,1.103295816752033684e-02,2.073041379685389499e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.535504976623380147e-01,7.008657086631771005e-01,-2.371362942166388710e-01,1.086547083380845838e-02,2.023106516077523287e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.541797252861573542e-01,6.948041851926221035e-01,-2.391751576238655419e-01,1.069763110982308377e-02,1.973927095348661231e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.548047618246282564e-01,6.886892381488879389e-01,-2.411475468676008704e-01,1.052950518609615388e-02,1.925501446871030753e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.554256045323753499e-01,6.825219502054206133e-01,-2.430520050467054327e-01,1.036115936792538460e-02,1.877827743155377829e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.560422506824440836e-01,6.763034448578990609e-01,-2.448871148569138345e-01,1.019266001896929237e-02,1.830903998924942114e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.566546975663126062e-01,6.700348849906350157e-01,-2.466514998528986091e-01,1.002407350538354197e-02,1.784728070410276229e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.572629424939035347e-01,6.637174734102787577e-01,-2.483438262053130063e-01,9.855466139578562476e-03,1.739297654756436287e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.578669827935962777e-01,6.573524520679466798e-01,-2.499628037062967267e-01,9.686904124164884625e-03,1.694610289523602181e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.584668158122380266e-01,6.509411006220376938e-01,-2.515071872476648540e-01,9.518453495941962481e-03,1.650663352381518886e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.590624389151560791e-01,6.444847366859275395e-01,-2.529757776909021816e-01,9.350180070215763362e-03,1.607454060929139339e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.596538494861691637e-01,6.379847144510718548e-01,-2.543674233952093466e-01,9.182149385316295057e-03,1.564979472639608377e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.602410449275984305e-01,6.314424242479453886e-01,-2.556810211212074413e-01,9.014426647427584496e-03,1.523236484972699013e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.608240226602797751e-01,6.248592915211844678e-01,-2.569155174627831828e-01,8.847076675754624725e-03,1.482221835607127648e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.614027801235747184e-01,6.182367756596600650e-01,-2.580699096725390107e-01,8.680163848102726357e-03,1.441932102851632702e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.619773147753815090e-01,6.115763694602583955e-01,-2.591432469167220809e-01,8.513752046968863993e-03,1.402363706183706177e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.625476240921463367e-01,6.048795973192359066e-01,-2.601346310783252402e-01,8.347904606179226347e-03,1.363512906953726067e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.631137055688746562e-01,5.981480157605795211e-01,-2.610432182238467957e-01,8.182684258034301356e-03,1.325375809225142268e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.636755567191421790e-01,5.913832116491449797e-01,-2.618682190814686228e-01,8.018153081056892187e-03,1.287948360689294969e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.642331750751055308e-01,5.845867997732209220e-01,-2.626089002185493793e-01,7.854372448592389019e-03,1.251226353852034727e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.647865581875129104e-01,5.777604231226770581e-01,-2.632645846325327166e-01,7.691402978067890528e-03,1.215205427315115341e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.653357036257157464e-01,5.709057510224572196e-01,-2.638346529574213117e-01,7.529304481139659916e-03,1.179881067206763404e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.658806089776785786e-01,5.640244791950640435e-01,-2.643185439990956564e-01,7.368135914514469843e-03,1.145248608710037336e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.664212718499893828e-01,5.571183264847947569e-01,-2.647157555578610877e-01,7.207955331906926687e-03,1.111303237777728180e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.669576898678714505e-01,5.501890353759992802e-01,-2.650258450768114282e-01,7.048819836775519106e-03,1.078039992988921725e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.674898606751921593e-01,5.432383687316317822e-01,-2.652484302692814100e-01,6.890785536269569515e-03,1.045453767537066203e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.680177819344745194e-01,5.362681105791432534e-01,-2.653831898622292518e-01,6.733907496023095775e-03,1.013539311378801322e-02,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.685414513269068326e-01,5.292800635640213525e-01,-2.654298638671922506e-01,6.578239696278171886e-03,9.822912334369189938e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.690608665523531284e-01,5.222760475352420384e-01,-2.653882545132350557e-01,6.423834988965721610e-03,9.517040040315255522e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.695760253293631559e-01,5.152578984756136515e-01,-2.652582259608430859e-01,6.270745056194317740e-03,9.217719573796703814e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.700869253951824867e-01,5.082274666090489390e-01,-2.650397054193972535e-01,6.119020369971453835e-03,8.924892942602950796e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.705935645057625072e-01,5.011866153352110276e-01,-2.647326830662217745e-01,5.968710153069649305e-03,8.638500847828523010e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.710959404357700775e-01,4.941372200601413245e-01,-2.643372125053026767e-01,5.819862341413887044e-03,8.358482712685461405e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.715940509785973012e-01,4.870811658810803579e-01,-2.638534108614351559e-01,5.672523547727237325e-03,8.084776712487405995e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.720878939463715174e-01,4.800203467940873936e-01,-2.632814588915360909e-01,5.526739026773554858e-03,7.817319806087165424e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.725774671699647378e-01,4.729566639991428234e-01,-2.626216013808759242e-01,5.382552641898703111e-03,7.556047767844780577e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.730627684990029724e-01,4.658920238642198508e-01,-2.618741468268164785e-01,5.240006833156383471e-03,7.300895221331099236e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.735437958018759996e-01,4.588283375820351728e-01,-2.610394676253708912e-01,5.099142587014896845e-03,7.051795673429952886e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.740205469657464699e-01,4.517675183466238198e-01,-2.601179999637960316e-01,4.959999407584654053e-03,6.808681549775899740e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.744930198965594537e-01,4.447114813627381924e-01,-2.591102437888659904e-01,4.822615289438544027e-03,6.571484230863162621e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.749612125190515455e-01,4.376621405394703901e-01,-2.580167623833641288e-01,4.687026692110080571e-03,6.340134089171577982e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.754251227767596344e-01,4.306214087191920226e-01,-2.568381822820939298e-01,4.553268516295307794e-03,6.114560527271521370e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.758847486320304521e-01,4.235911948762904888e-01,-2.555751930721387732e-01,4.421374081655824080e-03,5.894692016275223344e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.763400880660293435e-01,4.165734029777506953e-01,-2.542285470490306953e-01,4.291375106292050733e-03,5.680456135769600817e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.767911390787489267e-01,4.095699317892852465e-01,-2.527990583810819092e-01,4.163301688096772597e-03,5.471779613511989875e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.772378996890179748e-01,4.025826714043255206e-01,-2.512876032125871961e-01,4.037182287817540904e-03,5.268588366263179772e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.776803679345105191e-01,3.956135029405784542e-01,-2.496951185389463257e-01,3.913043713830307094e-03,5.070807541419178679e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.781185418717538438e-01,3.886642971879929465e-01,-2.480226020637658624e-01,3.790911108881104327e-03,4.878361558751103874e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.785524195761373667e-01,3.817369131748142852e-01,-2.462711116820275881e-01,3.670807938294675787e-03,4.691174152690927822e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.789819991419210776e-01,3.748331959373837896e-01,-2.444417642529713719e-01,3.552755980259469839e-03,4.509168415262271648e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.794072786822439758e-01,3.679549763191504730e-01,-2.425357351329280597e-01,3.436775317903052827e-03,4.332266839423082674e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.798282563291319525e-01,3.611040694680652252e-01,-2.405542574382494025e-01,3.322884333160550704e-03,4.160391362234742464e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.802449302335066728e-01,3.542822726113916687e-01,-2.384986213528675347e-01,3.211099702339722649e-03,3.993463408888053671e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.806572985651929031e-01,3.474913651764596767e-01,-2.363701726282041726e-01,3.101436393692125724e-03,3.831403936643183739e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.810653595129272819e-01,3.407331063344263278e-01,-2.341703121144498856e-01,2.993907666824447301e-03,3.674133478931482580e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.814691112843658694e-01,3.340092346305775228e-01,-2.319004946679121826e-01,2.888525073793307262e-03,3.521572189779216371e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.818685521060921406e-01,3.273214665400974166e-01,-2.295622279567800239e-01,2.785298462121501509e-03,3.373639887986652416e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.822636802236245357e-01,3.206714952586468548e-01,-2.271570715480916158e-01,2.684235979597397650e-03,3.230256101601118070e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.826544939014246749e-01,3.140609897850027976e-01,-2.246866356380771701e-01,2.585344080839678971e-03,3.091340112164967131e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.830409914229045754e-01,3.074915935888100083e-01,-2.221525797667555646e-01,2.488627535741951002e-03,2.956810999052143351e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.834231710904342005e-01,3.009649243083239445e-01,-2.195566118267209543e-01,2.394089439623119011e-03,2.826587683457823085e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.838010312253490097e-01,2.944825720885778098e-01,-2.169004865909298085e-01,2.301731225129521798e-03,2.700588972256694285e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.841745701679575076e-01,2.880460989910417768e-01,-2.141860043507949618e-01,2.211552675945753672e-03,2.578733601726049349e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.845437862775482385e-01,2.816570379520530465e-01,-2.114150097270665885e-01,2.123551942187120421e-03,2.460940280917393950e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.849086779323972252e-01,2.753168922983854627e-01,-2.085893901700653363e-01,2.037725557456328707e-03,2.347127734705941552e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.852692435297746298e-01,2.690271346310518230e-01,-2.057110743958751042e-01,1.954068457654808359e-03,2.237214746401119253e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.856254814859526370e-01,2.627892060555589437e-01,-2.027820311513960361e-01,1.872574001396551174e-03,2.131120200131783778e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.859773902362116704e-01,2.566045161837176014e-01,-1.998042676157826547e-01,1.793233992003792458e-03,2.028763122533686156e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.863249682348473879e-01,2.504744415879648356e-01,-1.967798278400860190e-01,1.716038701136472257e-03,1.930062724025472572e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.866682139551777864e-01,2.444003258164933190e-01,-1.937107912737459081e-01,1.640976893941364662e-03,1.834938439672508404e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.870071258895497524e-01,2.383834785904542508e-01,-1.905992708925149937e-01,1.568035855769301518e-03,1.743309969283287058e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.873417025493456123e-01,2.324251754456435892e-01,-1.874474118778071963e-01,1.497201420388952686e-03,1.655097317085077284e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.876719424649894608e-01,2.265266578026785549e-01,-1.842573898613520667e-01,1.428457999565355564e-03,1.570220830460790189e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.879978441859542659e-01,2.206891315086809968e-01,-1.810314092757005777e-01,1.361788614123999545e-03,1.488601238252430358e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.883194062807676428e-01,2.149137674854672553e-01,-1.777717016657240912e-01,1.297174926317175422e-03,1.410159688336749166e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.886366273370181812e-01,2.092017008823671698e-01,-1.744805237815529320e-01,1.234597273616096181e-03,1.334817784332657060e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.889495059613619965e-01,2.035540311021139248e-01,-1.711601562602134696e-01,1.174034703699162758e-03,1.262497621693116703e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.892580407795287245e-01,1.979718217091636645e-01,-1.678129015526701739e-01,1.115465010677265442e-03,1.193121822847685934e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.895622304363274058e-01,1.924561001883687839e-01,-1.644410822233616232e-01,1.058864772573402677e-03,1.126613571608742811e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.898620735956522587e-01,1.870078579590642998e-01,-1.610470391965624914e-01,1.004209389922269867e-03,1.062896646688287578e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.901575689404893410e-01,1.816280501584153217e-01,-1.576331300792758738e-01,9.514731254469584740e-04,1.001895454418344576e-03,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.904487151729213235e-01,1.763175960055825831e-01,-1.542017272418902407e-01,9.006291447837718985e-04,9.435350605328140161e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.907355110141340404e-01,1.710773785762529087e-01,-1.507552159869575414e-01,8.516495582450427527e-04,8.877412210611181380e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.910179552044214857e-01,1.659082452834030885e-01,-1.472959929407560986e-01,8.045054634944752859e-04,8.344404123072289266e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.912960465031916968e-01,1.608110076276083600e-01,-1.438264641072506123e-01,7.591669890924263725e-04,7.835598598882150727e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.915697836889719730e-01,1.557864417723089367e-01,-1.403490429609775825e-01,7.156033389556931430e-04,7.350275669065113940e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.918391655594147593e-01,1.508352888291406568e-01,-1.368661489387057728e-01,6.737828375435815208e-04,6.887723410355214761e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.921041909313020879e-01,1.459582548852799977e-01,-1.333802053243934860e-01,6.336729757837673187e-04,6.447238207728437200e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.923648586405513505e-01,1.411560115762782175e-01,-1.298936375147165534e-01,5.952404577352713713e-04,6.028125006981265022e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.926211675422206282e-01,1.364291965607618284e-01,-1.264088713876743431e-01,5.584512478152940662e-04,5.629697557554445234e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.928731165105130207e-01,1.317784138443682251e-01,-1.229283313296926500e-01,5.232706186030288776e-04,5.251278645394440647e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.931207044387818650e-01,1.272042341330509307e-01,-1.194544384437413731e-01,4.896631991979678985e-04,4.892200317168311557e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.933639302395360637e-01,1.227071958116330574e-01,-1.159896089061733110e-01,4.575930240007236170e-04,4.551804093766353629e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.936027928444443047e-01,1.182878050245562973e-01,-1.125362520730845972e-01,4.270235819012832969e-04,4.229441174676080366e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.938372912043398344e-01,1.139465366892765952e-01,-1.090967687889933257e-01,3.979178658259076252e-04,3.924472632812972951e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.940674242892252321e-01,1.096838347740866221e-01,-1.056735496355247356e-01,3.702384225579105929e-04,3.636269599735822118e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.942931910882769619e-01,1.055001132057585694e-01,-1.022689731475187047e-01,3.439474028089155549e-04,3.364213441900452224e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.945145906098497024e-01,1.013957565779560754e-01,-9.888540415883158297e-02,3.190066114637266992e-04,3.107695927027255686e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.947316218814804545e-01,9.737112076205527156e-02,-9.552519202806478149e-02,2.953775579490420990e-04,2.866119381688512374e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.949442839498933155e-01,9.342653372886219887e-02,-9.219066905940714873e-02,2.730215066722901972e-04,2.638896839931390991e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.951525758810034761e-01,8.956229643259450146e-02,-8.888414878477109149e-02,2.518995274550453337e-04,2.425452182773220420e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.953564967599211055e-01,8.577868336635521840e-02,-8.560792431931835145e-02,2.319725459479941908e-04,2.225220269238076471e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.955560456909556821e-01,8.207594368511443583e-02,-8.236426680570722858e-02,2.132013939249019420e-04,2.037647058976779530e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.957512217976196567e-01,7.845430211422453615e-02,-7.915542368787166494e-02,1.955468594458051876e-04,1.862189725825138587e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.959420242226324493e-01,7.491395939103087653e-02,-7.598361725760736929e-02,1.789697368122343676e-04,1.698316764038358088e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.961284521279242243e-01,7.145509367466267026e-02,-7.285104310809356609e-02,1.634308762486073906e-04,1.545508086373625174e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.963105046946395538e-01,6.807786100286285225e-02,-6.975986849534375478e-02,1.488912332888289367e-04,1.403255114288442975e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.964881811231409703e-01,6.478239676820098103e-02,-6.671223098954297270e-02,1.353119178044188563e-04,1.271060860933821647e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.966614806330126308e-01,6.156881626005367841e-02,-6.371023692661778592e-02,1.226542425952754196e-04,1.148440006160353398e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.968304024630633142e-01,5.843721550922410801e-02,-6.075595988036914391e-02,1.108797715603823095e-04,1.034918965883095524e-04,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.969949458713304180e-01,5.538767269071070282e-02,-5.785143951839467891e-02,9.995036731504464915e-05,9.300359536061058728e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.971551101350828450e-01,5.242024863887391473e-02,-5.499867991317972798e-02,8.982823826369523994e-05,8.333410360671326814e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.973108945508237788e-01,4.953498820009627490e-02,-5.219964840410036555e-02,8.047598507533643738e-05,7.443961824041793593e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.974622984342947918e-01,4.673192082497519551e-02,-4.945627418246351104e-02,7.185664648309384882e-05,6.627753076575849511e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.976093211204778433e-01,4.401106191004359319e-02,-4.677044699620896279e-02,6.393374440327223151e-05,5.880643105040632564e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.977519619635988324e-01,4.137241349935354140e-02,-4.414401593698928650e-02,5.667132829817355852e-05,5.198611056528026572e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.978902203371300406e-01,3.881596544692126477e-02,-4.157878817199470861e-02,5.003401875221357017e-05,4.577756512043829799e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.980240956337933511e-01,3.634169614326071551e-02,-3.907652780480704025e-02,4.398705021753047592e-05,4.014299713394507756e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.981535872655622477e-01,3.394957369305470762e-02,-3.663895471000266912e-02,3.849631287671176762e-05,3.504581745032870081e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.982786946636650338e-01,3.163955673589996520e-02,-3.426774331157977915e-02,3.352839360788794981e-05,3.045064673708219429e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.983994172785870536e-01,2.941159543825177733e-02,-3.196452165488521818e-02,2.905061598542451768e-05,2.632331649309775315e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.985157545800730228e-01,2.726563233959382673e-02,-2.973087019249734744e-02,2.503107928951218270e-05,2.263086971002612255e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.986277060571295827e-01,2.520160348553246096e-02,-2.756832083353624729e-02,2.143869649686146409e-05,1.934156117835164151e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.987352712180274095e-01,2.321943887259480740e-02,-2.547835600152593277e-02,1.824323119467269108e-05,1.642485752017056801e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.988384495903031013e-01,2.131906375473221690e-02,-2.346240766823669624e-02,1.541533339447914900e-05,1.385143694628517232e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.989372407207620652e-01,1.950039929579198972e-02,-2.152185631958932269e-02,1.292657422373838088e-05,1.159318874640455211e-05,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.990316441754796273e-01,1.776336343708760993e-02,-1.965803022173175096e-02,1.074947945018531636e-05,9.623212582839185762e-06,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.991216595398032529e-01,1.610787169907426306e-02,-1.787220453880981191e-02,8.857561800573144034e-06,7.915817585203902251e-06,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.992072864183545455e-01,1.453383788596881651e-02,-1.616560053434751981e-02,7.225352059492744630e-06,6.446521288241811982e-06,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.992885244350309115e-01,1.304117500987677335e-02,-1.453938476326934424e-02,5.828428912749328355e-06,5.192048429960178583e-06,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.993653732330070039e-01,1.162979593491942319e-02,-1.299466836022187405e-02,4.643447511988093959e-06,4.130329623014402594e-06,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.994378324747364983e-01,1.029961395589267664e-02,-1.153250636107346419e-02,3.648166727571013466e-06,3.240499962254564536e-06,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.995059018419534258e-01,9.050543701528804968e-03,-1.015389704188604525e-02,2.821475069492598036e-06,2.502897546875883304e-06,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.995695810356738376e-01,7.882501556410540997e-03,-8.859781266412604384e-03,2.143415253843864883e-06,1.899061970583074253e-06,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.996288697761966935e-01,6.795406518818971654e-03,-7.651041963717603901e-03,1.595207390458170825e-06,1.411732780436549428e-06,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.996837678031056384e-01,5.789180442666316470e-03,-6.528503522169459318e-03,1.159270770830220314e-06,1.024847944881575350e-06,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.997342748752695574e-01,4.863748869942286814e-03,-5.492931305676303222e-03,8.192442410758792242e-07,7.235423367395296259e-07,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.997803907708440185e-01,4.019041310057394796e-03,-4.545031184425194458e-03,5.600051356597276294e-07,4.941462526420997024e-07,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.998221152872722728e-01,3.254991928089291664e-03,-3.685449096886342629e-03,3.676867599349924234e-07,3.241839843193468079e-07,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.998594482412856976e-01,2.571539743547577965e-03,-2.914770676815493095e-03,2.296944026450090722e-07,2.023724620578544039e-07,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.998923894689052405e-01,1.968629246967516268e-03,-2.233520864261702259e-03,1.347198669911694154e-07,1.186199804921017435e-07,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.999209388254417519e-01,1.446210514967800030e-03,-1.642163587066675090e-03,7.275450678347964172e-08,6.402502392043930728e-08,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.999450961854965403e-01,1.004239749596159753e-03,-1.141101496327595177e-03,3.510075546144303800e-08,3.087519999927952015e-08,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.999648614429623716e-01,6.426793257140208087e-04,-7.306757078514341175e-04,1.438213799073529622e-08,1.264629453566377874e-08,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.999802345110231361e-01,3.614982470054549629e-04,-4.111655952132400350e-04,4.551757823718516219e-09,4.001455080885748678e-09,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.999912153221552913e-01,1.606721108149795842e-04,-1.827886389983132188e-04,8.992503022602981804e-10,7.905125455121505901e-10,-0.000000000000000000e+00,-0.000000000000000000e+00
+9.999978038281271964e-01,4.018346884133498867e-05,-4.570026495150424442e-05,5.619751219360055816e-11,4.944655220443084589e-11,-0.000000000000000000e+00,-0.000000000000000000e+00
+1.000000000000000000e+00,2.172345245846269443e-08,6.227729555015112737e-09,-6.406842235841267287e-16,6.014764460522365031e-17,-0.000000000000000000e+00,0.000000000000000000e+00
diff --git a/examples/TS_channel/TSwave_cheb_3D.csv b/examples/TS_channel/TSwave_cheb_3D.csv
new file mode 100644
index 00000000000..25b087afa86
--- /dev/null
+++ b/examples/TS_channel/TSwave_cheb_3D.csv
@@ -0,0 +1,1500 @@
+-1.000000000000000000e+00,-8.532433453662170043e-09,-7.825595599627150187e-09,0.000000000000000000e+00,-0.000000000000000000e+00,-1.599831272561656676e-08,-1.467299174930090495e-08
+-9.999978038281271964e-01,6.339081250191365459e-06,4.338122083586022897e-06,4.527002409099353646e-12,1.195183183545239621e-11,-8.543270903398036051e-06,-3.317684627220741917e-07
+-9.999912153221552913e-01,2.538213128841344148e-05,1.737455491554113006e-05,7.345343160167814840e-11,1.900893980644579340e-10,-3.412460745523900372e-05,-1.285659877269323089e-06
+-9.999802345110231361e-01,5.712169081436035717e-05,3.909765612915819575e-05,3.726714591305153097e-10,9.612157521767487419e-10,-7.675772989925633467e-05,-2.883374579398063985e-06
+-9.999648614429623716e-01,1.015590864256511353e-04,6.950096197886711095e-05,1.178195977011979408e-09,3.036536800268931052e-09,-1.364397148525431532e-04,-5.136777931779325563e-06
+-9.999450961854965403e-01,1.586966408299545850e-04,1.085752883723027586e-04,2.875450803325525671e-09,7.411464482616838463e-09,-2.131656119904592008e-04,-8.062652415276418802e-06
+-9.999209388254417519e-01,2.285371189537993228e-04,1.563087678154905446e-04,5.958253586266729833e-09,1.536531941236317529e-08,-3.069294977659811586e-04,-1.168254652617643607e-05
+-9.998923894689052405e-01,3.110841581777411334e-04,2.126870951428934061e-04,1.102755811897632658e-08,2.846096669216762880e-08,-4.177236882276211586e-04,-1.602220886560661701e-05
+-9.998594482412856976e-01,4.063417112922833242e-04,2.776928930431409124e-04,1.878986997050827461e-08,4.854435876488042881e-08,-5.455398073093925635e-04,-2.111264932353566488e-05
+-9.998221152872722728e-01,5.143146210111909124e-04,3.513064318400511998e-04,3.005542619497379947e-08,7.774404255073667120e-08,-6.903677377934228174e-04,-2.698863777746396184e-05
+-9.997803907708440185e-01,6.350085839080511991e-04,4.335045815812777945e-04,4.573605253367640730e-08,1.184704800507748562e-07,-8.521957214234845718e-04,-3.369049402519871077e-05
+-9.997342748752695574e-01,7.684298599762195200e-04,5.242619024709374315e-04,6.684276389445010206e-08,1.734153604989223678e-07,-1.031010940303483687e-03,-4.126184518309180432e-05
+-9.996837678031056384e-01,9.145854153890290404e-04,6.235494765022742050e-04,9.448312592680328410e-08,2.455508216345021490e-07,-1.226799290360050228e-03,-4.975159527158962825e-05
+-9.996288697761966935e-01,1.073483310379447480e-03,7.313357270682651354e-04,1.298582651091697411e-07,3.381285481609111646e-07,-1.439544699513272460e-03,-5.921214435924348026e-05
+-9.995695810356738376e-01,1.245132113591762655e-03,8.475856592106073084e-04,1.742597291774336788e-07,4.546787552878928164e-07,-1.669230275523829292e-03,-6.970054611419045001e-05
+-9.995059018419534258e-01,1.429541350681043117e-03,9.722612530129576790e-04,2.290659532491314310e-07,5.990092281459728080e-07,-1.915837518886828348e-03,-8.127748039561823830e-05
+-9.994378324747364983e-01,1.626721909915838883e-03,1.105321058796416558e-03,2.957386186352766587e-07,7.752039665797756229e-07,-2.179345620629599429e-03,-9.400769978359452489e-05
+-9.993653732330070039e-01,1.836684663428785318e-03,1.246720106636086019e-03,3.758185879942542045e-07,9.876220703254661533e-07,-2.459734109998472883e-03,-1.079598641566882112e-04
+-9.992885244350309115e-01,2.059442512119403178e-03,1.396409998839586277e-03,4.709217263387399620e-07,1.240896374045567649e-06,-2.756979087020847239e-03,-1.232060097763611705e-04
+-9.992072864183545455e-01,2.295008962530312255e-03,1.554338672274459786e-03,5.827344509056003786e-07,1.539931751730508899e-06,-3.071055961785336321e-03,-1.398216150356972997e-04
+-9.991216595398032529e-01,2.543398548145819199e-03,1.720450063329689470e-03,7.130089865858985968e-07,1.889903773573420901e-06,-3.401938739576017799e-03,-1.578858266203851484e-04
+-9.990316441754796273e-01,2.804627156594336531e-03,1.894684450779379314e-03,8.635584581139484337e-07,2.296256848214360425e-06,-3.749599486515301297e-03,-1.774803925259928715e-04
+-9.989372407207620652e-01,3.078711674195466416e-03,2.076977617509164712e-03,1.036251699199413526e-06,2.764702535312161669e-06,-4.114009079217138126e-03,-1.986907878399675638e-04
+-9.988384495903031013e-01,3.365670624817492743e-03,2.267261737265849949e-03,1.233007910565025811e-06,3.301217475111975442e-06,-4.495136094082994270e-03,-2.216040843887388909e-04
+-9.987352712180274095e-01,3.665523431511381273e-03,2.465463722099767337e-03,1.455791084751685575e-06,3.912041436102811109e-06,-4.892948282758434984e-03,-2.463115600262525112e-04
+-9.986277060571295827e-01,3.978291314330954613e-03,2.671506672128005303e-03,1.706604012619967810e-06,4.603675095285029593e-06,-5.307410983524543310e-03,-2.729054697457588825e-04
+-9.985157545800730228e-01,4.303996515533350421e-03,2.885308499362131129e-03,1.987482625197020287e-06,5.382877778638580915e-06,-5.738488672583380405e-03,-3.014810942465334111e-04
+-9.983994172785870536e-01,4.642663278104359852e-03,3.106782279923024203e-03,2.300489383479645072e-06,6.256665029829621174e-06,-6.186143231385597165e-03,-3.321355268245334376e-04
+-9.982786946636650338e-01,4.994317116353828336e-03,3.335836217316592682e-03,2.647707335797211850e-06,7.232305954558219682e-06,-6.650335419804160458e-03,-3.649671687915827657e-04
+-9.981535872655622477e-01,5.358985339969861428e-03,3.572372796990599692e-03,3.031233193922529319e-06,8.317320650854867283e-06,-7.131024002344052599e-03,-4.000767207359412752e-04
+-9.980240956337933511e-01,5.736696921859368685e-03,3.816289465332934130e-03,3.453170931272371001e-06,9.519477282415188916e-06,-7.628166107917069838e-03,-4.375652951892959153e-04
+-9.978902203371300406e-01,6.127482744531542388e-03,4.067477382584993965e-03,3.915624579552764966e-06,1.084678916320121843e-05,-8.141716882760252280e-03,-4.775361206614616469e-04
+-9.977519619635988324e-01,6.531375794491861043e-03,4.325822775716871814e-03,4.420691616412480435e-06,1.230751144047500538e-05,-8.671629243610255375e-03,-5.200913510022429822e-04
+-9.976093211204778433e-01,6.948410482726272500e-03,4.591204291733111398e-03,4.970455485757362102e-06,1.391013792565739864e-05,-9.217855271970002567e-03,-5.653363544483002963e-04
+-9.974622984342947918e-01,7.378623493675015750e-03,4.863495513702270623e-03,5.566978208283417180e-06,1.566339789831929291e-05,-9.780344744754978017e-03,-6.133743033506157578e-04
+-9.973108945508237788e-01,7.822054063482486788e-03,5.142562866324609100e-03,6.212293468065257354e-06,1.757625206087777084e-05,-1.035904473702789659e-02,-6.643093895793436912e-04
+-9.971551101350828450e-01,8.278742775890782571e-03,5.428265825474629450e-03,6.908398568859482315e-06,1.965788908375299849e-05,-1.095390200590806370e-02,-7.182457013845617091e-04
+-9.969949458713304180e-01,8.748733253779318597e-03,5.720456999613215618e-03,7.657247012810218355e-06,2.191772173572425551e-05,-1.156485994652256023e-02,-7.752863221644666842e-04
+-9.968304024630633142e-01,9.232071099341650905e-03,6.018981708339644389e-03,8.460740816071564953e-06,2.436538248036250096e-05,-1.219186070782152416e-02,-8.355333541357465093e-04
+-9.966614806330126308e-01,9.728804226681379147e-03,6.323677576188095270e-03,9.320722714674573218e-06,2.701071963998305902e-05,-1.283484469830253362e-02,-8.990878957758016449e-04
+-9.964881811231409703e-01,1.023898340371178113e-02,6.634374406595785860e-03,1.023896824639097340e-05,2.986379272787295029e-05,-1.349374969956493135e-02,-9.660494766437511269e-04
+-9.963105046946395538e-01,1.076266175294141730e-02,6.950893797856658000e-03,1.121717778131020265e-05,3.293486783092153199e-05,-1.416851193178225374e-02,-1.036515959070000289e-03
+-9.961284521279242243e-01,1.129989496760361312e-02,7.273049243184456260e-03,1.225696866152347086e-05,3.623441302721597712e-05,-1.485906577731362956e-02,-1.110582515601232214e-03
+-9.959420242226324493e-01,1.185074174631991530e-02,7.600645207823826348e-03,1.335986715536346793e-05,3.977309330728262944e-05,-1.556534309342686925e-02,-1.188342508308591399e-03
+-9.957512217976196567e-01,1.241526319472179354e-02,7.933477459207615920e-03,1.452730032257286609e-05,4.356176547892419654e-05,-1.628727446030235268e-02,-1.269886002876266734e-03
+-9.955560456909556821e-01,1.299352359168766229e-02,8.271332343495406761e-03,1.576058804792291850e-05,4.761147305124416778e-05,-1.702478786820387988e-02,-1.355300242948894547e-03
+-9.953564967599211055e-01,1.358559015004373073e-02,8.613987045967930883e-03,1.706093503574931386e-05,5.193344034716563791e-05,-1.777780928714702349e-02,-1.444668264806571499e-03
+-9.951525758810034761e-01,1.419153250484345152e-02,8.961208266401326367e-03,1.842942272890574966e-05,5.653906741635552008e-05,-1.854626374391964844e-02,-1.538070469526965156e-03
+-9.949442839498933155e-01,1.481142432616712121e-02,9.312753200257976871e-03,1.986700099943787922e-05,6.143992383889711173e-05,-1.933007241144263366e-02,-1.635581857765946480e-03
+-9.947316218814804545e-01,1.544534144618715972e-02,9.668368522869195958e-03,2.137448088540450406e-05,6.664774258380052208e-05,-2.012915622899656351e-02,-1.737272995423964168e-03
+-9.945145906098497024e-01,1.609336351122677908e-02,1.002778975591368978e-02,2.295252594017421405e-05,7.217441434654130726e-05,-2.094343290743564540e-02,-1.843210249535410855e-03
+-9.942931910882769619e-01,1.675557322492633033e-02,1.039074179961434891e-02,2.460164472290550224e-05,7.803198031871484848e-05,-2.177281844461415386e-02,-1.953453825909282765e-03
+-9.940674242892252321e-01,1.743205611836361885e-02,1.075693773220816503e-02,2.632218276345744056e-05,8.423262659777318220e-05,-2.261722764545039469e-02,-2.068059043737128632e-03
+-9.938372912043398344e-01,1.812290182250941503e-02,1.112607927517476371e-02,2.811431491771163740e-05,9.078867668153055087e-05,-2.347657181691172740e-02,-2.187074475451878611e-03
+-9.936027928444443047e-01,1.882820233445253386e-02,1.149785591863125740e-02,2.997803774362083721e-05,9.771258499916576998e-05,-2.435076209050840088e-02,-2.310542588006000726e-03
+-9.933639302395360637e-01,1.954805373059874049e-02,1.187194490825461100e-02,3.191316203475359992e-05,1.050169298428087059e-04,-2.523970627159163871e-02,-2.438498758499811018e-03
+-9.931207044387818650e-01,2.028255532429806135e-02,1.224801047160161901e-02,3.391930516900325878e-05,1.127144057849433043e-04,-2.614331046915623069e-02,-2.570971706341594609e-03
+-9.928731165105130207e-01,2.103180999179347227e-02,1.262570429124358282e-02,3.599588404432022812e-05,1.208178160758218469e-04,-2.706147852278731336e-02,-2.707981578629066466e-03
+-9.926211675422206282e-01,2.179592318080299240e-02,1.300466413463748951e-02,3.814210795519065315e-05,1.293400656713939736e-04,-2.799411388628083794e-02,-2.849541483319385088e-03
+-9.923648586405513505e-01,2.257500464780257815e-02,1.338451420470505512e-02,4.035697123174238749e-05,1.382941533238617323e-04,-2.894111638039659901e-02,-2.995655788401834982e-03
+-9.921041909313020879e-01,2.336916717426754456e-02,1.376486480546426985e-02,4.263924679951363569e-05,1.476931632555217961e-04,-2.990238459425427486e-02,-3.146319698408358775e-03
+-9.918391655594147593e-01,2.417852738266126900e-02,1.414531156755700209e-02,4.498747952029019627e-05,1.575502569851042114e-04,-3.087781433265205169e-02,-3.301519649844882921e-03
+-9.915697836889719730e-01,2.500320401968175385e-02,1.452543513447553288e-02,4.739997925943132063e-05,1.678786655637114786e-04,-3.186730179389837131e-02,-3.461232837393989938e-03
+-9.912960465031916968e-01,2.584332042707298063e-02,1.490480170193564380e-02,4.987481517512545772e-05,1.786916814652482726e-04,-3.287073887616812073e-02,-3.625425135726688831e-03
+-9.910179552044214857e-01,2.669900269168625981e-02,1.528296098090464912e-02,5.240980950696569085e-05,1.900026487918755446e-04,-3.388801656411465774e-02,-3.794053848086727937e-03
+-9.907355110141340404e-01,2.757037993340019644e-02,1.565944753535259901e-02,5.500253119560607622e-05,2.018249558976194387e-04,-3.491902428432327316e-02,-3.967064124217310229e-03
+-9.904487151729213235e-01,2.845758481313346092e-02,1.603377954542316297e-02,5.765029135994474815e-05,2.141720256638691641e-04,-3.596364882393122270e-02,-4.144390203439442776e-03
+-9.901575689404893410e-01,2.936075240143082646e-02,1.640545798421597784e-02,6.035013667880650702e-05,2.270573072058961568e-04,-3.702177629742345322e-02,-4.325955880942583889e-03
+-9.898620735956522587e-01,3.028002132747548841e-02,1.677396783997493757e-02,6.309884500315441453e-05,2.404942665648487042e-04,-3.809328981028502203e-02,-4.511671138588709169e-03
+-9.895622304363274058e-01,3.121553265218334969e-02,1.713877617163659522e-02,6.589292072330536266e-05,2.544963773625200814e-04,-3.917807136099064891e-02,-4.701434713798947895e-03
+-9.892580407795287245e-01,3.216743030567546097e-02,1.749933243152796553e-02,6.872858969167958425e-05,2.690771117853014462e-04,-4.027600077904260056e-02,-4.895132419154998860e-03
+-9.889495059613619965e-01,3.313586056760429999e-02,1.785506790451459741e-02,7.160179499116875565e-05,2.842499310366469571e-04,-4.138695642522150636e-02,-5.092637121382568612e-03
+-9.886366273370181812e-01,3.412097222655550988e-02,1.820539574949833414e-02,7.450819304149697091e-05,3.000282761650310370e-04,-4.251081458424782467e-02,-5.293807594634122524e-03
+-9.883194062807676428e-01,3.512291629551924504e-02,1.854971002832950244e-02,7.744314996647098752e-05,3.164255580430944343e-04,-4.364744965377550384e-02,-5.498489277366367355e-03
+-9.879978441859542659e-01,3.614184551332111361e-02,1.888738593082298425e-02,8.040173800224135450e-05,3.334551483441465622e-04,-4.479673469667688757e-02,-5.706512792498483026e-03
+-9.876719424649894608e-01,3.717791490803266236e-02,1.921777889312571361e-02,8.337873268699655068e-05,3.511303692876851287e-04,-4.595853996223948912e-02,-5.917694549913188155e-03
+-9.873417025493456123e-01,3.823128025929557749e-02,1.954022474042269031e-02,8.636860978055597505e-05,3.694644843714076326e-04,-4.713273530503027386e-02,-6.131835436851433042e-03
+-9.870071258895497524e-01,3.930209930895558340e-02,1.985403891758314929e-02,8.936554336847572495e-05,3.884706887076839533e-04,-4.831918740683962404e-02,-6.348721228321403329e-03
+-9.866682139551777864e-01,4.039053053423431044e-02,2.015851625996231342e-02,9.236340306292175326e-05,4.081620986826525982e-04,-4.951776152299706968e-02,-6.568121995975817126e-03
+-9.863249682348473879e-01,4.149673320844026597e-02,2.045293097783184438e-02,9.535575306287639150e-05,4.285517424327936259e-04,-5.072832076664049450e-02,-6.789791129105464916e-03
+-9.859773902362116704e-01,4.262086668636293207e-02,2.073653565070117735e-02,9.833585019947824256e-05,4.496525501464932319e-04,-5.195072679667349369e-02,-7.013466226278887439e-03
+-9.856254814859526370e-01,4.376309075126367382e-02,2.100856140207872300e-02,1.012966432117172873e-04,4.714773441279366357e-04,-5.318483846316421348e-02,-7.238867789388727519e-03
+-9.852692435297746298e-01,4.492356428505680371e-02,2.126821710454293926e-02,1.042307712901618483e-04,4.940388290882507735e-04,-5.443051354250300761e-02,-7.465699753139687887e-03
+-9.849086779323972252e-01,4.610244605194276785e-02,2.151469050519261988e-02,1.071305652334384497e-04,5.173495824415531678e-04,-5.568760645346442634e-02,-7.693646432895476575e-03
+-9.845437862775482385e-01,4.729989314008424539e-02,2.174714525298038215e-02,1.099880467966151174e-04,5.414220441299077979e-04,-5.695597030588122472e-02,-7.922377177156257957e-03
+-9.841745701679575076e-01,4.851606131756559803e-02,2.196472337209056519e-02,1.127949288337432542e-04,5.662685074919658569e-04,-5.823545529939327947e-02,-8.151540831312831595e-03
+-9.838010312253490097e-01,4.975110385305477750e-02,2.216654265844925087e-02,1.155426168826498740e-04,5.919011093116681530e-04,-5.952590993791671836e-02,-8.380769744921780229e-03
+-9.834231710904342005e-01,5.100517187947736092e-02,2.235169824127722013e-02,1.182222094333607168e-04,6.183318206326557549e-04,-6.082717928596018120e-02,-8.609675995928515665e-03
+-9.830409914229045754e-01,5.227841300488857146e-02,2.251926102550367084e-02,1.208245008938678129e-04,6.455724365666243953e-04,-6.213910644418603468e-02,-8.837853491708298759e-03
+-9.826544939014246749e-01,5.357097112321640431e-02,2.266827797920013474e-02,1.233399823330610110e-04,6.736345679701807561e-04,-6.346153170581454728e-02,-9.064876641220818987e-03
+-9.822636802236245357e-01,5.488298595753000192e-02,2.279777159093675579e-02,1.257588447404162731e-04,7.025296313918743532e-04,-6.479429214296811290e-02,-9.290300616339612627e-03
+-9.818685521060921406e-01,5.621459217333476394e-02,2.290674010330361401e-02,1.280709811550743266e-04,7.322688403269344507e-04,-6.613722192553256329e-02,-9.513660192893862513e-03
+-9.814691112843658694e-01,5.756591844294607629e-02,2.299415670311923121e-02,1.302659909872192737e-04,7.628631963049282766e-04,-6.749015265590524448e-02,-9.734470585340051010e-03
+-9.810653595129272819e-01,5.893708763011993451e-02,2.305896934837438003e-02,1.323331820344478672e-04,7.943234805587595518e-04,-6.885291152543214499e-02,-9.952227127481397956e-03
+-9.806572985651929031e-01,6.032821571966719837e-02,2.310010114531297482e-02,1.342615758514484694e-04,8.266602435678495949e-04,-7.022532174395104687e-02,-1.016640396411116511e-02
+-9.802449302335066728e-01,6.173940954802516201e-02,2.311644945552231503e-02,1.360399111565009059e-04,8.598837987603961302e-04,-7.160720513567277179e-02,-1.037645516906132541e-02
+-9.798282563291319525e-01,6.317076880550044626e-02,2.310688670647256804e-02,1.376566502272675619e-04,8.940042137109045512e-04,-7.299837664172550977e-02,-1.058181271715767820e-02
+-9.794072786822439758e-01,6.462238291528132894e-02,2.307025786634851749e-02,1.390999820643740539e-04,9.290313007638682135e-04,-7.439864834299082796e-02,-1.078189076143932189e-02
+-9.789819991419210776e-01,6.609433074679510467e-02,2.300538386224007872e-02,1.403578284306360462e-04,9.649746111547599994e-04,-7.580782808085947955e-02,-1.097607882034450436e-02
+-9.785524195761373667e-01,6.758668044408429332e-02,2.291105860743943706e-02,1.414178514805443214e-04,1.001843426220722652e-03,-7.722571777294325035e-02,-1.116374700465235133e-02
+-9.781185418717538438e-01,6.909948785427987017e-02,2.278604980627060503e-02,1.422674579722290537e-04,1.039646749931121949e-03,-7.865211426253873594e-02,-1.134424422166426319e-02
+-9.776803679345105191e-01,7.063279522373049069e-02,2.262909912830214643e-02,1.428938069179949352e-04,1.078393301541915542e-03,-8.008680957353520036e-02,-1.151689762512471057e-02
+-9.772378996890179748e-01,7.218662947421108922e-02,2.243892178545597166e-02,1.432838162182558425e-04,1.118091511428223866e-03,-8.152959185848712709e-02,-1.168101325158865667e-02
+-9.767911390787489267e-01,7.376100442763727782e-02,2.221420710795192141e-02,1.434241704879421378e-04,1.158749512482745627e-03,-8.298023884772429137e-02,-1.183587485432350574e-02
+-9.763400880660293435e-01,7.535591588071266500e-02,2.195361845333989453e-02,1.433013299690825775e-04,1.200375132691042452e-03,-8.443852460799314286e-02,-1.198074406168983357e-02
+-9.758847486320304521e-01,7.697134275014592963e-02,2.165579147911947042e-02,1.429015374656361441e-04,1.242975891235105360e-03,-8.590421482076562865e-02,-1.211486368173496884e-02
+-9.754251227767596344e-01,7.860724442235214604e-02,2.131933656916533068e-02,1.422108252305346319e-04,1.286558991382327505e-03,-8.737706907819360602e-02,-1.223745330009645173e-02
+-9.749612125190515455e-01,8.026356008697661804e-02,2.094283866834539126e-02,1.412150275362695519e-04,1.331131317740626933e-03,-8.885683935951853296e-02,-1.234770982344519756e-02
+-9.744930198965594537e-01,8.194020859099702958e-02,2.052485480721886676e-02,1.398997867524000142e-04,1.376699429113059647e-03,-9.034326743085663303e-02,-1.244481244396368827e-02
+-9.740205469657464699e-01,8.363708544974660097e-02,2.006391856491121506e-02,1.382505638006889688e-04,1.423269553703729144e-03,-9.183608747463499478e-02,-1.252791468911570311e-02
+-9.735437958018759996e-01,8.535406135183004617e-02,1.955853645746043895e-02,1.362526500485540298e-04,1.470847587160727978e-03,-9.333502581654296026e-02,-1.259615170986597490e-02
+-9.730627684990029724e-01,8.709098342949093963e-02,1.900718984893332367e-02,1.338911742286550452e-04,1.519439087165323169e-03,-9.483979536564042523e-02,-1.264863731700711924e-02
+-9.725774671699647378e-01,8.884766982161737547e-02,1.840833527620330812e-02,1.311511150833555024e-04,1.569049269300737837e-03,-9.635010252911657691e-02,-1.268446409984810667e-02
+-9.720878939463715174e-01,9.062391145199816256e-02,1.776040437881249573e-02,1.280173109423428282e-04,1.619683007442122239e-03,-9.786564049721009628e-02,-1.270270439819044805e-02
+-9.715940509785973012e-01,9.241946953170139534e-02,1.706180469186589777e-02,1.244744718967820006e-04,1.671344826423061015e-03,-9.938609044442801854e-02,-1.270240977384311937e-02
+-9.710959404357700775e-01,9.423407292959511128e-02,1.631091962626158437e-02,1.205071903818965168e-04,1.724038903383682099e-03,-1.009111228778968999e-01,-1.268261212864679049e-02
+-9.705935645057625072e-01,9.606741817877316458e-02,1.550610904613833156e-02,1.160999531229603589e-04,1.777769063923689483e-03,-1.024403939441605182e-01,-1.264232383075991790e-02
+-9.700869253951824867e-01,9.791916647452671152e-02,1.464571021992967272e-02,1.112371523865701109e-04,1.832538782195425710e-03,-1.039735472789300313e-01,-1.258053727363914014e-02
+-9.695760253293631559e-01,9.978894291392484406e-02,1.372803780852394247e-02,1.059030994693818743e-04,1.888351178721697299e-03,-1.055102115572893462e-01,-1.249622637917423458e-02
+-9.690608665523531284e-01,1.016763341846702029e-01,1.275138469074016748e-02,1.000820347174405058e-04,1.945209021508443291e-03,-1.070500008569096884e-01,-1.238834667506926585e-02
+-9.685414513269068326e-01,1.035808876867649331e-01,1.171402366388016661e-02,9.375814330469044004e-05,2.003114723392627767e-03,-1.085925122421780992e-01,-1.225583388132284873e-02
+-9.680177819344745194e-01,1.055021077390924655e-01,1.061420562705819165e-02,8.691556507846687847e-05,2.062070345550963326e-03,-1.101373287240085819e-01,-1.209760924704114844e-02
+-9.674898606751921593e-01,1.074394569767094521e-01,9.450164064849865542e-03,7.953840909600595527e-05,2.122077595894832601e-03,-1.116840123996863388e-01,-1.191257323561555580e-02
+-9.669576898678714505e-01,1.093923507613987905e-01,8.220112202099063084e-03,7.161076690846525437e-05,2.183137831290245766e-03,-1.132321106086291024e-01,-1.169961311879048373e-02
+-9.664212718499893828e-01,1.113601584163542851e-01,6.922246903151404313e-03,6.311672452135370264e-05,2.245252059883943389e-03,-1.147811492120719284e-01,-1.145759809827150318e-02
+-9.658806089776785786e-01,1.133421994798880367e-01,5.554747975847653188e-03,5.404037755389454509e-05,2.308420942323753146e-03,-1.163306351351548473e-01,-1.118538322278752294e-02
+-9.653357036257157464e-01,1.153377429027850343e-01,4.115780712907721878e-03,4.436584457489788519e-05,2.372644793935488241e-03,-1.178800533169546000e-01,-1.088180742060824470e-02
+-9.647865581875129104e-01,1.173460030966886752e-01,2.603495148811581922e-03,3.407728165404866160e-05,2.437923589624730377e-03,-1.194288694967443193e-01,-1.054569788222177451e-02
+-9.642331750751055308e-01,1.193661412778955766e-01,1.016028433705744695e-03,2.315889341053508756e-05,2.504256966306355522e-03,-1.209765230082980314e-01,-1.017586879653580717e-02
+-9.636755567191421790e-01,1.213972601632023890e-01,-6.484930119898796698e-04,1.159494939926679154e-05,2.571644225380283980e-03,-1.225224319815429452e-01,-9.771120702497049573e-03
+-9.631137055688746562e-01,1.234384032188551583e-01,-2.391953288269149051e-03,-6.302012641761244584e-07,2.640084340023286878e-03,-1.240659899554875240e-01,-9.330243698597247975e-03
+-9.625476240921463367e-01,1.254885529162892421e-01,-4.216245394119134439e-03,-1.353211724974295525e-05,2.709575959627177726e-03,-1.256065643081415995e-01,-8.852018221497079051e-03
+-9.619773147753815090e-01,1.275466291455691903e-01,-6.123268264388156620e-03,-2.712624898507656217e-05,2.780117412647261331e-03,-1.271434943523253036e-01,-8.335213531932865114e-03
+-9.614027801235747184e-01,1.296114846954688482e-01,-8.114927300118137884e-03,-4.142792459845931366e-05,2.851706715108802762e-03,-1.286760949004074339e-01,-7.778592980152662130e-03
+-9.608240226602797751e-01,1.316819072844345739e-01,-1.019313024760963582e-02,-5.645233620511518784e-05,2.924341574734434530e-03,-1.302036475228771084e-01,-7.180910785710034779e-03
+-9.602410449275984305e-01,1.337566118423960948e-01,-1.235978731884243217e-02,-7.221452386967757429e-05,2.998019398566345497e-03,-1.317254100715871679e-01,-6.540917011420724000e-03
+-9.596538494861691637e-01,1.358342445918990504e-01,-1.461680784249417414e-02,-8.872936363592705798e-05,3.072737302248501699e-03,-1.332406040731584362e-01,-5.857356279759609770e-03
+-9.590624389151560791e-01,1.379133762017422105e-01,-1.696609807323058369e-02,-1.060115497297408531e-04,3.148492112326703625e-03,-1.347484226146740127e-01,-5.128968914681326582e-03
+-9.584668158122380266e-01,1.399925024658392470e-01,-1.940956017564130637e-02,-1.240755816739413746e-04,3.225280379625655638e-03,-1.362480241327871378e-01,-4.354494545278702819e-03
+-9.578669827935962777e-01,1.420700411888447534e-01,-2.194908857886574902e-02,-1.429357493718884755e-04,3.303098381938352444e-03,-1.377385333387027011e-01,-3.532671044498428194e-03
+-9.572629424939035347e-01,1.441443299290512958e-01,-2.458656819817440706e-02,-1.626061176436361874e-04,3.381942137252448317e-03,-1.392190405703763179e-01,-2.662237239004729659e-03
+-9.566546975663126062e-01,1.462136248531504534e-01,-2.732387226593441590e-02,-1.831005127333256918e-04,3.461807409095463573e-03,-1.406885991062374608e-01,-1.741935162457770771e-03
+-9.560422506824440836e-01,1.482760973585003916e-01,-3.016285836622525224e-02,-2.044325074092497865e-04,3.542689719005846088e-03,-1.421462267231041465e-01,-7.705092215160078530e-04
+-9.554256045323753499e-01,1.503298333013140808e-01,-3.310536684876352193e-02,-2.266154049264973327e-04,3.624584355234732828e-03,-1.435909024390203681e-01,2.532898912403119861e-04
+-9.548047618246282564e-01,1.523728315677661260e-01,-3.615321740697659963e-02,-2.496622262441052062e-04,3.707486382777594198e-03,-1.450215645725705382e-01,1.330704436400347544e-03
+-9.541797252861573542e-01,1.544030008141429322e-01,-3.930820585141819173e-02,-2.735856935320307042e-04,3.791390650588334459e-03,-1.464371123333620683e-01,2.462968206118040446e-03
+-9.535504976623380147e-01,1.564181573971507588e-01,-4.257210197906368537e-02,-2.983982176544526868e-04,3.876291808179497308e-03,-1.478364052936991802e-01,3.651302749502055838e-03
+-9.529170817169542174e-01,1.584160263243420408e-01,-4.594664448616789243e-02,-3.241118827155768222e-04,3.962184311518181581e-03,-1.492182573252384681e-01,4.896918839533842722e-03
+-9.522794802321865593e-01,1.603942359823657682e-01,-4.943353953968033371e-02,-3.507384312457669222e-04,4.049062436107707212e-03,-1.505814423456380835e-01,6.201010775439897912e-03
+-9.516376960085998205e-01,1.623503195695589207e-01,-5.303445585531232909e-02,-3.782892519422740869e-04,4.136920287968386643e-03,-1.519246876541001656e-01,7.564756927693293449e-03
+-9.509917318651310847e-01,1.642817112965618276e-01,-5.675102112127287241e-02,-4.067753626519535594e-04,4.225751814925627660e-03,-1.532466772533131183e-01,8.989317450385022820e-03
+-9.503415906390768608e-01,1.661857463236760812e-01,-6.058481937143549850e-02,-4.362073984047558882e-04,4.315550819011346184e-03,-1.545460483586932332e-01,1.047582990235714634e-02
+-9.496872751860808703e-01,1.680596586908466306e-01,-6.453738546512671759e-02,-4.665955969201369502e-04,4.406310968318266803e-03,-1.558213918831817535e-01,1.202540998128639495e-02
+-9.490287883801215019e-01,1.699005804237720962e-01,-6.861020215050452065e-02,-4.979497842739065861e-04,4.498025808610971697e-03,-1.570712509469393559e-01,1.363914710107308470e-02
+-9.483661331134992656e-01,1.717055397999081578e-01,-7.280469514227209948e-02,-5.302793610274272830e-04,4.590688777166633885e-03,-1.582941212114150897e-01,1.531810338006424008e-02
+-9.476993122968236927e-01,1.734714615535648263e-01,-7.712222959955844503e-02,-5.635932889002794955e-04,4.684293213353115520e-03,-1.594884478463890476e-01,1.706330969268635839e-02
+-9.470283288590011228e-01,1.751951643408979631e-01,-8.156410489632369420e-02,-5.979000771424593632e-04,4.778832372028940859e-03,-1.606526279235608590e-01,1.887576461287778298e-02
+-9.463531857472214925e-01,1.768733615431523076e-01,-8.613155106993695964e-02,-6.332077701327876174e-04,4.874299435454646640e-03,-1.617850068602053326e-01,2.075642990380140163e-02
+-9.456738859269452346e-01,1.785026590285739845e-01,-9.082572241842234995e-02,-6.695239333718990774e-04,4.970687526335774986e-03,-1.628838808957835671e-01,2.270623104978902740e-02
+-9.449904323818909546e-01,1.800795559238147769e-01,-9.564769444282092414e-02,-7.068556397992764324e-04,5.067989720775119451e-03,-1.639474942775719923e-01,2.472605121640881337e-02
+-9.443028281140214419e-01,1.816004433158170950e-01,-1.005984574000055171e-01,-7.452094572049589374e-04,5.166199061390032606e-03,-1.649740407005163556e-01,2.681673127113141236e-02
+-9.436110761435310135e-01,1.830616055690400534e-01,-1.056789130145340122e-01,-7.845914356259237445e-04,5.265308568672509050e-03,-1.659616602392730011e-01,2.897906359207263333e-02
+-9.429151795088319687e-01,1.844592186917951016e-01,-1.108898668635011492e-01,-8.250070949270178495e-04,5.365311253056664377e-03,-1.669084422319580818e-01,3.121379370803492592e-02
+-9.422151412665417114e-01,1.857893512129650260e-01,-1.162320258442573473e-01,-8.664614107825388545e-04,5.466200128416539607e-03,-1.678124238985028160e-01,3.352161213444132537e-02
+-9.415109644914688714e-01,1.870479639106026049e-01,-1.217059905163423300e-01,-9.089588059684429932e-04,5.567968226324858556e-03,-1.686715915767269536e-01,3.590315555558232230e-02
+-9.408026522765996491e-01,1.882309126808823785e-01,-1.273122501573889231e-01,-9.525031333841775614e-04,5.670608605855296212e-03,-1.694838765585883755e-01,3.835900266383377583e-02
+-9.400902077330849371e-01,1.893339462526960060e-01,-1.330511779679014184e-01,-9.970976668310861830e-04,5.774114365283190310e-03,-1.702471611030131893e-01,4.088966947683841252e-02
+-9.393736339902262200e-01,1.903527097984835936e-01,-1.389230248969411385e-01,-1.042745089333524389e-03,5.878478655947371422e-03,-1.709592739290833885e-01,4.349560701044936223e-02
+-9.386529341954613637e-01,1.912827444572770541e-01,-1.449279130617271982e-01,-1.089447481479418082e-03,5.983694691510803819e-03,-1.716179939337661164e-01,4.617719949682961428e-02
+-9.379281115143518477e-01,1.921194899207331053e-01,-1.510658312509766399e-01,-1.137206308974161558e-03,6.089755761933746646e-03,-1.722210487502347787e-01,4.893475849465249439e-02
+-9.371991691305681105e-01,1.928582857000026574e-01,-1.573366271452593113e-01,-1.186022412844509781e-03,6.196655242354378451e-03,-1.727661163873714101e-01,5.176852297057897567e-02
+-9.364661102458753383e-01,1.934943733890863715e-01,-1.637400029042138105e-01,-1.235895994645939923e-03,6.304386605997050648e-03,-1.732508256325145102e-01,5.467865290171525922e-02
+-9.357289380801203649e-01,1.940228992309550038e-01,-1.702755088078475587e-01,-1.286826610672445583e-03,6.412943432248294391e-03,-1.736727565416646413e-01,5.766522637975392968e-02
+-9.349876558712167940e-01,1.944389157010743552e-01,-1.769425362060048357e-01,-1.338813157959254079e-03,6.522319419144288234e-03,-1.740294434508982413e-01,6.072823788657272237e-02
+-9.342422668751307890e-01,1.947373860032365067e-01,-1.837403119854928712e-01,-1.391853865346994986e-03,6.632508392169446723e-03,-1.743183732291485877e-01,6.386759362061553147e-02
+-9.334927743658669730e-01,1.949131858030098696e-01,-1.906678923010986237e-01,-1.445946281242605630e-03,6.743504313143705402e-03,-1.745369894351761764e-01,6.708310812414666757e-02
+-9.327391816354543286e-01,1.949611078703568923e-01,-1.977241563377720168e-01,-1.501087266725395367e-03,6.855301290103832726e-03,-1.746826917669493939e-01,7.037450079803946046e-02
+-9.319814919939308773e-01,1.948758654746728936e-01,-2.049077994618526966e-01,-1.557272981786356810e-03,6.967893583633121175e-03,-1.747528386191859739e-01,7.374139353524562790e-02
+-9.312197087693302455e-01,1.946520961984959386e-01,-2.122173285303016188e-01,-1.614498880207301161e-03,7.081275616292521079e-03,-1.747447496459680061e-01,7.718330430991547642e-02
+-9.304538353076660107e-01,1.942843666748714504e-01,-2.196510527020491543e-01,-1.672759696185111304e-03,7.195441980170671527e-03,-1.746557074020257116e-01,8.069964923218483910e-02
+-9.296838749729178231e-01,1.937671778090696850e-01,-2.272070807178978857e-01,-1.732049436041577239e-03,7.310387444373840464e-03,-1.744829589061654695e-01,8.428973253873639493e-02
+-9.289098311470160851e-01,1.930949701565493892e-01,-2.348833122443878818e-01,-1.792361371159763642e-03,7.426106958611002236e-03,-1.742237177606281162e-01,8.795274781112018270e-02
+-9.281317072298272963e-01,1.922621285872382624e-01,-2.426774324638932279e-01,-1.853688027347430130e-03,7.542595660317084026e-03,-1.738751684752829763e-01,9.168777324070617740e-02
+-9.273495066391393982e-01,1.912629890920836129e-01,-2.505869061969957512e-01,-1.916021175976444311e-03,7.659848878755373507e-03,-1.734344676582890987e-01,9.549376794058297979e-02
+-9.265632328106458981e-01,1.900918441714042895e-01,-2.586089724498749076e-01,-1.979351828644914692e-03,7.777862141109427065e-03,-1.728987487644521870e-01,9.936956766932451834e-02
+-9.257728891979321029e-01,1.887429513468196207e-01,-2.667406370241863778e-01,-2.043670227606438940e-03,7.896631173554602423e-03,-1.722651218995861211e-01,1.033138844366232323e-01
+-9.249784792724585758e-01,1.872105374296359204e-01,-2.749786679675104706e-01,-2.108965837971860276e-03,8.016151905378252410e-03,-1.715306825065588126e-01,1.073253010736729823e-01
+-9.241800065235469264e-01,1.854888088482680997e-01,-2.833195889654090416e-01,-2.175227338539278705e-03,8.136420472657520872e-03,-1.706925096269123199e-01,1.114022699975212305e-01
+-9.233774744583637117e-01,1.835719581371324793e-01,-2.917596758792107559e-01,-2.242442617016243093e-03,8.257433215827923029e-03,-1.697476723050866110e-01,1.155431064566103533e-01
+-9.225708866019060039e-01,1.814541722668228663e-01,-3.002949491858878384e-01,-2.310598763721740442e-03,8.379186681834476305e-03,-1.686932334931813271e-01,1.197459898862039546e-01
+-9.217602464969846254e-01,1.791296412936855265e-01,-3.089211704775539635e-01,-2.379682064163948178e-03,8.501677623943609471e-03,-1.675262543152449823e-01,1.240089581262052154e-01
+-9.209455577042096053e-01,1.765925679090185607e-01,-3.176338363761705974e-01,-2.449677993139766537e-03,8.624902999731324030e-03,-1.662437976067385192e-01,1.283299070094761074e-01
+-9.201268238019744139e-01,1.738371755370201888e-01,-3.264281749652989673e-01,-2.520571208504236108e-03,8.748859970855744211e-03,-1.648429351382936503e-01,1.327065858056715175e-01
+-9.193040483864395318e-01,1.708577205937095445e-01,-3.352991399905030923e-01,-2.592345544274002409e-03,8.873545899565767406e-03,-1.633207479998534384e-01,1.371365974762065176e-01
+-9.184772350715175726e-01,1.676485006205602202e-01,-3.442414099566178942e-01,-2.664984007298136070e-03,8.998958342382273184e-03,-1.616743356785107777e-01,1.416173904286272822e-01
+-9.176463874888565186e-01,1.642038659294889069e-01,-3.532493809575694144e-01,-2.738468777060978909e-03,9.125095047914423899e-03,-1.599008195053389370e-01,1.461462627582885421e-01
+-9.168115092878250660e-01,1.605182302367740843e-01,-3.623171640406794292e-01,-2.812781193956283635e-03,9.251953950107954378e-03,-1.579973489420164190e-01,1.507203587795932220e-01
+-9.159726041354947501e-01,1.565860828660111947e-01,-3.714385833209493670e-01,-2.887901759035163291e-03,9.379533161149825049e-03,-1.559611058581482634e-01,1.553366649998065285e-01
+-9.151296757166257345e-01,1.524019989188800661e-01,-3.806071722727104034e-01,-2.963810129396427483e-03,9.507830964675893087e-03,-1.537893135370998199e-01,1.599920104168069401e-01
+-9.142827277336491587e-01,1.479606539122732267e-01,-3.898161720010296372e-01,-3.040485116968172450e-03,9.636845807242604145e-03,-1.514792382081188715e-01,1.646830640700649162e-01
+-9.134317639066518169e-01,1.432568332536452438e-01,-3.990585287597890884e-01,-3.117904683776362689e-03,9.766576287816774657e-03,-1.490282011370191173e-01,1.694063350284857605e-01
+-9.125767879733592824e-01,1.382854487689762768e-01,-4.083268938626608002e-01,-3.196045943552851525e-03,9.897021149975987128e-03,-1.464335783493253829e-01,1.741581689759427176e-01
+-9.117178036891198101e-01,1.330415490573529369e-01,-4.176136201353647937e-01,-3.274885157035880776e-03,1.002817926321624385e-02,-1.436928127575598790e-01,1.789347524262900058e-01
+-9.108548148268875710e-01,1.275203337879254606e-01,-4.269107647871199784e-01,-3.354397731561421531e-03,1.016004961907617833e-02,-1.408034196947035299e-01,1.837321061072461348e-01
+-9.099878251772063331e-01,1.217171686332258612e-01,-4.362100871225241816e-01,-3.434558223170946015e-03,1.029263131066709969e-02,-1.377629920183931767e-01,1.885460892838678459e-01
+-9.091168385481928071e-01,1.156275975006962609e-01,-4.455030507126322425e-01,-3.515340336791752517e-03,1.042592352380280855e-02,-1.345692110068386349e-01,1.933723970466771913e-01
+-9.082418587655194386e-01,1.092473596647220158e-01,-4.547808217789031660e-01,-3.596716924228671695e-03,1.055992551679786617e-02,-1.312198487503824229e-01,1.982065660863299994e-01
+-9.073628896723984205e-01,1.025724014467600947e-01,-4.640342758072383833e-01,-3.678659985727711617e-03,1.069463660674466188e-02,-1.277127814037881393e-01,2.030439664823544543e-01
+-9.064799351295638186e-01,9.559889455895401178e-02,-4.732539953439169889e-01,-3.761140677543506165e-03,1.083005614984260644e-02,-1.240459905334104912e-01,2.078798115299818616e-01
+-9.055929990152560283e-01,8.832324791479873038e-02,-4.824302764749165973e-01,-3.844129308111431716e-03,1.096618352199770635e-02,-1.202175772490920308e-01,2.127091528477396132e-01
+-9.047020852252026790e-01,8.074212506276119694e-02,-4.915531302984154083e-01,-3.927595349442463551e-03,1.110301810319961648e-02,-1.162257662708316569e-01,2.175268864920962830e-01
+-9.038071976726038681e-01,7.285246006511471306e-02,-5.006122881643578770e-01,-4.011507431579057308e-03,1.124055925222976031e-02,-1.120689133269130722e-01,2.223277536833408174e-01
+-9.029083402881132869e-01,6.465147035582076429e-02,-5.095972086054116890e-01,-4.095833357886129698e-03,1.137880628738096624e-02,-1.077455185668855647e-01,2.271063400887987638e-01
+-9.020055170198213457e-01,5.613667537047584144e-02,-5.184970802698641856e-01,-4.180540106038459214e-03,1.151775847068124692e-02,-1.032542294325582144e-01,2.318570843829403649e-01
+-9.010987318332384088e-01,4.730591396793326631e-02,-5.273008308016615686e-01,-4.265593836603725347e-03,1.165741497108667771e-02,-9.859384600140155830e-02,2.365742774874354526e-01
+-9.001879887112764766e-01,3.815735397259163025e-02,-5.359971336118987129e-01,-4.350959898791680568e-03,1.179777484774401260e-02,-9.376334123225094075e-02,2.412520676431678113e-01
+-8.992732916542326427e-01,2.868951536371498398e-02,-5.445744168294223142e-01,-4.436602844584945811e-03,1.193883702713077061e-02,-8.876185566929647996e-02,2.458844633198631335e-01
+-8.983546446797705531e-01,1.890128088682855451e-02,-5.530208705700540017e-01,-4.522486432295488509e-03,1.208060026972740050e-02,-8.358871568979382716e-02,2.504653412441356641e-01
+-8.974320518229038646e-01,8.791914373197384086e-03,-5.613244593559856277e-01,-4.608573640799310672e-03,1.222306315339839224e-02,-7.824343716905968193e-02,2.549884467756986517e-01
+-8.965055171359771480e-01,-1.638922798471393726e-03,-5.694729313455330066e-01,-4.694826679289238608e-03,1.236622403543406572e-02,-7.272573240440764752e-02,2.594474023047920186e-01
+-8.955750446886495686e-01,-1.239115928307870859e-02,-5.774538314922215143e-01,-4.781207006085214789e-03,1.251008103114650480e-02,-6.703552429678778790e-02,2.638357103543755300e-01
+-8.946406385678755679e-01,-2.346429431701339136e-02,-5.852545107396379676e-01,-4.867675334745366102e-03,1.265463198234218975e-02,-6.117294692091271996e-02,2.681467662155948006e-01
+-8.937023028778884326e-01,-3.485738642752045369e-02,-5.928621445970475579e-01,-4.954191653630683582e-03,1.279987442756869848e-02,-5.513836086838582590e-02,2.723738551182620093e-01
+-8.927600417401808652e-01,-4.656903463223596024e-02,-6.002637425549116612e-01,-5.040715242599817271e-03,1.294580557321806796e-02,-4.893235387096749739e-02,2.765101687196920288e-01
+-8.918138592934876652e-01,-5.859736750749862388e-02,-6.074461665802743937e-01,-5.127204688296340719e-03,1.309242225745382154e-02,-4.255575542659431609e-02,2.805488067257361218e-01
+-8.908637596937675207e-01,-7.094002796125911670e-02,-6.143961451359509773e-01,-5.213617902737309635e-03,1.323972093047570166e-02,-3.600964250471282851e-02,2.844827890710450724e-01
+-8.899097471141842464e-01,-8.359415639588563263e-02,-6.211002925570126010e-01,-5.299912145298656292e-03,1.338769761313280543e-02,-2.929534124747842222e-02,2.883050602240953553e-01
+-8.889518257450892413e-01,-9.655638132008446950e-02,-6.275451244309839804e-01,-5.386044043041715579e-03,1.353634786485697315e-02,-2.241444153137139156e-02,2.920085031530553765e-01
+-8.879899997940023937e-01,-1.098228043352850319e-01,-6.337170784731525064e-01,-5.471969614326008605e-03,1.368566675242535897e-02,-1.536879975052591642e-02,2.955859451475560951e-01
+-8.870242734855938727e-01,-1.233889901429867098e-01,-6.396025314826730979e-01,-5.557644286887658200e-03,1.383564881938684904e-02,-8.160549657936298773e-03,2.990301731341628022e-01
+-8.860546510616654770e-01,-1.372499519472402119e-01,-6.451878257928546079e-01,-5.643022930527581346e-03,1.398628805435148809e-02,-7.921030880959171192e-04,3.023339333143670871e-01
+-8.850811367811323160e-01,-1.514001420853510471e-01,-6.504592825677335544e-01,-5.728059883390757868e-03,1.413757785208917886e-02,6.733841131703820551e-03,3.054899575620711460e-01
+-8.841037349200038253e-01,-1.658334419376389091e-01,-6.554032309613219143e-01,-5.812708971058678543e-03,1.428951098302943569e-02,1.441429136712410984e-02,3.084909621400429014e-01
+-8.831224497713646704e-01,-1.805431526797863506e-01,-6.600060282370917575e-01,-5.896923546854922552e-03,1.444207955980715552e-02,2.224595766003291050e-02,3.113296653855469054e-01
+-8.821372856453569833e-01,-1.955219863087063747e-01,-6.642540844697107927e-01,-5.980656517425071532e-03,1.459527500616319452e-02,3.022524770899175442e-02,3.139987987741189035e-01
+-8.811482468691598235e-01,-2.107620576254061462e-01,-6.681338859282089926e-01,-6.063860374913013739e-03,1.474908802083750928e-02,3.834826319844745340e-02,3.164911223684933606e-01
+-8.801553377869717476e-01,-2.262548784175912875e-01,-6.716320235718300502e-01,-6.146487229605496899e-03,1.490350854538802748e-02,4.661079427930787250e-02,3.187994325288991959e-01
+-8.791585627599906028e-01,-2.419913517407753234e-01,-6.747352159926299775e-01,-6.228488848475890148e-03,1.505852573286066594e-02,5.500831627298097964e-02,3.209165818103280610e-01
+-8.781579261663950975e-01,-2.579617667170737705e-01,-6.774303398068931559e-01,-6.309816687996699751e-03,1.521412791737892199e-02,6.353598794407708250e-02,3.228354865866152057e-01
+-8.771534324013248174e-01,-2.741557940252501435e-01,-6.797044537480029236e-01,-6.390421937803749015e-03,1.537030257618527025e-02,7.218865114614517187e-02,3.245491480880584034e-01
+-8.761450858768620176e-01,-2.905624871989522240e-01,-6.815448299037527580e-01,-6.470255551299648571e-03,1.552703630197150610e-02,8.096082237389171099e-02,3.260506615489636717e-01
+-8.751328910220110835e-01,-3.071702776351515896e-01,-6.829389817778329652e-01,-6.549268299262913512e-03,1.568431477652696901e-02,8.984669903103077726e-02,3.273332327092682847e-01
+-8.741168522826797682e-01,-3.239669767783736054e-01,-6.838746911749289392e-01,-6.627410800561461986e-03,1.584212273322579967e-02,9.884015528790891791e-02,3.283901978063777927e-01
+-8.730969741216594304e-01,-3.409397779889623559e-01,-6.843400420379195381e-01,-6.704633569286138836e-03,1.600044393634734302e-02,1.079347416949416327e-01,3.292150316925539499e-01
+-8.720732610186056055e-01,-3.580752571451557564e-01,-6.843234491240344219e-01,-6.780887064700561080e-03,1.615926114677194297e-02,1.171236904480671137e-01,3.298013666583395898e-01
+-8.710457174700181326e-01,-3.753593797692912348e-01,-6.838136870282343072e-01,-6.856121736787647997e-03,1.631855609665647938e-02,1.263999118098000918e-01,3.301430114933339155e-01
+-8.700143479892213927e-01,-3.927775048878344166e-01,-6.827999227576524444e-01,-6.930288067788545336e-03,1.647830946365921520e-02,1.357560001473826772e-01,3.302339646281327590e-01
+-8.689791571063447684e-01,-4.103143946017844268e-01,-6.812717474978408072e-01,-7.003336629521221372e-03,1.663850085012998081e-02,1.451842328565029761e-01,3.300684293740484554e-01
+-8.679401493683025492e-01,-4.279542190663344070e-01,-6.792192062554747167e-01,-7.075218130797322931e-03,1.679910875839571785e-02,1.546765815779193587e-01,3.296408335613300045e-01
+-8.668973293387736145e-01,-4.456805702939587999e-01,-6.766328303462065819e-01,-7.145883470622175354e-03,1.696011056364686923e-02,1.642247106414666258e-01,3.289458440863133881e-01
+-8.658507015981822263e-01,-4.634764727832346720e-01,-6.735036688434207219e-01,-7.215283790217001496e-03,1.712148249966935834e-02,1.738199853117377292e-01,3.279783834693743017e-01
+-8.648002707436770464e-01,-4.813243976409127267e-01,-6.698233195933296358e-01,-7.283370531979805641e-03,1.728319963682318355e-02,1.834534774044441208e-01,3.267336470999037124e-01
+-8.637460413891117073e-01,-4.992062781411992223e-01,-6.655839593188326297e-01,-7.350095486495815920e-03,1.744523587002529477e-02,1.931159721901186677e-01,3.252071218679576936e-01
+-8.626880181650236068e-01,-5.171035250336512012e-01,-6.607783784846456010e-01,-7.415410856873969232e-03,1.760756390017588144e-02,2.027979798041064374e-01,3.233945953438847054e-01
+-8.616262057186147016e-01,-5.349970462044365549e-01,-6.554000076785487794e-01,-7.479269318127655490e-03,1.777015521628060382e-02,2.124897425717015842e-01,3.212921801315878345e-01
+-8.605606087137299687e-01,-5.528672695233899370e-01,-6.494429498905217590e-01,-7.541624071548763686e-03,1.793298008960327733e-02,2.221812404703638055e-01,3.188963262142982535e-01
+-8.594912318308379762e-01,-5.706941618588499132e-01,-6.429020095224730058e-01,-7.602428903864518438e-03,1.809600757052858694e-02,2.318622079092272081e-01,3.162038382606248699e-01
+-8.584180797670095675e-01,-5.884572532911368503e-01,-6.357727237135454290e-01,-7.661638251377497939e-03,1.825920547278998829e-02,2.415221449032168566e-01,3.132118871933267656e-01
+-8.573411572358974331e-01,-6.061356638991538537e-01,-6.280513871506836487e-01,-7.719207261620519150e-03,1.842254037230403596e-02,2.511503275631696841e-01,3.099180323470460685e-01
+-8.562604689677154601e-01,-6.237081301048829074e-01,-6.197350829172788567e-01,-7.775091848043348686e-03,1.858597760219883252e-02,2.607358235413216319e-01,3.063202304439084922e-01
+-8.551760197092181937e-01,-6.411530349451791677e-01,-6.108217097838628540e-01,-7.829248766065666221e-03,1.874948125662488294e-02,2.702675042127267169e-01,3.024168491419343963e-01
+-8.540878142236794091e-01,-6.584484370279904963e-01,-6.013100029861132567e-01,-7.881635667143531074e-03,1.891301419061047762e-02,2.797340633826466694e-01,2.982066904557959530e-01
+-8.529958572908716841e-01,-6.755721055654412277e-01,-5.911995646472667509e-01,-7.932211156335028032e-03,1.907653802860077505e-02,2.891240286052846886e-01,2.936889935129112228e-01
+-8.519001537070450825e-01,-6.925015511963173998e-01,-5.804908858858679466e-01,-7.980934869071020302e-03,1.924001317339306058e-02,2.984257843348062100e-01,2.888634500744834410e-01
+-8.508007082849067260e-01,-7.092140632898102925e-01,-5.691853674684226316e-01,-8.027767530532985488e-03,1.940339881065949665e-02,3.076275867792879759e-01,2.837302197156937322e-01
+-8.496975258535985898e-01,-7.256867474396775375e-01,-5.572853410876268399e-01,-8.072671017526462933e-03,1.956665292557853431e-02,3.167175821555582305e-01,2.782899405198006604e-01
+-8.485906112586771854e-01,-7.418965642965752449e-01,-5.447940905733493766e-01,-8.115608421428914185e-03,1.972973232153786560e-02,3.256838260814969299e-01,2.725437363427984927e-01
+-8.474799693620920227e-01,-7.578203680497427319e-01,-5.317158677452513693e-01,-8.156544120339951492e-03,1.989259263819209353e-02,3.345143071446772809e-01,2.664932303290475923e-01
+-8.463656050421637378e-01,-7.734349499869201949e-01,-5.180559068545700363e-01,-8.195443825078824879e-03,2.005518837043798741e-02,3.431969643264827230e-01,2.601405570386526866e-01
+-8.452475231935638877e-01,-7.887170807361874036e-01,-5.038204465667958099e-01,-8.232274657410000931e-03,2.021747290000576458e-02,3.517197101166243955e-01,2.534883561685913334e-01
+-8.441257287272921905e-01,-8.036435534911906542e-01,-4.890167323085324314e-01,-8.267005215490761533e-03,2.037939851826127066e-02,3.600704548301454921e-01,2.465397985953597659e-01
+-8.430002265706553199e-01,-8.181912311569096152e-01,-4.736530318084679059e-01,-8.299605616392103855e-03,2.054091646077817190e-02,3.682371264310440040e-01,2.392985829602000403e-01
+-8.418710216672458113e-01,-8.323370899123723188e-01,-4.577386445005093840e-01,-8.330047567982598600e-03,2.070197693775357659e-02,3.762076997283484991e-01,2.317689388283442342e-01
+-8.407381189769198571e-01,-8.460582711088474595e-01,-4.412839070369448113e-01,-8.358304433580973636e-03,2.086252917146996094e-02,3.839702123779023535e-01,2.239556319996521272e-01
+-8.396015234757753243e-01,-8.593321259608118767e-01,-4.243001933726903707e-01,-8.384351284966813991e-03,2.102252144706586584e-02,3.915127965995208181e-01,2.158639746569905038e-01
+-8.384612401561303274e-01,-8.721362643129949577e-01,-4.067999215176897887e-01,-8.408164950586719671e-03,2.118190113970208621e-02,3.988237052037980068e-01,2.074998174809089546e-01
+-8.373172740265014680e-01,-8.844486095692961625e-01,-3.887965513012069385e-01,-8.429724087205634625e-03,2.134061477409905302e-02,4.058913277554735410e-01,1.988695529082594171e-01
+-8.361696301115808527e-01,-8.962474409583441881e-01,-3.703045789083202477e-01,-8.449009222008313155e-03,2.149860806943872105e-02,4.127042318935673637e-01,1.899801185676395776e-01
+-8.350183134522153328e-01,-9.075114523919968201e-01,-3.513395348492067138e-01,-8.466002810085755670e-03,2.165582598278698381e-02,4.192511746625624536e-01,1.808389882496316547e-01
+-8.338633291053832997e-01,-9.182198001729144909e-01,-3.319179721342602196e-01,-8.480689283680733007e-03,2.181221278425300378e-02,4.255211355712635313e-01,1.714541750733536429e-01
+-8.327046821441730362e-01,-9.283521529771564529e-01,-3.120574573284637943e-01,-8.493055096281421426e-03,2.196771210061302521e-02,4.315033458898989149e-01,1.618342229622768136e-01
+-8.315423776577602899e-01,-9.378887458045690462e-01,-2.917765570622485871e-01,-8.503088770798737997e-03,2.212226698186292931e-02,4.371873106847850021e-01,1.519882002383713127e-01
+-8.303764207513859574e-01,-9.468104294023325096e-01,-2.710948213769805992e-01,-8.510780946613821091e-03,2.227581995911694090e-02,4.425628391799191763e-01,1.419256926593806534e-01
+-8.292068165463334362e-01,-9.550987244607508364e-01,-2.500327629345306391e-01,-8.516124413647198008e-03,2.242831311954535806e-02,4.476200656622482676e-01,1.316567976451799571e-01
+-8.280335701799065307e-01,-9.627358678949333726e-01,-2.286118383274572119e-01,-8.519114150759093951e-03,2.257968817081184784e-02,4.523494843598187543e-01,1.211921079445384547e-01
+-8.268566868054064711e-01,-9.697048682307862766e-01,-2.068544211252372200e-01,-8.519747364767663381e-03,2.272988651452391776e-02,4.567419660191611719e-01,1.105427041406049499e-01
+-8.256761715921098199e-01,-9.759895499087677395e-01,-1.847837756612068782e-01,-8.518023518576155542e-03,2.287884932314212388e-02,4.607887936588033195e-01,9.972013909576143975e-02
+-8.244920297252450458e-01,-9.815746057227390242e-01,-1.624240263900425807e-01,-8.513944363895338194e-03,2.302651760726632732e-02,4.644816810890148306e-01,8.873642403592450767e-02
+-8.233042664059700977e-01,-9.864456435883066643e-01,-1.398001246803742548e-01,-8.507513962593388213e-03,2.317283230432328964e-02,4.678127997002138327e-01,7.760401280516163181e-02
+-8.221128868513501997e-01,-9.905892326477692134e-01,-1.169378136400601176e-01,-8.498738711041470967e-03,2.331773435528809724e-02,4.707748037454725920e-01,6.633578322025734941e-02
+-8.209178962943334268e-01,-9.939929481189155869e-01,-9.386358851680356996e-02,-8.487627353013938586e-03,2.346116478839956401e-02,4.733608548400110028e-01,5.494502020491219552e-02
+-8.197192999837294991e-01,-9.966454157437840689e-01,-7.060465896637618810e-02,-8.474191000052895897e-03,2.360306479862665091e-02,4.755646436833757540e-01,4.344538900138882748e-02
+-8.185171031841850242e-01,-9.985363545287795350e-01,-4.718890076770562020e-02,-8.458443144696422764e-03,2.374337584134108420e-02,4.773804110686625246e-01,3.185092186763630040e-02
+-8.173113111761618477e-01,-9.996566147687609982e-01,-2.364481208040133606e-02,-8.440399655697332706e-03,2.388203971497237024e-02,4.788029734310821350e-01,2.017599009895832199e-02
+-8.161019292559127392e-01,-9.999982193062991742e-01,-1.466039290599369866e-06,-8.420078792841236351e-03,2.401899865184375579e-02,4.798277376438284669e-01,8.435276927030565391e-03
+-8.148889627354590770e-01,-9.995543962513298020e-01,2.371154456375888092e-02,-8.397501200793602155e-03,2.415419540930800746e-02,4.804507267605278020e-01,-3.356240544277015302e-03
+-8.136724169425666453e-01,-9.983196150547294367e-01,4.746415724904606837e-02,-8.372689896163805490e-03,2.428757335147952584e-02,4.806685940321133166e-01,-1.518332501564592998e-02
+-8.124522972207226523e-01,-9.962896188071073134e-01,7.122589471773602132e-02,-8.345670271890976225e-03,2.441907655196347629e-02,4.804786382005866163e-01,-2.703050882584450906e-02
+-8.112286089291126379e-01,-9.934614494060887147e-01,9.496592788279527297e-02,-8.316470076905021699e-03,2.454864987898703471e-02,4.798788260095348535e-01,-3.888211356019877218e-02
+-8.100013574425958263e-01,-9.898334778659937383e-01,1.186531310210999363e-01,-8.285119387250773626e-03,2.467623908583995579e-02,4.788677985748612809e-01,-5.072228922318531424e-02
+-8.087705481516828110e-01,-9.854054256514822585e-01,1.422561435594618950e-01,-8.251650597306810914e-03,2.480179091197395880e-02,4.774448877582532225e-01,-6.253504545243683543e-02
+-8.075361864625111297e-01,-9.801783833987534056e-01,1.657434361665048872e-01,-8.216098387902692410e-03,2.492525316883444780e-02,4.756101302960473176e-01,-7.430427880522021133e-02
+-8.062982777968216164e-01,-9.741548293789535906e-01,1.890833743452035309e-01,-8.178499689492493147e-03,2.504657482338549027e-02,4.733642749693253715e-01,-8.601380852102358254e-02
+-8.050568275919344208e-01,-9.673386471025832156e-01,2.122442840384957952e-01,-8.138893647267815210e-03,2.516570610375541001e-02,4.707087836612453557e-01,-9.764741192149845039e-02
+-8.038118413007259155e-01,-9.597351278046067913e-01,2.351945206786512643e-01,-8.097321580595971680e-03,2.528259858850418276e-02,4.676458527770980811e-01,-1.091888563668418077e-01
+-8.025633243916036053e-01,-9.513509834312565694e-01,2.579025364651632013e-01,-8.053826933576292701e-03,2.539720527671632602e-02,4.641784067462442764e-01,-1.206219371289684317e-01
+-8.013112823484830338e-01,-9.421943504027250338e-01,2.803369496760872437e-01,-8.008455233013614741e-03,2.550948069762547579e-02,4.603101003727200213e-01,-1.319305136106802834e-01
+-8.000557206707632485e-01,-9.322747849223692596e-01,3.024666178508710335e-01,-7.961254028209775735e-03,2.561938098477681627e-02,4.560453283938359359e-01,-1.430985400098599425e-01
+-7.987966448733025970e-01,-9.216032638714933833e-01,3.242607060362152072e-01,-7.912272822614571655e-03,2.572686395868314588e-02,4.513892157527723548e-01,-1.541101064744835325e-01
+-7.975340604863950800e-01,-9.101921758420347119e-01,3.456887550058900804e-01,-7.861563027153692720e-03,2.583188921692561660e-02,4.463476174058076396e-01,-1.649494810549443358e-01
+-7.962679730557449265e-01,-8.980553080845486980e-01,3.667207608159198662e-01,-7.809177885438944866e-03,2.593441821039297809e-02,4.409271167140625391e-01,-1.756011306523950211e-01
+-7.949983881424437238e-01,-8.852078336014195248e-01,3.873272365397030237e-01,-7.755172388405916217e-03,2.603441431547148799e-02,4.351350144010134868e-01,-1.860497751194815919e-01
+-7.937253113229446599e-01,-8.716662917786611731e-01,4.074792878552943587e-01,-7.699603225296869878e-03,2.613184291855217417e-02,4.289793204050404429e-01,-1.962804146074282330e-01
+-7.924487481890386542e-01,-8.574485634725578231e-01,4.271486850555861192e-01,-7.642528671438883654e-03,2.622667147867107501e-02,4.224687468810291446e-01,-2.062783623108120912e-01
+-7.911687043478299319e-01,-8.425738475618385070e-01,4.463079239806131415e-01,-7.584008527499387642e-03,2.631886959933626285e-02,4.156126892098043757e-01,-2.160292961387449417e-01
+-7.898851854217111557e-01,-8.270626274063678229e-01,4.649303052593858343e-01,-7.524104025708921312e-03,2.640840909494108005e-02,4.084212167250323433e-01,-2.255192736717469282e-01
+-7.885981970483383341e-01,-8.109366374922751808e-01,4.829899924492839225e-01,-7.462877728020170763e-03,2.649526404601267732e-02,4.009050539513179245e-01,-2.347347837593070641e-01
+-7.873077448806070633e-01,-7.942188264660438035e-01,5.004620811190607643e-01,-7.400393449800464138e-03,2.657941085902535208e-02,3.930755595763983923e-01,-2.436627741297733019e-01
+-7.860138345866267695e-01,-7.769333145544807806e-01,5.173226650920056402e-01,-7.336716146316401774e-03,2.666082832735822833e-02,3.849447070490913947e-01,-2.522906803153059929e-01
+-7.847164718496960623e-01,-7.591053455196633548e-01,5.335488928309204404e-01,-7.271911812255111955e-03,2.673949767066938210e-02,3.765250666930395340e-01,-2.606064685145582649e-01
+-7.834156623682784204e-01,-7.407612417717903686e-01,5.491190306257212050e-01,-7.206047394214433813e-03,2.681540257907138319e-02,3.678297733725227059e-01,-2.685986606230384988e-01
+-7.821114118559759909e-01,-7.219283491834027622e-01,5.640125220333074285e-01,-7.139190664045851412e-03,2.688852926080429959e-02,3.588725051788717590e-01,-2.762563606906400793e-01
+-7.808037260415053860e-01,-7.026349817441026602e-01,5.782100363019198053e-01,-7.071410121613113005e-03,2.695886647099205238e-02,3.496674543982792316e-01,-2.835692959212295361e-01
+-7.794926106686721479e-01,-6.829103627028314527e-01,5.916935274067446349e-01,-7.002774887157044680e-03,2.702640554564814113e-02,3.402292973522117459e-01,-2.905278311650468237e-01
+-7.781780714963458800e-01,-6.627845615807120838e-01,6.044462799538522502e-01,-6.933354573806806836e-03,2.709114042177472592e-02,3.305731646047801453e-01,-2.971230009372392566e-01
+-7.768601142984342678e-01,-6.422884315394463695e-01,6.164529528762044963e-01,-6.863219185351859992e-03,2.715306766317940323e-02,3.207146034974340809e-01,-3.033465380139672707e-01
+-7.755387448638585424e-01,-6.214535375051262189e-01,6.276996270030669178e-01,-6.792439002001836139e-03,2.721218647478502659e-02,3.106695514117127632e-01,-3.091908867559007290e-01
+-7.742139689965273908e-01,-6.003120886504943465e-01,6.381738401779909475e-01,-6.721084447642771374e-03,2.726849870067497156e-02,3.004542946853520635e-01,-3.146492313425279796e-01
+-7.728857925153117536e-01,-5.788968667469464036e-01,6.478646211874246053e-01,-6.649226000512563350e-03,2.732200884221631584e-02,2.900854295580010400e-01,-3.197155173987281107e-01
+-7.715542212540194011e-01,-5.572411477977813332e-01,6.567625295523561446e-01,-6.576934050019884494e-03,2.737272405307820608e-02,2.795798304876417895e-01,-3.243844534143167091e-01
+-7.702192610613689538e-01,-5.353786273944495244e-01,6.648596702057392172e-01,-6.504278787173008690e-03,2.742065412765387969e-02,2.689546067730948509e-01,-3.286515496908071765e-01
+-7.688809178009646805e-01,-5.133433413857972916e-01,6.721497286618957068e-01,-6.431330097806424277e-03,2.746581149046703821e-02,2.582270637899141130e-01,-3.325131089848102350e-01
+-7.675391973512698529e-01,-4.911695863335392986e-01,6.786279834136289546e-01,-6.358157421352697222e-03,2.750821117650602970e-02,2.474146610136816471e-01,-3.359662496819701416e-01
+-7.661941056055823207e-01,-4.688918379865649744e-01,6.842913210356236231e-01,-6.284829653310818227e-03,2.754787081128936763e-02,2.365349707267427048e-01,-3.390089134751358979e-01
+-7.648456484720069781e-01,-4.465446672637797265e-01,6.891382474905013433e-01,-6.211415027834569363e-03,2.758481057387776256e-02,2.256056389661932615e-01,-3.416398695224526216e-01
+-7.634938318734313389e-01,-4.241626613392558975e-01,6.931688957561268349e-01,-6.137980997549735002e-03,2.761905316518279210e-02,2.146443350929592986e-01,-3.438587146804559236e-01
+-7.621386617474984471e-01,-4.017803357835629252e-01,6.963850223129088857e-01,-6.064594118521959148e-03,2.765062377572207603e-02,2.036687169503928474e-01,-3.456658836573325289e-01
+-7.607801440465815634e-01,-3.794320517036286478e-01,6.987900070978687594e-01,-5.991319951271143612e-03,2.767955002273815926e-02,1.926963861149845192e-01,-3.470626415924710217e-01
+-7.594182847377570766e-01,-3.571519346519789750e-01,7.003888447030763853e-01,-5.918222939415006614e-03,2.770586190806827165e-02,1.817448399479514309e-01,-3.480510819133973976e-01
+-7.580530898027795228e-01,-3.349737893525306376e-01,7.011881303946849719e-01,-5.845366307902067834e-03,2.772959176485137109e-02,1.708314322699681087e-01,-3.486341227391104858e-01
+-7.566845652380541631e-01,-3.129310157089180144e-01,7.011960415000257907e-01,-5.772811965856647354e-03,2.775077419183224689e-02,1.599733330759091354e-01,-3.488155008196570650e-01
+-7.553127170546113378e-01,-2.910565292961397321e-01,7.004223198209906798e-01,-5.700620404719067280e-03,2.776944597903576589e-02,1.491874819547505671e-01,-3.485997524604378950e-01
+-7.539375512780798205e-01,-2.693826805836360139e-01,6.988782402163556950e-01,-5.628850588030471017e-03,2.778564604908619756e-02,1.384905466656307371e-01,-3.479922093893880741e-01
+-7.525590739486605063e-01,-2.479411737840396535e-01,6.965765804768047076e-01,-5.557559875883788351e-03,2.779941537087613629e-02,1.278988858551477203e-01,-3.469989813522384448e-01
+-7.511772911210995440e-01,-2.267629920311740599e-01,6.935315860892934392e-01,-5.486803926281188203e-03,2.781079688417757712e-02,1.174285040017026965e-01,-3.456269376272700367e-01
+-7.497922088646621352e-01,-2.058783199156153099e-01,6.897589299072457436e-01,-5.416636618885399372e-03,2.781983541118822692e-02,1.070950160543692048e-01,-3.438836876557266331e-01
+-7.484038332631057777e-01,-1.853164721568988926e-01,6.852756688224277415e-01,-5.347109960022280485e-03,2.782657757342120106e-02,9.691360593882354657e-02,-3.417775571268127366e-01
+-7.470121704146531760e-01,-1.651058196776931108e-01,6.801001917379916328e-01,-5.278274021294224530e-03,2.783107168870911294e-02,8.689899592517534577e-02,-3.393175705151845745e-01
+-7.456172264319661513e-01,-1.452737271291073273e-01,6.742521729417820797e-01,-5.210176858074487723e-03,2.783336768007393935e-02,7.706540111559335993e-02,-3.365134139849599304e-01
+-7.442190074421178858e-01,-1.258464834071363614e-01,6.677525086880787830e-01,-5.142864445773684054e-03,2.783351698221509657e-02,6.742650424296788014e-02,-3.333754206586261315e-01
+-7.428175195865669433e-01,-1.068492416992091465e-01,6.606232635049233970e-01,-5.076380627898165014e-03,2.783157243082131152e-02,5.799542032512963741e-02,-3.299145289576139173e-01
+-7.414127690211299582e-01,-8.830596126520727507e-02,6.528876039462200298e-01,-5.010767044849161883e-03,2.782758815621156831e-02,4.878466632080938337e-02,-3.261422562539655901e-01
+-7.400047619159539902e-01,-7.023935471348484449e-02,6.445697316253072851e-01,-4.946063096890338309e-03,2.782161948337621288e-02,3.980612923707145534e-02,-3.220706661368086254e-01
+-7.385935044554904350e-01,-5.267083306238425772e-02,6.356948166073680406e-01,-4.882305889657919361e-03,2.781372281601822427e-02,3.107104761234672635e-02,-3.177123278038884724e-01
+-7.371790028384670457e-01,-3.562046377515961509e-02,6.262889202896364527e-01,-4.819530198439218004e-03,2.780395551815164484e-02,2.258997833435950897e-02,-3.130802885223619714e-01
+-7.357612632778612882e-01,-1.910692780869006208e-02,6.163789248036775081e-01,-4.757768445864084261e-03,2.779237581405380811e-02,1.437277536742847563e-02,-3.081880278098838022e-01
+-7.343402920008724744e-01,-3.147479710642392490e-03,6.059924557868318074e-01,-4.697050655196744438e-03,2.777904267545845685e-02,6.428573187171862137e-03,-3.030494186137129842e-01
+-7.329160952488951164e-01,1.224208748497369013e-02,5.951577985374429947e-01,-4.637404445242219314e-03,2.776401568645253670e-02,-1.234228035393559802e-03,-2.976786961002072518e-01
+-7.314886792774907276e-01,2.704746324801108118e-02,5.839038247674296356e-01,-4.578855012268512326e-03,2.774735494003562758e-02,-8.607992684808545014e-03,-2.920904024410702626e-01
+-7.300580503563610657e-01,4.125585287109085481e-02,5.722599040738809428e-01,-4.521425108438349369e-03,2.772912092986262733e-02,-1.568584556708883804e-02,-2.862993567975579134e-01
+-7.286242147693199334e-01,5.485599059511848258e-02,5.602558227028922033e-01,-4.465135058342446610e-03,2.770937441445018787e-02,-2.246169417416360101e-02,-2.803206088733053436e-01
+-7.271871788142661996e-01,6.783815546134439045e-02,5.479217009014223372e-01,-4.410002749990632638e-03,2.768817632385999275e-02,-2.893023422077886653e-02,-2.741693930642224442e-01
+-7.257469488031560445e-01,8.019418268895495139e-02,5.352879070694330776e-01,-4.356043643945717476e-03,2.766558763133467047e-02,-3.508695185152480023e-02,-2.678610872750115113e-01
+-7.243035310619742040e-01,9.191746655497841367e-02,5.223849717416452210e-01,-4.303270785885921108e-03,2.764166924078128446e-02,-4.092813000348782121e-02,-2.614111714459028679e-01
+-7.228569319307079910e-01,1.030029592388042314e-01,5.092435035211884120e-01,-4.251694840677505347e-03,2.761648187511055869e-02,-4.645085054686542980e-02,-2.548351825379747693e-01
+-7.214071577633178745e-01,1.134471674338558972e-01,4.958941077766920724e-01,-4.201324098476962032e-03,2.759008597200805074e-02,-5.165298886013654223e-02,-2.481486651823076039e-01
+-7.199542149277103897e-01,1.232481411894914697e-01,4.823672955534962781e-01,-4.152164517016200540e-03,2.756254156420617774e-02,-5.653321130589015336e-02,-2.413671422471771744e-01
+-7.184981098057098281e-01,1.324054581953006127e-01,4.686934081335284441e-01,-4.104219766113880764e-03,2.753390818546122901e-02,-6.109096971560481343e-02,-2.345060583505959029e-01
+-7.170388487930305921e-01,1.409202060967279513e-01,4.549025314377802554e-01,-4.057491254948318408e-03,2.750424475706279154e-02,-6.532648820677826318e-02,-2.275807454127465046e-01
+-7.155764382992484407e-01,1.487949548928474974e-01,4.410244169684522375e-01,-4.011978192739126120e-03,2.747360949606144145e-02,-6.924075746137529908e-02,-2.206063796361217300e-01
+-7.141108847477731780e-01,1.560337323670046572e-01,4.270884049159409113e-01,-3.967677629463899590e-03,2.744205981226447572e-02,-7.283551245215971748e-02,-2.135979387945444452e-01
+-7.126421945758196763e-01,1.626419861259636723e-01,4.131233447953304716e-01,-3.924584521895233685e-03,2.740965221263463869e-02,-7.611322472541401374e-02,-2.065701691822568742e-01
+-7.111703742343798984e-01,1.686265487535627272e-01,3.991575262230190879e-01,-3.882691795130953014e-03,2.737644222019664300e-02,-7.907707864709018208e-02,-1.995375392151380878e-01
+-7.096954301881950311e-01,1.739955952408191175e-01,3.852186048874153279e-01,-3.841990399337128891e-03,2.734248428243290369e-02,-8.173095233062353804e-02,-1.925142083203967847e-01
+-7.082173689157260643e-01,1.787585979075866638e-01,3.713335353350998846e-01,-3.802469388590626418e-03,2.730783169251114334e-02,-8.407939390390442580e-02,-1.855139899086110655e-01
+-7.067361969091264795e-01,1.829262774252065349e-01,3.575285060198299947e-01,-3.764115988336481401e-03,2.727253650483707145e-02,-8.612759619596485616e-02,-1.785503175178511093e-01
+-7.052519206742123847e-01,1.865105481776033747e-01,3.438288767163226178e-01,-3.726915681487570717e-03,2.723664946502215439e-02,-8.788137365623702180e-02,-1.716362144596568773e-01
+-7.037645467304356472e-01,1.895244627627028500e-01,3.302591231905295799e-01,-3.690852281239814965e-03,2.720021994940279367e-02,-8.934713304362938702e-02,-1.647842582794263877e-01
+-7.022740816108536954e-01,1.919821550007946853e-01,3.168427802898884371e-01,-3.655908010301331217e-03,2.716329589819729481e-02,-9.053183964422104046e-02,-1.580065570473310754e-01
+-7.007805318621018742e-01,1.938987766782999111e-01,3.036023909795180220e-01,-3.622063593171327613e-03,2.712592374671244505e-02,-9.144298856238372575e-02,-1.513147237758842578e-01
+-6.992839040443636911e-01,1.952904310365432827e-01,2.905594617360880982e-01,-3.589298350576005606e-03,2.708814838254304525e-02,-9.208857606502596216e-02,-1.447198485584015315e-01
+-6.977842047313435048e-01,1.961741092944723763e-01,2.777344215865885091e-01,-3.557590271320531811e-03,2.705001309998088591e-02,-9.247705983191734092e-02,-1.382324738382690599e-01
+-6.962814405102357718e-01,1.965676209697732324e-01,2.651465781948085798e-01,-3.526916117053039736e-03,2.701155954933568207e-02,-9.261732609167422359e-02,-1.318625854564202948e-01
+-6.947756179816976241e-01,1.964895228869645016e-01,2.528140919240359219e-01,-3.497251518766437625e-03,2.697282770177799466e-02,-9.251865515534336837e-02,-1.256195804701483731e-01
+-6.932667437598194482e-01,1.959590472986015575e-01,2.407539418559273003e-01,-3.468571052061568752e-03,2.693385582265364966e-02,-9.219068523939223658e-02,-1.195122607165396977e-01
+-6.917548244720952422e-01,1.949960317545167410e-01,2.289819013746396403e-01,-3.440848352130282221e-03,2.689468043967604971e-02,-9.164337033633222773e-02,-1.135488192433143084e-01
+-6.902398667593944159e-01,1.936208414566090164e-01,2.175125210648420626e-01,-3.414056196972621755e-03,2.685533631706737978e-02,-9.088695020497709887e-02,-1.077368239200351963e-01
+-6.887218772759319263e-01,1.918542973573130472e-01,2.063591091435490465e-01,-3.388166609169910443e-03,2.681585645676485760e-02,-8.993190832763416076e-02,-1.020832166112086159e-01
+-6.872008626892394112e-01,1.897176042552653208e-01,1.955337259669575056e-01,-3.363150947727518072e-03,2.677627206565127635e-02,-8.878892931445850345e-02,-9.659429689339345260e-02
+-6.856768296801356577e-01,1.872322698688696496e-01,1.850471722691645615e-01,-3.338979993913167646e-03,2.673661256194744679e-02,-8.746887336656589129e-02,-9.127572840796964460e-02
+-6.841497849426976252e-01,1.844200378698228338e-01,1.749089903540149982e-01,-3.315624062403336059e-03,2.669690558114824058e-02,-8.598272539897473810e-02,-8.613253171247574336e-02
+-6.826197351842306915e-01,1.813028108350228373e-01,1.651274667345295488e-01,-3.293053077292097414e-03,2.665717696652062166e-02,-8.434156458304610526e-02,-8.116908505946678065e-02
+-6.810866871252394539e-01,1.779025794655771653e-01,1.557096374299454800e-01,-3.271236676444729506e-03,2.661745078773698064e-02,-8.255652432124410689e-02,-7.638913064532043551e-02
+-6.795506474993976420e-01,1.742413501869868875e-01,1.466613023211462419e-01,-3.250144289103983758e-03,2.657774934751414020e-02,-8.063875808954272528e-02,-7.179577409540581490e-02
+-6.780116230535196964e-01,1.703410753249816334e-01,1.379870369781843464e-01,-3.229745229360961659e-03,2.653809321597068449e-02,-7.859940391167750950e-02,-6.739149863525456530e-02
+-6.764696205475297930e-01,1.662235885405092262e-01,1.296902149800724380e-01,-3.210008779157171380e-03,2.649850124057492873e-02,-7.644954303939840989e-02,-6.317816921993704704e-02
+-6.749246467544329775e-01,1.619105342894126276e-01,1.217730289855175618e-01,-3.190904277387936888e-03,2.645899057439870042e-02,-7.420017448237899160e-02,-5.915704828358387540e-02
+-6.733767084602854114e-01,1.574233070068953855e-01,1.142365219083737032e-01,-3.172401187880392708e-03,2.641957671535904301e-02,-7.186217650041178129e-02,-5.532880143060043415e-02
+-6.718258124641639517e-01,1.527829890549135261e-01,1.070806108930882211e-01,-3.154469182640496066e-03,2.638027353301749048e-02,-6.944627613148955658e-02,-5.169352509106162757e-02
+-6.702719655781371744e-01,1.480102901034561491e-01,1.003041276846739494e-01,-3.137078224244179391e-03,2.634109331738328924e-02,-6.696302223348438165e-02,-4.825075144500409058e-02
+-6.687151746272345099e-01,1.431254951759408089e-01,9.390485227602068941e-02,-3.120198622434741144e-03,2.630204681937981653e-02,-6.442274893380921241e-02,-4.499947347607797382e-02
+-6.671554464494171555e-01,1.381484079991691372e-01,8.787955346059045381e-02,-3.103801113417002135e-03,2.626314328628293754e-02,-6.183555495024538329e-02,-4.193816409081640995e-02
+-6.655927878955469890e-01,1.330983017105574207e-01,8.222403328245440901e-02,-3.087856915264634416e-03,2.622439052790784958e-02,-5.921127655908076798e-02,-3.906479440935520936e-02
+-6.640272058293580359e-01,1.279938744114658689e-01,7.693316897951003874e-02,-3.072337788634773442e-03,2.618579495688872980e-02,-5.655945925403549346e-02,-3.637686268057802735e-02
+-6.624587071274248284e-01,1.228532035482302887e-01,7.200096306936602797e-02,-3.057216105368988895e-03,2.614736164625600101e-02,-5.388933967454870300e-02,-3.387141328192740108e-02
+-6.608872986791325399e-01,1.176937078158692140e-01,6.742059322040604152e-02,-3.042464879558641564e-03,2.610909438138877062e-02,-5.120982175197223707e-02,-3.154506088731093283e-02
+-6.593129873866474533e-01,1.125321081249939514e-01,6.318445802225176877e-02,-3.028057841158979507e-03,2.607099572871179188e-02,-4.852946314439030018e-02,-2.939402649138554008e-02
+-6.577357801648857638e-01,1.073843998409026584e-01,5.928424009318283144e-02,-3.013969463571485909e-03,2.603306709196527569e-02,-4.585644923410021889e-02,-2.741414448934284542e-02
+-6.561556839414840470e-01,1.022658202381259152e-01,5.571094731799999811e-02,-3.000174999962548512e-03,2.599530876505401489e-02,-4.319858491660429067e-02,-2.560091371318088532e-02
+-6.545727056567677282e-01,9.719082422837055368e-02,5.245497703178002985e-02,-2.986650545946731381e-03,2.595772000321387718e-02,-4.056327977696453507e-02,-2.394950778379722686e-02
+-6.529868522637218842e-01,9.217306289686569287e-02,4.950616815033663520e-02,-2.973373042471577112e-03,2.592029908829365098e-02,-3.795753693598230560e-02,-2.245480979129482052e-02
+-6.513981307279594901e-01,8.722536770603617207e-02,4.685385669308213957e-02,-2.960320322446355866e-03,2.588304338910908664e-02,-3.538794020065745560e-02,-2.111144173680542385e-02
+-6.498065480276917771e-01,8.235973484139155754e-02,4.448693288429941134e-02,-2.947471126402195253e-03,2.584594941887769295e-02,-3.286064982977668358e-02,-1.991379153830862350e-02
+-6.482121111536969238e-01,7.758731351047033764e-02,4.239389706232301464e-02,-2.934805129957455353e-03,2.580901291048386115e-02,-3.038139968090144624e-02,-1.885604220672307263e-02
+-6.466148271092899691e-01,7.291840244741586152e-02,4.056291600658139263e-02,-2.922302950757471336e-03,2.577222888692961775e-02,-2.795548763742331660e-02,-1.793219959938922164e-02
+-6.450147029102913931e-01,6.836244685261344545e-02,3.898187701353662826e-02,-2.909946168607296154e-03,2.573559170664590362e-02,-2.558777371935026801e-02,-1.713612319559228159e-02
+-6.434117455849966971e-01,6.392803487398902862e-02,3.763844385652581614e-02,-2.897717330594044502e-03,2.569909514113333238e-02,-2.328268735003788392e-02,-1.646155160184211222e-02
+-6.418059621741456500e-01,5.962290670690204020e-02,3.652010931084499756e-02,-2.885599960205487147e-03,2.566273243425658268e-02,-2.104421902201297420e-02,-1.590213223938376219e-02
+-6.401973597308906472e-01,5.545395626836639030e-02,3.561424951636656122e-02,-2.873578553048075981e-03,2.562649636549465398e-02,-1.887593367538131089e-02,-1.545144483349214851e-02
+-6.385859453207668457e-01,5.142724759885528019e-02,3.490817193588176270e-02,-2.861638577841228107e-03,2.559037931202729627e-02,-1.678096394984303005e-02,-1.510303368693612612e-02
+-6.369717260216599675e-01,4.754802234989394250e-02,3.438916858480099703e-02,-2.849766476439122200e-03,2.555437329445338357e-02,-1.476202730947405103e-02,-1.485042640344283287e-02
+-6.353547089237763235e-01,4.382071815136303478e-02,3.404456223327029935e-02,-2.837949638800151431e-03,2.551847005602222995e-02,-1.282142957104300679e-02,-1.468716170773649854e-02
+-6.337349011296107282e-01,4.024898776509656939e-02,3.386175017538205906e-02,-2.826176410563400673e-03,2.548266110600980877e-02,-1.096107338156827350e-02,-1.460681736368392783e-02
+-6.321123097539158575e-01,3.683572009905188016e-02,3.382825339461298375e-02,-2.814436068907207218e-03,2.544693777942267504e-02,-9.182469230892590165e-03,-1.460302316018684092e-02
+-6.304869419236707184e-01,3.358306363798366917e-02,3.393175294023522554e-02,-2.802718804439102186e-03,2.541129127887441566e-02,-7.486747537257574560e-03,-1.466949274392109412e-02
+-6.288588047780496737e-01,3.049245041243193316e-02,3.416013396352111930e-02,-2.791015706186839897e-03,2.537571273987440298e-02,-5.874674893066487817e-03,-1.480003567349226154e-02
+-6.272279054683905786e-01,2.756462798707633166e-02,3.450151918418981750e-02,-2.779318732658436764e-03,2.534019326891079338e-02,-4.346659995629798337e-03,-1.498858362572705501e-02
+-6.255942511581639165e-01,2.479968459788478921e-02,3.494430591305963674e-02,-2.767620699869058690e-03,2.530472398472308898e-02,-2.902776686910316095e-03,-1.522920405429985247e-02
+-6.239578490229409358e-01,2.219708147965914966e-02,3.547719857179074815e-02,-2.755915241117005542e-03,2.526929607311050388e-02,-1.542777307717351767e-03,-1.551611623250364128e-02
+-6.223187062503623412e-01,1.975568636718682061e-02,3.608923583856238348e-02,-2.744196784953132403e-03,2.523390082556976888e-02,-2.661071770657027222e-04,-1.584371117052880279e-02
+-6.206768300401065419e-01,1.747380741129597390e-02,3.676982001407474304e-02,-2.732460528665675830e-03,2.519852966898432356e-02,9.280788544413811876e-04,-1.620656106000881863e-02
+-6.190322276038583427e-01,1.534922549322720667e-02,3.750873993007908247e-02,-2.720702399633232946e-03,2.516317420326842288e-02,2.040894469287074173e-03,-1.659943444382997249e-02
+-6.173849061652770809e-01,1.337923111978578210e-02,3.829619224195058430e-02,-2.708919035156665510e-03,2.512782624108345422e-02,3.073706628332282739e-03,-1.701730799499778443e-02
+-6.157348729599646520e-01,1.156066050312136093e-02,3.912280317700501941e-02,-2.697107734095277858e-03,2.509247783543560109e-02,4.028117343786092805e-03,-1.745537101578691769e-02
+-6.140821352354338680e-01,9.889933199067904579e-03,3.997963910991855080e-02,-2.685266433058401160e-03,2.505712130004001184e-02,4.905947388081659602e-03,-1.790904448844354152e-02
+-6.124267002510773716e-01,8.363084450072890211e-03,4.085822685626309958e-02,-2.673393671660079154e-03,2.502174923719755958e-02,5.709209534741101037e-03,-1.837397556721623651e-02
+-6.107685752781343291e-01,6.975806187521307704e-03,4.175055873450683047e-02,-2.661488547908472981e-03,2.498635457579412300e-02,6.440097931423397273e-03,-1.884605438204890043e-02
+-6.091077675996596774e-01,5.723483670905219096e-03,4.264910457403827543e-02,-2.649550697513764711e-03,2.495093056373796220e-02,7.100969608218449941e-03,-1.932141169068531811e-02
+-6.074442845104918165e-01,4.601227135298481888e-03,4.354681712344893318e-02,-2.637580240704193169e-03,2.491547080191240313e-02,7.694317347482755240e-03,-1.979642292392066669e-02
+-6.057781333172199689e-01,3.603912785999560057e-03,4.443713258986420334e-02,-2.625577757892326621e-03,2.487996925711704471e-02,8.222761186883933981e-03,-2.026771555152483140e-02
+-6.041093213381530935e-01,2.726214579258466088e-03,4.531397587469875032e-02,-2.613544258143210997e-03,2.484442026905164802e-02,8.689024120513420477e-03,-2.073216203025267032e-02
+-6.024378559032870228e-01,1.962639535768446065e-03,4.617175748604115254e-02,-2.601481123807784443e-03,2.480881856567072538e-02,9.095916244125624994e-03,-2.118688296503280083e-02
+-6.007637443542724887e-01,1.307561720300303180e-03,4.700536748712400709e-02,-2.589390102281666651e-03,2.477315927167274279e-02,9.446318543434743809e-03,-2.162925067314642666e-02
+-5.990869940443830366e-01,7.552554442169575211e-04,4.781017751912940150e-02,-2.577273249037169509e-03,2.473743790167466997e-02,9.743167669407221121e-03,-2.205687271203571678e-02
+-5.974076123384820525e-01,2.999231315027374185e-04,4.858202283604640126e-02,-2.565132907242950819e-03,2.470165037195332092e-02,9.989433428049331043e-03,-2.246760826075557688e-02
+-5.957256066129916761e-01,-6.427155132781462788e-05,4.931720550897786270e-02,-2.552971672693799769e-03,2.466579300677646663e-02,1.018810911163661563e-02,-2.285954047256263943e-02
+-5.940409842558590503e-01,-3.431749976001758251e-04,5.001247086985122908e-02,-2.540792353389330870e-03,2.462986253099290479e-02,1.034219671591553140e-02,-2.323099496673301095e-02
+-5.923537526665245689e-01,-5.426176586107509100e-04,5.066500692980918652e-02,-2.528597959578081540e-03,2.459385605960601531e-02,1.045469049924867451e-02,-2.358051151476174903e-02
+-5.906639192558894580e-01,-6.683893695264464703e-04,5.127242189645571380e-02,-2.516391641272881785e-03,2.455777111236506771e-02,1.052856162177438770e-02,-2.390685338048116962e-02
+-5.889714914462828022e-01,-7.262101414760933470e-04,5.183273362763451964e-02,-2.504176693396206223e-03,2.452160558822697248e-02,1.056675500202554525e-02,-2.420899119003639549e-02
+-5.872764766714295703e-01,-7.217157869476879470e-04,5.234435117362176010e-02,-2.491956504133989794e-03,2.448535776963621513e-02,1.057216219823429830e-02,-2.448609887318986197e-02
+-5.855788823764169759e-01,-6.604273799885975512e-04,5.280605981139950555e-02,-2.479734548261748432e-03,2.444902631541495328e-02,1.054762853175448059e-02,-2.473754062132353659e-02
+-5.838787160176629465e-01,-5.477393905434893616e-04,5.321700018838271956e-02,-2.467514341745764579e-03,2.441261023085142950e-02,1.049592919782391109e-02,-2.496286678105473153e-02
+-5.821759850628824839e-01,-3.888998951934138134e-04,5.357665213004861315e-02,-2.455299442230360601e-03,2.437610888170111192e-02,1.041976427955797722e-02,-2.516179921839498124e-02
+-5.804706969910552461e-01,-1.889928369934419986e-04,5.388481339683730276e-02,-2.443093397788720714e-03,2.433952197049724098e-02,1.032175398592195641e-02,-2.533422468840746539e-02
+-5.787628592923925730e-01,4.707442154708450148e-05,5.414157901162785363e-02,-2.430899761925054119e-03,2.430284951604670327e-02,1.020442802819770435e-02,-2.548018598445855876e-02
+-5.770524794683048464e-01,3.145813647269638837e-04,5.434732509310442883e-02,-2.418722040616493561e-03,2.426609185973861368e-02,1.007021494566030795e-02,-2.559986379528656972e-02
+-5.753395650313679610e-01,6.090133462899773161e-04,5.450268173343846695e-02,-2.406563699736586834e-03,2.422924963048167166e-02,9.921451839513571042e-03,-2.569357855751535843e-02
+-5.736241235052911280e-01,9.260581214787934985e-04,5.460851940009370936e-02,-2.394428134654316438e-03,2.419232373133502201e-02,9.760355860990140367e-03,-2.576176669106269509e-02
+-5.719061624248831244e-01,1.261625307763847986e-03,5.466592214817111300e-02,-2.382318664208531080e-03,2.415531534990304416e-02,9.589042528159189005e-03,-2.580498158092749475e-02
+-5.701856893360195411e-01,1.611846023227287176e-03,5.467617216456008289e-02,-2.370238511178958309e-03,2.411822590127931515e-02,9.409510608992650849e-03,-2.582387358975640063e-02
+-5.684627117956096987e-01,1.973078447836932973e-03,5.464072482123254193e-02,-2.358190798916675029e-03,2.408105704627799479e-02,9.223641723522574623e-03,-2.581918839268257035e-02
+-5.667372373715630074e-01,2.341911817801750704e-03,5.456119389866213865e-02,-2.346178533239211012e-03,2.404381065032335993e-02,9.033200486559153988e-03,-2.579174700306954837e-02
+-5.650092736427564377e-01,2.715165483496650555e-03,5.443932705717952070e-02,-2.334204594388020453e-03,2.400648879560712476e-02,8.839828724527317577e-03,-2.574244506126715029e-02
+-5.632788281990006585e-01,3.089896141730149280e-03,5.427698988728479890e-02,-2.322271732319586366e-03,2.396909373476417693e-02,8.645058188662315540e-03,-2.567223721722430851e-02
+-5.615459086410068412e-01,3.463386485155130635e-03,5.407614565191294631e-02,-2.310382565471137076e-03,2.393162788311797504e-02,8.450291424525708389e-03,-2.558213095483371016e-02
+-5.598105225803533536e-01,3.833150343692720563e-03,5.383884092179484077e-02,-2.298539559356182517e-03,2.389409382727966311e-02,8.256816383796248410e-03,-2.547317089846240848e-02
+-5.580726776394524524e-01,4.196930458061158206e-03,5.356718079476888855e-02,-2.286745040068785713e-03,2.385649427880612294e-02,8.065809852932152163e-03,-2.534644433134180391e-02
+-5.563323814515162002e-01,4.552688143617756623e-03,5.326332349421427498e-02,-2.275001189471043041e-03,2.381883206553081500e-02,7.878328116292047587e-03,-2.520305219269726796e-02
+-5.545896416605240464e-01,4.898602860597077273e-03,5.292945492612097236e-02,-2.263310020689348820e-03,2.378111013203164023e-02,7.695318436281278068e-03,-2.504411958202757554e-02
+-5.528444659211878554e-01,5.233065575622272356e-03,5.256777835574306251e-02,-2.251673413692571561e-03,2.374333150720712993e-02,7.517620934175859096e-03,-2.487077994088283950e-02
+-5.510968618989195988e-01,5.554670845672405634e-03,5.218050127922887332e-02,-2.240093088265568758e-03,2.370549929528068672e-02,7.345969916399078953e-03,-2.468416660198278698e-02
+-5.493468372697968283e-01,5.862208335713316350e-03,5.176982061651235617e-02,-2.228570619669732224e-03,2.366761666617754611e-02,7.180995777702954899e-03,-2.448540965694624358e-02
+-5.475943997205290348e-01,6.154657157659637091e-03,5.133791252060780630e-02,-2.217107423140975229e-03,2.362968685295031845e-02,7.023233632301628844e-03,-2.427562638640784823e-02
+-5.458395569484247867e-01,6.431178150593401532e-03,5.088691702371794107e-02,-2.205704782163818694e-03,2.359171311671602600e-02,6.873129324022036156e-03,-2.405592362017465458e-02
+-5.440823166613565354e-01,6.691100319241367021e-03,5.041893665377097933e-02,-2.194363829985069344e-03,2.355369874482660109e-02,6.731035527745586611e-03,-2.382737616795381139e-02
+-5.423226865777274197e-01,6.933915076953753830e-03,4.993601636902791213e-02,-2.183085555413988212e-03,2.351564704977263109e-02,6.597223338210456318e-03,-2.359104254480984264e-02
+-5.405606744264380703e-01,7.159266665517124983e-03,4.944014448235803383e-02,-2.171870821568732107e-03,2.347756134546651746e-02,6.471887359107594308e-03,-2.334794359405732364e-02
+-5.387962879468511934e-01,7.366939695890535600e-03,4.893323877410574868e-02,-2.160720356301644527e-03,2.343944494380703625e-02,6.355145884192018625e-03,-2.309907110911120151e-02
+-5.370295348887590414e-01,7.556852339262330222e-03,4.841714661904426842e-02,-2.149634767784710902e-03,2.340130114574482528e-02,6.247051991626536953e-03,-2.284537236805748647e-02
+-5.352604230123479967e-01,7.729042586061911010e-03,4.789363217040022713e-02,-2.138614532211897436e-03,2.336313323329506156e-02,6.147591796222972096e-03,-2.258776107576277239e-02
+-5.334889600881657090e-01,7.883662879652619457e-03,4.736437745399477844e-02,-2.127660034364972207e-03,2.332494446597790580e-02,6.056698405075783796e-03,-2.232710430402347693e-02
+-5.317151538970863456e-01,8.020965420210702732e-03,4.683097770538099947e-02,-2.116771529714576387e-03,2.328673806087095885e-02,5.974248234704035432e-03,-2.206422223657999657e-02
+-5.299390122302762851e-01,8.141294979712261279e-03,4.629493391063854713e-02,-2.105949190995695577e-03,2.324851720933230476e-02,5.900071130953299207e-03,-2.179989508912942228e-02
+-5.281605428891603671e-01,8.245080292659665716e-03,4.575765922692614368e-02,-2.095193086299206465e-03,2.321028504566752249e-02,5.833957411349361549e-03,-2.153484585173623747e-02
+-5.263797536853872527e-01,8.332820858966074815e-03,4.522046786310129907e-02,-2.084503193935429485e-03,2.317204465483828357e-02,5.775655782857346855e-03,-2.126975767315099700e-02
+-5.245966524407953413e-01,8.405079931976094584e-03,4.468458178036045086e-02,-2.073879414567398900e-03,2.313379907183026399e-02,5.724882229285094964e-03,-2.100525948958330974e-02
+-5.228112469873781309e-01,8.462476195846738158e-03,4.415112565773287451e-02,-2.063321572970234098e-03,2.309555127864234073e-02,5.681325718412826714e-03,-2.074193521195950851e-02
+-5.210235451672501350e-01,8.505674314664984567e-03,4.362113182922788129e-02,-2.052829426425417051e-03,2.305730419155514743e-02,5.644650987251311272e-03,-2.048031566554308486e-02
+-5.192335548326121319e-01,8.535376785939919816e-03,4.309553704055206491e-02,-2.042402662975766753e-03,2.301906066793034736e-02,5.614502892948469021e-03,-2.022088720837093498e-02
+-5.174412838457173036e-01,8.552316177044541265e-03,4.257518615505223591e-02,-2.032040925643709833e-03,2.298082349567381888e-02,5.590510538478793597e-03,-1.996408855024721374e-02
+-5.156467400788354860e-01,8.557247000154255545e-03,4.206083521747625703e-02,-2.021743803178181406e-03,2.294259540499231018e-02,5.572289724990260769e-03,-1.971030989521727408e-02
+-5.138499314142201957e-01,8.550941344637286862e-03,4.155315168711019858e-02,-2.011510842602856619e-03,2.290437905415309422e-02,5.559451410014989818e-03,-1.945989839900508570e-02
+-5.120508657440723255e-01,8.534178880244646173e-03,4.105272048426647502e-02,-2.001341545387210017e-03,2.286617703160824175e-02,5.551598539364125881e-03,-1.921315362345032626e-02
+-5.102495509705069487e-01,8.507742060136679052e-03,4.056004182058329383e-02,-1.991235378808188430e-03,2.282799186536882061e-02,5.548331533230466822e-03,-1.897033918366389585e-02
+-5.084459950055177924e-01,8.472412666508496309e-03,4.007554038275461944e-02,-1.981191789875449104e-03,2.278982601766682894e-02,5.549255178301843242e-03,-1.873167378758594862e-02
+-5.066402057709425977e-01,8.428963036387029928e-03,3.959956491015132612e-02,-1.971210196219332599e-03,2.275168188034439043e-02,5.553974428847811146e-03,-1.849734086281914353e-02
+-5.048321911984281485e-01,8.378155042239392516e-03,3.913239479711464602e-02,-1.961290005919748280e-03,2.271356178848547847e-02,5.562103632095916073e-03,-1.826748548584943357e-02
+-5.030219592293964093e-01,8.320733405553327328e-03,3.867424284781754984e-02,-1.951430604630767854e-03,2.267546799885884062e-02,5.573264021851833359e-03,-1.804221890173847431e-02
+-5.012095178150079988e-01,8.257421265051821668e-03,3.822525950316007365e-02,-1.941631374263828565e-03,2.263740272385979940e-02,5.587084348999443034e-03,-1.782162056621200766e-02
+-4.993948749161288836e-01,8.188922415194956062e-03,3.778553727927127376e-02,-1.931891686245481264e-03,2.259936809969105564e-02,5.603212950690956290e-03,-1.760573998473937904e-02
+-4.975780385032944064e-01,8.115908589754333713e-03,3.735511534983113058e-02,-1.922210914490988295e-03,2.256136620915870708e-02,5.621300739327221921e-03,-1.739459839151697579e-02
+-4.957590165566744811e-01,8.039027410116392908e-03,3.693398286959707899e-02,-1.912588428465243759e-03,2.252339908846186928e-02,5.641021931349807575e-03,-1.718819281923511869e-02
+-4.939378170660390643e-01,7.958894459769152649e-03,3.652208440948137569e-02,-1.903023612206040417e-03,2.248546870583946478e-02,5.662064043958273292e-03,-1.698649616724137029e-02
+-4.921144480307220181e-01,7.876092006247902705e-03,3.611932449773660320e-02,-1.893515844012348638e-03,2.244757698231416784e-02,5.684129429607079516e-03,-1.678945883502839034e-02
+-4.902889174595871369e-01,7.791169458735564858e-03,3.572556960990212055e-02,-1.884064519893050450e-03,2.240972579246768015e-02,5.706939184044267088e-03,-1.659701496628817122e-02
+-4.884612333709922316e-01,7.704641970278039258e-03,3.534065541357377105e-02,-1.874669037482722686e-03,2.237191695890857965e-02,5.730232743164725105e-03,-1.640907861105145998e-02
+-4.866314037927538805e-01,7.616988940316076623e-03,3.496438645094698122e-02,-1.865328817242975414e-03,2.233415225979571220e-02,5.753766511520303981e-03,-1.622555378715910546e-02
+-4.847994367621128453e-01,7.528654157256611454e-03,3.459654543745539768e-02,-1.856043289317237414e-03,2.229643343036990383e-02,5.777314837171277154e-03,-1.604632618696845425e-02
+-4.829653403256978783e-01,7.440045908706990896e-03,3.423689144087976172e-02,-1.846811903570216547e-03,2.225876217277395278e-02,5.800670250908020013e-03,-1.587127536923104223e-02
+-4.811291225394909166e-01,7.351538548452069693e-03,3.388516876424877561e-02,-1.837634120666124544e-03,2.222114014368856832e-02,5.823645817135341719e-03,-1.570026648410148204e-02
+-4.792907914687918880e-01,7.263469772826242066e-03,3.354110364626989443e-02,-1.828509424638596718e-03,2.218356896792129593e-02,5.846068874763852771e-03,-1.553316441464809572e-02
+-4.774503551881826846e-01,7.176144879173566166e-03,3.320441518358149130e-02,-1.819437320423635245e-03,2.214605023543290974e-02,5.867787359843888370e-03,-1.536982080418510498e-02
+-4.756078217814919684e-01,7.089834040785752099e-03,3.287481139666187380e-02,-1.810417326739646336e-03,2.210858551089195032e-02,5.888662579153870931e-03,-1.521008847308366260e-02
+-4.737631993417600329e-01,7.004778494863576031e-03,3.255199682475292539e-02,-1.801448986265218781e-03,2.207117632593243867e-02,5.908578293787354087e-03,-1.505381373966759723e-02
+-4.719164959712024987e-01,6.921185016256010264e-03,3.223567144254425865e-02,-1.792531858978685909e-03,2.203382418271722723e-02,5.927427489930719995e-03,-1.490084452524420566e-02
+-4.700677197811755081e-01,6.839233045741967623e-03,3.192553625955306396e-02,-1.783665528202965558e-03,2.199653056852958477e-02,5.945122578958180609e-03,-1.475102543760129815e-02
+-4.682168788921395874e-01,6.759073230858424382e-03,3.162129260598748465e-02,-1.774849592884090999e-03,2.195929694110998823e-02,5.961589247010045875e-03,-1.460420420081512405e-02
+-4.663639814336241196e-01,6.680829528874578788e-03,3.132264627716714600e-02,-1.766083675191705820e-03,2.192212474131974981e-02,5.976766782080427005e-03,-1.446022848729111056e-02
+-4.645090355441917063e-01,6.604600778774773369e-03,3.102930774338939074e-02,-1.757367411526364331e-03,2.188501539143656791e-02,5.990607248607587865e-03,-1.431894964633405147e-02
+-4.626520493714022519e-01,6.530463203966128949e-03,3.074099402784187876e-02,-1.748700461429566165e-03,2.184797029473357216e-02,6.003076292667677838e-03,-1.418022283449024354e-02
+-4.607930310717773814e-01,6.458469777143255666e-03,3.045743059417226062e-02,-1.740082502746092370e-03,2.181099083862137952e-02,6.014147989688571616e-03,-1.404390667017868348e-02
+-4.589319888107645240e-01,6.388654672181611274e-03,3.017835228940261155e-02,-1.731513229509802285e-03,2.177407840052940011e-02,6.023809192400767822e-03,-1.390986421743869300e-02
+-4.570689307627009978e-01,6.321032838607055192e-03,2.990350353143142112e-02,-1.722992353723112229e-03,2.173723434152212419e-02,6.032054730944686090e-03,-1.377796496142963227e-02
+-4.552038651107780942e-01,6.255603256738279279e-03,2.963264074934597592e-02,-1.714519603386286317e-03,2.170046001353252013e-02,6.038889542897547640e-03,-1.364808215496433538e-02
+-4.533368000470053838e-01,6.192348965837752087e-03,2.936553108217770588e-02,-1.706094718881710250e-03,2.166375675385212585e-02,6.044324809667239728e-03,-1.352009679628529519e-02
+-4.514677437721743569e-01,6.131239957099677423e-03,2.910195404625583529e-02,-1.697717459860478536e-03,2.162712589584220302e-02,6.048379547794534121e-03,-1.339389567886851116e-02
+-4.495967044958226744e-01,6.072234466179324448e-03,2.884170216775705162e-02,-1.689387596342314815e-03,2.159056875761548519e-02,6.051079305344848371e-03,-1.326937104308552329e-02
+-4.477236904361980296e-01,6.015279336879801944e-03,2.858457985068673518e-02,-1.681104911771366629e-03,2.155408665462563156e-02,6.052453242961259791e-03,-1.314642322189494769e-02
+-4.458487098202219556e-01,5.960313629548907866e-03,2.833040489538829476e-02,-1.672869203533229121e-03,2.151768089313787005e-02,6.052537442209582515e-03,-1.302495802652452357e-02
+-4.439717708834537979e-01,5.907267962551059591e-03,2.807900762474541470e-02,-1.664680278250831426e-03,2.148135277180455646e-02,6.051370371988783019e-03,-1.290488835257836038e-02
+-4.420928818700546881e-01,5.856066882299226783e-03,2.783023106193814253e-02,-1.656537955458946797e-03,2.144510358339755257e-02,6.048994203981317326e-03,-1.278613357348238992e-02
+-4.402120510327509062e-01,5.806629629947453360e-03,2.758393110937556666e-02,-1.648442060912564669e-03,2.140893461681044052e-02,6.045453302150747242e-03,-1.266861871569007272e-02
+-4.383292866327981319e-01,5.758872022927685741e-03,2.733997502456747206e-02,-1.640392430665325516e-03,2.137284715393298479e-02,6.040794990241978198e-03,-1.255227663491768203e-02
+-4.364445969399446956e-01,5.712706684254858190e-03,2.709824253393211188e-02,-1.632388910150808260e-03,2.133684247088685204e-02,6.035067414253236610e-03,-1.243704507754790634e-02
+-4.345579902323956079e-01,5.668044312024540440e-03,2.685862482247400665e-02,-1.624431349724910672e-03,2.130092183797750033e-02,6.028319551303623278e-03,-1.232286757925794146e-02
+-4.326694747967760879e-01,5.624794492514429214e-03,2.662102336499975325e-02,-1.616519606415693557e-03,2.126508652472522756e-02,6.020600560017008544e-03,-1.220969453695092827e-02
+-4.307790589280949822e-01,5.582867805436900373e-03,2.638535067333741893e-02,-1.608653546422158425e-03,2.122933778992871015e-02,6.011961750494680852e-03,-1.209748058561651435e-02
+-4.288867509297087377e-01,5.542173136551628576e-03,2.615152911819506459e-02,-1.600833035999456311e-03,2.119367689136326255e-02,6.002449762532622961e-03,-1.198618550255931350e-02
+-4.269925591132845422e-01,5.502623087407194219e-03,2.591948981834328591e-02,-1.593057950392711298e-03,2.115810508600379086e-02,5.992115117108452391e-03,-1.187577492180489670e-02
+-4.250964917987639091e-01,5.464131096351744835e-03,2.568917352917523406e-02,-1.585328165224609166e-03,2.112262361742559685e-02,5.981005412540714439e-03,-1.176621725370345997e-02
+-4.231985573143263735e-01,5.426612873928989868e-03,2.546052812155854758e-02,-1.577643561455181493e-03,2.108723372805288918e-02,5.969166790111250440e-03,-1.165748696829521994e-02
+-4.212987639963524100e-01,5.389986934693695095e-03,2.523350989104409040e-02,-1.570004021113185697e-03,2.105193665493449232e-02,5.956643878923444622e-03,-1.154956068238244260e-02
+-4.193971201893874623e-01,5.354175480493433316e-03,2.500808101641209535e-02,-1.562409430858401104e-03,2.101673363016456561e-02,5.943480564694476193e-03,-1.144242046626948658e-02
+-4.174936342461046945e-01,5.319103476493775365e-03,2.478421116246951744e-02,-1.554859677617422041e-03,2.098162587886793704e-02,5.929717526919872465e-03,-1.133604939237594679e-02
+-4.155883145272687984e-01,5.284700189800269933e-03,2.456187434311292439e-02,-1.547354650756612576e-03,2.094661462392438320e-02,5.915394540662161776e-03,-1.123043599451047266e-02
+-4.136811694016988561e-01,5.250898624368576309e-03,2.434105071949514698e-02,-1.539894242931961858e-03,2.091170107922361854e-02,5.900548972295363397e-03,-1.112556950724229166e-02
+-4.117722072462318139e-01,5.217635738722203181e-03,2.412172470395490889e-02,-1.532478346189282848e-03,2.087688645498156359e-02,5.885215870950202648e-03,-1.102144207473277834e-02
+-4.098614364456857340e-01,5.184852579361850372e-03,2.390388502881930430e-02,-1.525106849835698155e-03,2.084217195588303900e-02,5.869428020279134593e-03,-1.091804732521894396e-02
+-4.079488653928228237e-01,5.152495297258193746e-03,2.368752256470733897e-02,-1.517779648704224152e-03,2.080755877781804292e-02,5.853217753805124192e-03,-1.081538322187045736e-02
+-4.060345024883124099e-01,5.120512393470191999e-03,2.347263303907816401e-02,-1.510496638042366615e-03,2.077304810945181102e-02,5.836611798100639625e-03,-1.071344578716974284e-02
+-4.041183561406945790e-01,5.088858576522611692e-03,2.325921331356070598e-02,-1.503257711144839878e-03,2.073864113786741403e-02,5.819638600766120252e-03,-1.061223497080597197e-02
+-4.022004347663427071e-01,5.057491484172920403e-03,2.304726371380937458e-02,-1.496062760598068512e-03,2.070433903110255325e-02,5.802322357444383502e-03,-1.051174923820374051e-02
+-4.002807467894267113e-01,5.026371930577063288e-03,2.283678447574479659e-02,-1.488911677768194964e-03,2.067014296288710931e-02,5.784683717253710084e-03,-1.041199126232847383e-02
+-3.983593006418760796e-01,4.995466423292369527e-03,2.262777869922966145e-02,-1.481804358383814789e-03,2.063605409377784750e-02,5.766744803728390996e-03,-1.031296148677939180e-02
+-3.964361047633429003e-01,4.964743674572019531e-03,2.242024938068387305e-02,-1.474740693506153104e-03,2.060207357758444754e-02,5.748523025322768362e-03,-1.021466286466016747e-02
+-3.945111676011643920e-01,4.934176565011568101e-03,2.221420104831889342e-02,-1.467720578269379695e-03,2.056820256046065776e-02,5.730035150897445223e-03,-1.011709704062226274e-02
+-3.925844976103263773e-01,4.903741354346490174e-03,2.200963802847626105e-02,-1.460743901493775920e-03,2.053444218173411037e-02,5.711296257858388542e-03,-1.002026692246319427e-02
+-3.906561032534255906e-01,4.873417250368549357e-03,2.180656475048979581e-02,-1.453810558184050021e-03,2.050079356838336100e-02,5.692319377938275665e-03,-9.924175501385119544e-03
+-3.887259930006330966e-01,4.843186276474868365e-03,2.160498609791566479e-02,-1.446920437341663946e-03,2.046725784543524870e-02,5.673115722518447257e-03,-9.828824654803154301e-03
+-3.867941753296565421e-01,4.813033896623165886e-03,2.140490569277535976e-02,-1.440073432302075261e-03,2.043383612232259339e-02,5.653696341028410961e-03,-9.734217891644886267e-03
+-3.848606587257031308e-01,4.782946765326753086e-03,2.120632810449920094e-02,-1.433269431926804905e-03,2.040052950771024942e-02,5.634068395728634646e-03,-9.640355803033115456e-03
+-3.829254516814425413e-01,4.752916107262450049e-03,2.100925559574587029e-02,-1.426508325596199033e-03,2.036733909415731902e-02,5.614241993809643803e-03,-9.547241817230643982e-03
+-3.809885626969692352e-01,4.722932194955329656e-03,2.081369084400523997e-02,-1.419790005221499866e-03,2.033426597009796791e-02,5.594220325008321913e-03,-9.454876808078551201e-03
+-3.790500002797654311e-01,4.692990260466270262e-03,2.061963512066292137e-02,-1.413114361849087623e-03,2.030131121748535161e-02,5.574011230429182923e-03,-9.363262274741435132e-03
+-3.771097729446636349e-01,4.663085295868108632e-03,2.042708982462604697e-02,-1.406481283478606265e-03,2.026847590414267847e-02,5.553617926434906496e-03,-9.272397282273740771e-03
+-3.751678892138092802e-01,4.633214996155272068e-03,2.023605370011598639e-02,-1.399890657355142530e-03,2.023576109235021223e-02,5.533044981179525632e-03,-9.182283541123395509e-03
+-3.732243576166232035e-01,4.603377622813159195e-03,2.004652589607399185e-02,-1.393342374786121947e-03,2.020316783232954660e-02,5.512294749503675317e-03,-9.092919576339250284e-03
+-3.712791866897643955e-01,4.573572930579619146e-03,1.985850331543489652e-02,-1.386836324330455086e-03,2.017069716836983675e-02,5.491369531833142047e-03,-9.004305642608519122e-03
+-3.693323849770918654e-01,4.543801677714114952e-03,1.967198308508413818e-02,-1.380372399509313154e-03,2.013835013536595039e-02,5.470271055273658554e-03,-8.916439073154517128e-03
+-3.673839610296283364e-01,4.514065784785294307e-03,1.948696063981042795e-02,-1.373950485710353315e-03,2.010612775700361399e-02,5.449001147290180244e-03,-8.829317883803984579e-03
+-3.654339234055213326e-01,4.484367195507386709e-03,1.930343036030149687e-02,-1.367570474715858880e-03,2.007403104965025872e-02,5.427559951724975074e-03,-8.742939616039897832e-03
+-3.634822806700064302e-01,4.454709382034544897e-03,1.912138559007880295e-02,-1.361232254312270441e-03,2.004206101833244658e-02,5.405949077716310988e-03,-8.657301368771266187e-03
+-3.615290413953694548e-01,4.425094464299221542e-03,1.894081877646556014e-02,-1.354935719361252677e-03,2.001021866340795680e-02,5.384166499830138342e-03,-8.572399618360938486e-03
+-3.595742141609086784e-01,4.395527599929367236e-03,1.876172189416523001e-02,-1.348680757141322399e-03,1.997850497658117466e-02,5.362215064394726567e-03,-8.488229526360312888e-03
+-3.576178075528972378e-01,4.366011706405491764e-03,1.858408513113861057e-02,-1.342467262350785593e-03,1.994692093338671854e-02,5.340092843719179000e-03,-8.404787520030465239e-03
+-3.556598301645456095e-01,4.336550724874201242e-03,1.840789904728310292e-02,-1.336295123542686862e-03,1.991546751330997012e-02,5.317799480466087712e-03,-8.322067372787948780e-03
+-3.537002905959633070e-01,4.307149855233072429e-03,1.823315183388633451e-02,-1.330164236862903639e-03,1.988414567581026429e-02,5.295336828986228067e-03,-8.240065479704503093e-03
+-3.517391974541219102e-01,4.277812117259600228e-03,1.805983302714050595e-02,-1.324074493677239667e-03,1.985295637434387681e-02,5.272702683951061356e-03,-8.158774038207007584e-03
+-3.497765593528163741e-01,4.248541991948065998e-03,1.788792952649684645e-02,-1.318025788721637276e-03,1.982190055699867282e-02,5.249897761740042468e-03,-8.078188661186716524e-03
+-3.478123849126281697e-01,4.219343422591797024e-03,1.771742936445448377e-02,-1.312018014372967091e-03,1.979097915886130279e-02,5.226922084696075839e-03,-7.998301458546422382e-03
+-3.458466827608864258e-01,4.190219970916378781e-03,1.754831808024193834e-02,-1.306051069597660014e-03,1.976019310744735191e-02,5.203775385409560063e-03,-7.919107986422894638e-03
+-3.438794615316307368e-01,4.161174782392951764e-03,1.738058373826601236e-02,-1.300124847444383272e-03,1.972954332353553039e-02,5.180457133940858695e-03,-7.840597986314203419e-03
+-3.419107298655730265e-01,4.132211382849788547e-03,1.721421000872136106e-02,-1.294239244991084040e-03,1.969903072021638585e-02,5.156968105780289531e-03,-7.762768504749787725e-03
+-3.399404964100594673e-01,4.103332649656439221e-03,1.704918443628626254e-02,-1.288394163632652998e-03,1.966865619876689147e-02,5.133308510434400965e-03,-7.685608531219171972e-03
+-3.379687698190326217e-01,4.074541036476575333e-03,1.688549097751973135e-02,-1.282589496391878768e-03,1.963842065510996993e-02,5.109478455111951360e-03,-7.609113127117193322e-03
+-3.359955587529934729e-01,4.045838763985910511e-03,1.672311510738459794e-02,-1.276825148707899605e-03,1.960832497648600653e-02,5.085478330827571981e-03,-7.533273981542943330e-03
+-3.340208718789632880e-01,4.017228384378589146e-03,1.656204173701174634e-02,-1.271101018695897002e-03,1.957837004194269703e-02,5.061309886705395687e-03,-7.458083439168661406e-03
+-3.320447178704457047e-01,3.988711295603309011e-03,1.640225601166424643e-02,-1.265417007425917537e-03,1.954855671598845421e-02,5.036973448801766728e-03,-7.383533120620808764e-03
+-3.300671054073883171e-01,3.960288072123457556e-03,1.624374202967125355e-02,-1.259773014223318976e-03,1.951888586733255271e-02,5.012468535263305859e-03,-7.309616432850205882e-03
+-3.280880431761448723e-01,3.931961433405484554e-03,1.608648452744780577e-02,-1.254168946193080867e-03,1.948935834587007740e-02,4.987799408308869580e-03,-7.236325510342926821e-03
+-3.261075398694370242e-01,3.903730152340280802e-03,1.593046847107319752e-02,-1.248604705982483574e-03,1.945997499805374176e-02,4.962963631299920249e-03,-7.163652073166128925e-03
+-3.241256041863160853e-01,3.875596405028677079e-03,1.577567866528395424e-02,-1.243080196953126907e-03,1.943073666464607829e-02,4.937965822057611108e-03,-7.091588244242027009e-03
+-3.221422448321247800e-01,3.847558937132963732e-03,1.562209942062777086e-02,-1.237595324338742900e-03,1.940164417282610326e-02,4.912804806982243831e-03,-7.020127248845322958e-03
+-3.201574705184592196e-01,3.819618649682791391e-03,1.546971616093622930e-02,-1.232149995383092665e-03,1.937269835124684828e-02,4.887484057721014699e-03,-6.949260468302276804e-03
+-3.181712899631304881e-01,3.791775127499631345e-03,1.531851313323893786e-02,-1.226744116985583862e-03,1.934390000779243057e-02,4.862005141615539580e-03,-6.878981793290410950e-03
+-3.161837118901262844e-01,3.764027133331417606e-03,1.516847622978820254e-02,-1.221377601306237928e-03,1.931524995468423059e-02,4.836368605641347580e-03,-6.809282384604093200e-03
+-3.141947450295727307e-01,3.736374747344313376e-03,1.501959071094592472e-02,-1.216050352310439056e-03,1.928674898993321118e-02,4.810577943972655160e-03,-6.740154987165388432e-03
+-3.122043981176959027e-01,3.708816098747454420e-03,1.487184164109076230e-02,-1.210762283345513928e-03,1.925839790783685085e-02,4.784633424703068662e-03,-6.671593150160959156e-03
+-3.102126798967836940e-01,3.681350999345517263e-03,1.472521549647258510e-02,-1.205513302750955886e-03,1.923019749423963975e-02,4.758538857335898112e-03,-6.603588243127345159e-03
+-3.082195991151471803e-01,3.653977744731780244e-03,1.457969733732052131e-02,-1.200303321901020816e-03,1.920214852410273565e-02,4.732295550092018022e-03,-6.536134782906254451e-03
+-3.062251645270823164e-01,3.626695091127821093e-03,1.443527359379734830e-02,-1.195132256826228912e-03,1.917425176814580370e-02,4.705905972384196410e-03,-6.469225228749376534e-03
+-3.042293848928312450e-01,3.599501126330457122e-03,1.429193120955490719e-02,-1.190000018029336790e-03,1.914650798926494785e-02,4.679371594854747803e-03,-6.402851601834994369e-03
+-3.022322689785442162e-01,3.572394656349027215e-03,1.414965596102981706e-02,-1.184906518174455060e-03,1.911891794584378440e-02,4.652695447937960256e-03,-6.337008645209164619e-03
+-3.002338255562408409e-01,3.545374161109814733e-03,1.400843521739189058e-02,-1.179851673998324373e-03,1.909148238183115340e-02,4.625880124181306480e-03,-6.271688653285549010e-03
+-2.982340634037716764e-01,3.518436963787428123e-03,1.386825560786576587e-02,-1.174835398236919511e-03,1.906420204214578570e-02,4.598926183548924032e-03,-6.206885845918779995e-03
+-2.962329913047793695e-01,3.491582571968160353e-03,1.372910427775477123e-02,-1.169857612057029900e-03,1.903707766070969476e-02,4.571838382911763872e-03,-6.142594010336215743e-03
+-2.942306180486604639e-01,3.464808238209806258e-03,1.359096942177647101e-02,-1.164918229343588638e-03,1.901010996083185231e-02,4.544617319388043718e-03,-6.078805495062116794e-03
+-2.922269524305266541e-01,3.438112136097111558e-03,1.345383778244451344e-02,-1.160017168996076197e-03,1.898329966700545990e-02,4.517265355460906712e-03,-6.015515891564733424e-03
+-2.902220032511660386e-01,3.411492971329204912e-03,1.331769804990400562e-02,-1.155154350858214910e-03,1.895664749040080685e-02,4.489785862075029894e-03,-5.952717648619627357e-03
+-2.882157793170046500e-01,3.384948349616051424e-03,1.318253798282688002e-02,-1.150329692139045144e-03,1.893015413595839844e-02,4.462180133571407893e-03,-5.890405457739212453e-03
+-2.862082894400676536e-01,3.358476687583377817e-03,1.304834594826071587e-02,-1.145543113900912308e-03,1.890382030415835302e-02,4.434450948220244881e-03,-5.828573351472606315e-03
+-2.841995424379405444e-01,3.332075987803843377e-03,1.291511036414676010e-02,-1.140794536907093100e-03,1.887764668521401887e-02,4.406600251557652413e-03,-5.767215733134573898e-03
+-2.821895471337307892e-01,3.305744093579030592e-03,1.278282018370431421e-02,-1.136083882899966866e-03,1.885163396897428234e-02,4.378629616636184156e-03,-5.706326452013224006e-03
+-2.801783123560285804e-01,3.279480312097095979e-03,1.265146405620193142e-02,-1.131411072385705204e-03,1.882578283063874799e-02,4.350543272411373091e-03,-5.645900359793178461e-03
+-2.781658469388685329e-01,3.253281188460446262e-03,1.252103103557638576e-02,-1.126776030454560004e-03,1.880009394347549254e-02,4.322340167206138728e-03,-5.585931964352300848e-03
+-2.761521597216903823e-01,3.227146606903652515e-03,1.239151076188049587e-02,-1.122178677625211085e-03,1.877456797699554245e-02,4.294025397938900507e-03,-5.526415071846275257e-03
+-2.741372595493007380e-01,3.201074212350100111e-03,1.226289191942720076e-02,-1.117618938786223148e-03,1.874920558314282018e-02,4.265599741770207017e-03,-5.467345660048771906e-03
+-2.721211552718337257e-01,3.175062271238027894e-03,1.213516453481444830e-02,-1.113096739422919862e-03,1.872400741897756182e-02,4.237065013406707964e-03,-5.408717551978823278e-03
+-2.701038557447121846e-01,3.149109791330812706e-03,1.200831780872852611e-02,-1.108612006021204507e-03,1.869897412574644813e-02,4.208424284773418060e-03,-5.350526464287924493e-03
+-2.680853698286093101e-01,3.123214407857874235e-03,1.188234243540009852e-02,-1.104164663757311194e-03,1.867410634435268113e-02,4.179677917794720997e-03,-5.292765641624028704e-03
+-2.660657063894090180e-01,3.097375532667113410e-03,1.175722753712299395e-02,-1.099754636313706032e-03,1.864940470645332776e-02,4.150829467257994426e-03,-5.235431588315719517e-03
+-2.640448742981674202e-01,3.071590953178908543e-03,1.163296336453037254e-02,-1.095381854630548711e-03,1.862486983690542219e-02,4.121879302841741435e-03,-5.178518989947029742e-03
+-2.620228824310735782e-01,3.045859631101465511e-03,1.150954013491356509e-02,-1.091046245846929216e-03,1.860050235837197585e-02,4.092829857254479294e-03,-5.122022876852212747e-03
+-2.599997396694109231e-01,3.020180612544901434e-03,1.138694821280252824e-02,-1.086747739843357688e-03,1.857630288125254950e-02,4.063683583779234380e-03,-5.065938271644059541e-03
+-2.579754548995178420e-01,2.994552106412304033e-03,1.126517807880134808e-02,-1.082486264212276748e-03,1.855227201121270944e-02,4.034441230769367746e-03,-5.010260234694823359e-03
+-2.559500370127490432e-01,2.968972961605040664e-03,1.114422017053801235e-02,-1.078261748804050513e-03,1.852841035123485233e-02,4.005104614117287187e-03,-4.954984149861632603e-03
+-2.539234949054359203e-01,2.943442673136002340e-03,1.102406491411713797e-02,-1.074074123983779355e-03,1.850471849271999661e-02,3.975676632316247142e-03,-4.900105654382364354e-03
+-2.518958374788481391e-01,2.917959262381221702e-03,1.090470318644901418e-02,-1.069923322545524762e-03,1.848119702104790776e-02,3.946157296071953896e-03,-4.845619761271155283e-03
+-2.498670736391540859e-01,2.892521698822133301e-03,1.078612593113181806e-02,-1.065809273816562984e-03,1.845784652343794227e-02,3.916548273498124053e-03,-4.791521569881642718e-03
+-2.478372122973819258e-01,2.867130130265849609e-03,1.066832376624956419e-02,-1.061731910225526248e-03,1.843466757021799962e-02,3.886853326525524510e-03,-4.737806992291641872e-03
+-2.458062623693803850e-01,2.841782162639277208e-03,1.055128784366325505e-02,-1.057691163774294833e-03,1.841166072792905367e-02,3.857071340560614976e-03,-4.684471133996954105e-03
+-2.437742327757797534e-01,2.816477338200595523e-03,1.043500906205573238e-02,-1.053686967506118699e-03,1.838882656409969452e-02,3.827204726177019841e-03,-4.631509761988119103e-03
+-2.417411324419524721e-01,2.791214847187445391e-03,1.031947850522096574e-02,-1.049719255040649580e-03,1.836616563382147127e-02,3.797255133828912610e-03,-4.578918476314522944e-03
+-2.397069702979740813e-01,2.765994011989424128e-03,1.020468712893039982e-02,-1.045787962957076987e-03,1.834367848651292932e-02,3.767224366569072846e-03,-4.526693291045580901e-03
+-2.376717552785839183e-01,2.740813191922712028e-03,1.009062669822568224e-02,-1.041893026254218316e-03,1.832136566681016529e-02,3.737112331784957914e-03,-4.474828871568720980e-03
+-2.356354963231459820e-01,2.715672637653394634e-03,9.977288179831188958e-03,-1.038034378987836084e-03,1.829922771608863408e-02,3.706922398443385138e-03,-4.423321543861097674e-03
+-2.335982023756095483e-01,2.690571236869919620e-03,9.864662997651050050e-03,-1.034211959181111648e-03,1.827726515801745416e-02,3.676655299826116653e-03,-4.372166936192711274e-03
+-2.315598823844699505e-01,2.665507047263053399e-03,9.752742784521772207e-03,-1.030425702365303032e-03,1.825547852760274828e-02,3.646310137935463661e-03,-4.321360440668995870e-03
+-2.295205453027292508e-01,2.640481370213765538e-03,9.641518787913997898e-03,-1.026675546643253346e-03,1.823386834271529047e-02,3.615892025952715562e-03,-4.270898324210684743e-03
+-2.274802000878569097e-01,2.615491833286828164e-03,9.530983002972481452e-03,-1.022961428955513220e-03,1.821243511488757766e-02,3.585399122340150561e-03,-4.220775600977270080e-03
+-2.254388557017504569e-01,2.590538445033331669e-03,9.421126469165245002e-03,-1.019283287577305670e-03,1.819117935710597503e-02,3.554833987707380159e-03,-4.170989223631640062e-03
+-2.233965211106962168e-01,2.565620454901969185e-03,9.311941061533320768e-03,-1.015641065172704759e-03,1.817010157029841713e-02,3.524197700922293477e-03,-4.121534722264378128e-03
+-2.213532052853296739e-01,2.540737298847218183e-03,9.203419039822744938e-03,-1.012034699906438993e-03,1.814920225119635161e-02,3.493491635785619075e-03,-4.072407047811662725e-03
+-2.193089172005963650e-01,2.515888110532190150e-03,9.095551997439400502e-03,-1.008464128735437606e-03,1.812848189114983286e-02,3.462716547782952042e-03,-4.023602540779264505e-03
+-2.172636658357122441e-01,2.491072736953172089e-03,8.988331404103013650e-03,-1.004929294106396680e-03,1.810794097426157018e-02,3.431874481609744395e-03,-3.975117912167049593e-03
+-2.152174601741243254e-01,2.466289755819076144e-03,8.881749676968687252e-03,-1.001430137452903645e-03,1.808757998055639596e-02,3.400965056515725411e-03,-3.926948233602687958e-03
+-2.131703092034713254e-01,2.441539335233408690e-03,8.775798287738048251e-03,-9.979666011130303160e-04,1.806739938626704400e-02,3.369990830928626478e-03,-3.879090485090834657e-03
+-2.111222219155439450e-01,2.416820465682872118e-03,8.670469811217102266e-03,-9.945386286559611613e-04,1.804739965725318113e-02,3.338952114625218671e-03,-3.831539712734938034e-03
+-2.090732073062456231e-01,2.392132830561471100e-03,8.565755806366160893e-03,-9.911461626918697827e-04,1.802758125820867405e-02,3.307850478290982540e-03,-3.784293000582924539e-03
+-2.070232743755528182e-01,2.367475771649461304e-03,8.461648944733789895e-03,-9.877891492742769518e-04,1.800794464390684940e-02,3.276686815679324440e-03,-3.737345478724237759e-03
+-2.049724321274755956e-01,2.342848610460558211e-03,8.358141418061851985e-03,-9.844675289023335427e-04,1.798849026791381894e-02,3.245461947841299637e-03,-3.690693307904960307e-03
+-2.029206895700181590e-01,2.318251020371502567e-03,8.255225252414810991e-03,-9.811812471091020844e-04,1.796921857566221442e-02,3.214177322472104061e-03,-3.644333089434345380e-03
+-2.008680557151390489e-01,2.293682454372327970e-03,8.152892713947286832e-03,-9.779302495169475869e-04,1.795013000707300702e-02,3.182833942267569889e-03,-3.598261103334987002e-03
+-1.988145395787117298e-01,2.269142132335924371e-03,8.051136407351276378e-03,-9.747144831643725577e-04,1.793122499767564951e-02,3.151432342346807604e-03,-3.552473122737382326e-03
+-1.967601501804849828e-01,2.244629925644530798e-03,7.949948445878438422e-03,-9.715338935307113455e-04,1.791250397732349620e-02,3.119974250001477170e-03,-3.506965968932712618e-03
+-1.947048965440432433e-01,2.220144672968822879e-03,7.849321402813072210e-03,-9.683884316278775764e-04,1.789396737100634391e-02,3.088459427717145551e-03,-3.461735726278567741e-03
+-1.926487876967668822e-01,2.195687001013282760e-03,7.749248468490752516e-03,-9.652780409973161883e-04,1.787561559829284677e-02,3.056890962848818512e-03,-3.416777446732046503e-03
+-1.905918326697928489e-01,2.171255493412132739e-03,7.649721412167526623e-03,-9.622026688887388860e-04,1.785744906704367016e-02,3.025268085125235647e-03,-3.372088970090240424e-03
+-1.885340404979746198e-01,2.146849715031372806e-03,7.550732998538658980e-03,-9.591622664740311575e-04,1.783946819218107036e-02,2.993591837862720484e-03,-3.327666391760537162e-03
+-1.864754202198428135e-01,2.122469762929542550e-03,7.452276061022977335e-03,-9.561567841078201209e-04,1.782167337229891707e-02,2.961864236567715320e-03,-3.283505799211778719e-03
+-1.844159808775653053e-01,2.098114602702982456e-03,7.354343583437355017e-03,-9.531861698947970878e-04,1.780406500664276359e-02,2.930085149997931418e-03,-3.239603117525628306e-03
+-1.823557315169075932e-01,2.073783980389314920e-03,7.256928142414290593e-03,-9.502503726160305099e-04,1.778664348665301084e-02,2.898255885574483725e-03,-3.195955153485413505e-03
+-1.802946811871931343e-01,2.049477573485481395e-03,7.160022094119999382e-03,-9.473493451130971703e-04,1.776940920137916324e-02,2.866377597764987930e-03,-3.152559244122674856e-03
+-1.782328389412633773e-01,2.025194978641575022e-03,7.063618763130377273e-03,-9.444830426595153278e-04,1.775236252750230642e-02,2.834451265427246573e-03,-3.109411026056537863e-03
+-1.761702138354382385e-01,2.000935077113158433e-03,6.967711282173525561e-03,-9.416514143479885919e-04,1.773550384747586348e-02,2.802476502383582143e-03,-3.066506609740306949e-03
+-1.741068149294762724e-01,1.976698522905293107e-03,6.872292236685097200e-03,-9.388544142208900441e-04,1.771883352889324151e-02,2.770456223006960688e-03,-3.023843244318385852e-03
+-1.720426512865346202e-01,1.952483534220121131e-03,6.777355019923925888e-03,-9.360919942836314544e-04,1.770235194202291959e-02,2.738388751315513295e-03,-2.981416775260469290e-03
+-1.699777319731296532e-01,1.928291353321979909e-03,6.682892234048459605e-03,-9.333641100042095210e-04,1.768605944561121163e-02,2.706278058594661137e-03,-2.939224640830200366e-03
+-1.679120660590968095e-01,1.904119708691886766e-03,6.588897853913370378e-03,-9.306707134986608480e-04,1.766995639690302142e-02,2.674121505171432253e-03,-2.897261813299447254e-03
+-1.658456626175507931e-01,1.879970160768154337e-03,6.495363828073936335e-03,-9.280117591864010980e-04,1.765404314780393003e-02,2.641923613824358977e-03,-2.855527170528296410e-03
+-1.637785307248458277e-01,1.855840582425618678e-03,6.402284098640778978e-03,-9.253872077058273183e-04,1.763832004015839991e-02,2.609681971117018811e-03,-2.814015957063198663e-03
+-1.617106794605358555e-01,1.831731380483074155e-03,6.309652172897499932e-03,-9.227970100150752978e-04,1.762278742197596199e-02,2.577398892767864685e-03,-2.772724335804843416e-03
+-1.596421179073343188e-01,1.807642088829609396e-03,6.217460483663866076e-03,-9.202411245530572404e-04,1.760744562601978050e-02,2.545075036133717054e-03,-2.731650585657016336e-03
+-1.575728551510746644e-01,1.783572452468612686e-03,6.125703084798708557e-03,-9.177195102734909959e-04,1.759229498428316676e-02,2.512711433023843716e-03,-2.690790045723217942e-03
+-1.555029002806702365e-01,1.759521590455284494e-03,6.034373244879424024e-03,-9.152321217870711860e-04,1.757733582291906571e-02,2.480307920410746922e-03,-2.650139625523476039e-03
+-1.534322623880743364e-01,1.735490132639755639e-03,5.943464070704352828e-03,-9.127789182608156329e-04,1.756256846356666484e-02,2.447867147194977833e-03,-2.609696634333776803e-03
+-1.513609505682403933e-01,1.711476523291847757e-03,5.852969050071906223e-03,-9.103598598914823623e-04,1.754799321843951432e-02,2.415387643785982599e-03,-2.569457761669747490e-03
+-1.492889739190818854e-01,1.687480809779777838e-03,5.762881792931138364e-03,-9.079749074805954206e-04,1.753361040777616162e-02,2.382870926104111841e-03,-2.529419561118888965e-03
+-1.472163415414325383e-01,1.663503310423132065e-03,5.673196168310312010e-03,-9.056240187134788474e-04,1.751942033092796641e-02,2.350318998108840048e-03,-2.489578191801815853e-03
+-1.451430625390061624e-01,1.639542193642914113e-03,5.583905220590911107e-03,-9.033071532087166584e-04,1.750542329533699704e-02,2.317729813074566585e-03,-2.449931448808472401e-03
+-1.430691460183568520e-01,1.615598890744627571e-03,5.495002607486542540e-03,-9.010242745908644216e-04,1.749161959738309430e-02,2.285107422752607857e-03,-2.410476065309120029e-03
+-1.409946010888388224e-01,1.591671747726795642e-03,5.406482319769613347e-03,-8.987753438871421644e-04,1.747800952635720356e-02,2.252450074978078459e-03,-2.371208236407595839e-03
+-1.389194368625665810e-01,1.567760913388158584e-03,5.318337862109622644e-03,-8.965603214977944458e-04,1.746459337372592915e-02,2.219759379122942643e-03,-2.332125153477068724e-03
+-1.368436624543747093e-01,1.543865775458662078e-03,5.230562752985949866e-03,-8.943791711615039662e-04,1.745137142222770443e-02,2.187035499208401981e-03,-2.293224065169219804e-03
+-1.347672869817779229e-01,1.519986501056825017e-03,5.143150812825274948e-03,-8.922318573556155957e-04,1.743834395156100558e-02,2.154280042609694589e-03,-2.254501735614575231e-03
+-1.326903195649310474e-01,1.496122575723042927e-03,5.056096042993745224e-03,-8.901183437764906368e-04,1.742551123154667536e-02,2.121493321043213184e-03,-2.215954669699088733e-03
+-1.306127693265889400e-01,1.472273231935662370e-03,4.969392078764802399e-03,-8.880385933400054485e-04,1.741287353449808353e-02,2.088675154497262113e-03,-2.177580137228406296e-03
+-1.285346453920664100e-01,1.448438750487692850e-03,4.883032648183693175e-03,-8.859925730187593381e-04,1.740043112506564280e-02,2.055827311603812286e-03,-2.139375310942336333e-03
+-1.264559568891981123e-01,1.424618626422329047e-03,4.797012053538541460e-03,-8.839802467502746487e-04,1.738818426145849472e-02,2.022950071109232140e-03,-2.101336362299972664e-03
+-1.243767129482984818e-01,1.400812398093709017e-03,4.711323688999752117e-03,-8.820015803209506778e-04,1.737613319945957233e-02,1.990043776785228955e-03,-2.063461239305051410e-03
+-1.222969227021216548e-01,1.377019932953390483e-03,4.625961711249966524e-03,-8.800565419459420533e-04,1.736427819043598331e-02,1.957109373614434808e-03,-2.025746532655919516e-03
+-1.202165952858213066e-01,1.353240716210252258e-03,4.540920092997270847e-03,-8.781450985326792920e-04,1.735261948035918961e-02,1.924147074380858861e-03,-1.988189248905026379e-03
+-1.181357398369105166e-01,1.329474696958250638e-03,4.456192878880135648e-03,-8.762672174376453810e-04,1.734115731267679011e-02,1.891157946540354858e-03,-1.950786329179449684e-03
+-1.160543654952217590e-01,1.305721711776799965e-03,4.371773882828234800e-03,-8.744228674070286074e-04,1.732989192099300760e-02,1.858142831744875358e-03,-1.913535215179898268e-03
+-1.139724814028665600e-01,1.281980600282257333e-03,4.287657360803794233e-03,-8.726120182463187758e-04,1.731882354165668880e-02,1.825100688098939012e-03,-1.876432586379741347e-03
+-1.118900967041955297e-01,1.258252205019277882e-03,4.203837611686953891e-03,-8.708346372340717342e-04,1.730795240436391186e-02,1.792034214995619906e-03,-1.839475106921977920e-03
+-1.098072205457580330e-01,1.234535877848311550e-03,4.120308318264438718e-03,-8.690906930751748222e-04,1.729727872793243929e-02,1.758943303178505529e-03,-1.802660659857445143e-03
+-1.077238620762621668e-01,1.210830739458303813e-03,4.037063458559621287e-03,-8.673801596866106750e-04,1.728680273516873186e-02,1.725827396554500265e-03,-1.765986637049311511e-03
+-1.056400304465344858e-01,1.187137426757124649e-03,3.954097724213768116e-03,-8.657030081219478862e-04,1.727652464191318377e-02,1.692688768989202095e-03,-1.729449153358015179e-03
+-1.035557348094797714e-01,1.163455438146462993e-03,3.871405124669719065e-03,-8.640592079955708828e-04,1.726644465369871900e-02,1.659527546893238429e-03,-1.693045662412087432e-03
+-1.014709843200409523e-01,1.139783961626340087e-03,3.788979526611429855e-03,-8.624487324817323121e-04,1.725656297895765412e-02,1.626343262412444134e-03,-1.656773943843635900e-03
+-9.938578813515881738e-02,1.116123142481827884e-03,3.706815316157862934e-03,-8.608715573955982425e-04,1.724687982087567267e-02,1.593137230583874372e-03,-1.620630860304716429e-03
+-9.730015541373175647e-02,1.092472870619401156e-03,3.624907172153143866e-03,-8.593276539525786196e-04,1.723739537564235313e-02,1.559910275991907786e-03,-1.584612781352331794e-03
+-9.521409531657563929e-02,1.068832530265337744e-03,3.543248903377468027e-03,-8.578169954972417839e-04,1.722810983513470978e-02,1.526662263689472195e-03,-1.548717762280597604e-03
+-9.312761700638348683e-02,1.045202034406808632e-03,3.461835093487037902e-03,-8.563395565936787138e-04,1.721902339076483018e-02,1.493394039128147872e-03,-1.512942458578667203e-03
+-9.104072964768519793e-02,1.021581443667551620e-03,3.380659872230261809e-03,-8.548953117321542432e-04,1.721013622296206289e-02,1.460106713862886632e-03,-1.477284428656704899e-03
+-8.895344240680749803e-02,9.979698064895964858e-04,3.299717704235022916e-03,-8.534842363130575038e-04,1.720144851381548434e-02,1.426799491248548647e-03,-1.441740653542592605e-03
+-8.686576445183338824e-02,9.743674473211884086e-04,3.219002756020999164e-03,-8.521063068649225138e-04,1.719296043893966258e-02,1.393473956959728364e-03,-1.406308722496408615e-03
+-8.477770495256206629e-02,9.507735207743913015e-04,3.138509600639707506e-03,-8.507615002304762990e-04,1.718467217215071535e-02,1.360129493501193755e-03,-1.370985510693494567e-03
+-8.268927308046866709e-02,9.271888434909345564e-04,3.058232602969740248e-03,-8.494497934485877077e-04,1.717658387882778359e-02,1.326768590088474049e-03,-1.335768330353340801e-03
+-8.060047800866380896e-02,9.036120912679459268e-04,2.978166328007803183e-03,-8.481711623162867079e-04,1.716869572026364504e-02,1.293389712613886777e-03,-1.300654164092291017e-03
+-7.851132891185348683e-02,8.800437421476151219e-04,2.898304574422336938e-03,-8.469255874083210945e-04,1.716100785741144288e-02,1.259994697289661082e-03,-1.265641475771841053e-03
+-7.642183496629867401e-02,8.564831393918027934e-04,2.818642718437124137e-03,-8.457130479240649971e-04,1.715352044251154673e-02,1.226583243859689660e-03,-1.230725814475089590e-03
+-7.433200534977503493e-02,8.329303447287193752e-04,2.739174695063843756e-03,-8.445335200557759631e-04,1.714623362735671550e-02,1.193156390799047541e-03,-1.195905473521093791e-03
+-7.224184924153258247e-02,8.093850825629696211e-04,2.659894943874817168e-03,-8.433869858887181083e-04,1.713914755525183797e-02,1.159714535991003374e-03,-1.161177840923752072e-03
+-7.015137582225546009e-02,7.858469014019249825e-04,2.580798023358811446e-03,-8.422734271582097820e-04,1.713226236885497661e-02,1.126257739610055791e-03,-1.126540121274039949e-03
+-6.806059427402150197e-02,7.623154765337913311e-04,2.501879010270089722e-03,-8.411928225415545544e-04,1.712557820736578668e-02,1.092786291939109276e-03,-1.091988585832289914e-03
+-6.596951378026195967e-02,7.387909034297768631e-04,2.423131434217826435e-03,-8.401451533577061325e-04,1.711909520545089325e-02,1.059301263814798255e-03,-1.057522443961810216e-03
+-6.387814352572118715e-02,7.152729995718514833e-04,2.344550521598336070e-03,-8.391304057668348432e-04,1.711281349024488391e-02,1.025803197609720949e-03,-1.023137759547934015e-03
+-6.178649269641621478e-02,6.917610870603216723e-04,2.266130594934168650e-03,-8.381485604314783911e-04,1.710673318789596858e-02,9.922917001231239351e-04,-9.888323261305720195e-04
+-5.969457047959648988e-02,6.682554188239046801e-04,2.187866593102728071e-03,-8.371996019905839602e-04,1.710085442157094002e-02,9.587681167763251883e-04,-9.546028153157591666e-04
+-5.760238606370348541e-02,6.447561606709381610e-04,2.109752891662017554e-03,-8.362835104694760994e-04,1.709517730224113449e-02,9.252336232362876880e-04,-9.204469874466785721e-04
+-5.550994863833031562e-02,6.212617231033640925e-04,2.031783772432073278e-03,-8.354002739806668529e-04,1.708970194493900779e-02,8.916860979001255474e-04,-8.863628100761631809e-04
+-5.341726739418143494e-02,5.977730397473489778e-04,1.953954273807970154e-03,-8.345498773117973959e-04,1.708442846226356296e-02,8.581281440685459326e-04,-8.523468624413305106e-04
+-5.132435152303221199e-02,5.742895078098032846e-04,1.876259135744128307e-03,-8.337323052505965070e-04,1.707935695514629232e-02,8.245594786636935248e-04,-8.183963124566629626e-04
+-4.923121021768859379e-02,5.508107994088781711e-04,1.798692776104984922e-03,-8.329475410350632193e-04,1.707448752704090328e-02,7.909803285310887735e-04,-7.845089599679536032e-04
+-4.713785267194667977e-02,5.273368757768139101e-04,1.721249492332535633e-03,-8.321955754177095578e-04,1.706982027432824012e-02,7.573914574776349296e-04,-7.506828574705039095e-04
+-4.504428808055239986e-02,5.038676886565878832e-04,1.643924630286941512e-03,-8.314763946375710408e-04,1.706535528910581995e-02,7.237936065104558182e-04,-7.169141174245305219e-04
+-4.295052563916110233e-02,4.804028525976894969e-04,1.566712385921319260e-03,-8.307899864883782530e-04,1.706109265775446887e-02,6.901868796518256322e-04,-6.832010329038953542e-04
+-4.085657454429714869e-02,4.569421269108194222e-04,1.489608128353814894e-03,-8.301363372922674483e-04,1.705703246602900333e-02,6.565716478209767662e-04,-6.495397208755530200e-04
+-3.876244399331353618e-02,4.334856886539072292e-04,1.412605535852473441e-03,-8.295154348499450764e-04,1.705317479130001937e-02,6.229490608900855372e-04,-6.159294909429745106e-04
+-3.666814318435148573e-02,4.100323283640498794e-04,1.335699627157894932e-03,-8.289272749401096606e-04,1.704951971114532575e-02,5.893176649309007139e-04,-5.823671606036762757e-04
+-3.457368131630006447e-02,3.865827778584176580e-04,1.258885921814037164e-03,-8.283718435077276968e-04,1.704606730204275694e-02,5.556796420778943287e-04,-5.488486285256266193e-04
+-3.247906758875574590e-02,3.631370761587899166e-04,1.182158302978010946e-03,-8.278491294431017129e-04,1.704281762397581376e-02,5.220358717614807234e-04,-5.153728806404752000e-04
+-3.038431120198204285e-02,3.396934074587769662e-04,1.105511819463074214e-03,-8.273591250645684608e-04,1.703977074619731844e-02,4.883837522165733209e-04,-4.819367351930565818e-04
+-2.828942135686907111e-02,3.162534085593001159e-04,1.028940852442183787e-03,-8.269018228654092780e-04,1.703692673111863870e-02,4.547271520520635791e-04,-4.485382791258278731e-04
+-2.619440725489314420e-02,2.928160782285128025e-04,9.524408146222592244e-04,-8.264772149941163092e-04,1.703428562816807629e-02,4.210649906126762176e-04,-4.151736809425995251e-04
+-2.409927809807633700e-02,2.693810873331392705e-04,8.760056317439043807e-04,-8.260852929403171346e-04,1.703184749277885171e-02,3.873974446078788246e-04,-3.818419119023266049e-04
+-2.200404308894610833e-02,2.459485684577182455e-04,7.996307378660531557e-04,-8.257260536954047188e-04,1.702961237160366056e-02,3.537255538748333055e-04,-3.485391286815286808e-04
+-1.990871143049483677e-02,2.225180873493680812e-04,7.233107281241198203e-04,-8.253994861108061116e-04,1.702758030817208210e-02,3.200492930511959788e-04,-3.152630734135611259e-04
+-1.781329232613943978e-02,1.990893544539459349e-04,6.470401162824475231e-04,-8.251055884215543427e-04,1.702575134383297467e-02,2.863689059321191505e-04,-2.820116521970632631e-04
+-1.571779497968089911e-02,1.756626018903604278e-04,5.708143362595631629e-04,-8.248443511851146588e-04,1.702412551362548002e-02,2.526856124515963254e-04,-2.487810560687441592e-04
+-1.362222859526389211e-02,1.522370537682972685e-04,4.946271056695378775e-04,-8.246157718821746103e-04,1.702270284903146386e-02,2.189987408080950498e-04,-2.155707033983047692e-04
+-1.152660237733631014e-02,1.288129728076797473e-04,4.184739816569792723e-04,-8.244198482732582314e-04,1.702148337966774561e-02,1.853095651138080002e-04,-1.823765715498982469e-04
+-9.430925530608870802e-03,1.053898412841675774e-04,3.423494801987202901e-04,-8.242565761277010922e-04,1.702046712795083835e-02,1.516178946609241158e-04,-1.491965970498282097e-04
+-7.335207260014662417e-03,8.196767543128259637e-05,2.662485955240164012e-04,-8.241259518033477137e-04,1.701965411579003939e-02,1.179245389147050212e-04,-1.160278256364293204e-04
+-5.239456770668723257e-03,5.854629093649077833e-05,1.901657480497178515e-04,-8.240279736039088278e-04,1.701904435676685270e-02,8.422993036686636135e-05,-8.286838378535121635e-05
+-3.143683267827608162e-03,3.512533829827304828e-05,1.140959831929323401e-04,-8.239626405224594226e-04,1.701863786395985470e-02,5.053419103675434876e-05,-4.971522998133120076e-05
+-1.047895956848953446e-03,1.170470949379773770e-05,3.803388277719581864e-05,-8.239299522006214571e-04,1.701843464457217930e-02,1.683789519112693995e-05,-1.656619460831709506e-05
+1.047895956848953446e-03,-1.171595095671988791e-05,-3.802537020471871687e-05,-8.239299075268406150e-04,1.701843470401125102e-02,-1.685884715985106898e-05,1.658201758587787465e-05
+3.143683267827608162e-03,-3.513658255351335334e-05,-1.140873235011214602e-04,-8.239625053500401579e-04,1.701863804170533276e-02,-5.055514611064378615e-05,4.973133111852614109e-05
+5.239456770668723257e-03,-5.855762636124268013e-05,-1.901571565769669028e-04,-8.240277474799360914e-04,1.701904465381168241e-02,-8.425105221213526725e-05,8.288436159081172380e-05
+7.335207260014662417e-03,-8.197902964175913555e-05,-2.662399789816827038e-04,-8.241256351513179213e-04,1.701965453235694536e-02,-1.179456897509463238e-04,1.160438572940391709e-04
+9.430925530608870802e-03,-1.054011917830527254e-04,-3.423410076424499536e-04,-8.242561691780202161e-04,1.702046766440498682e-02,-1.516390302623061594e-04,1.492123679424300442e-04
+1.152660237733631014e-02,-1.288243291409319152e-04,-4.184656476094095927e-04,-8.244193529433808563e-04,1.702148403557207992e-02,-1.853307013342082269e-04,1.823920941955240458e-04
+1.362222859526389211e-02,-1.522484844065454847e-04,-4.946187164092515293e-04,-8.246151885578022981e-04,1.702270362507849952e-02,-2.190200040021143412e-04,2.155863432594759977e-04
+1.571779497968089911e-02,-1.756741189059638094e-04,-5.708057707013262927e-04,-8.248436781190797863e-04,1.702412641042112471e-02,-2.527070231285156684e-04,2.487970424108442422e-04
+1.781329232613943978e-02,-1.991007883581658185e-04,-6.470320413207341281e-04,-8.251048270542803197e-04,1.702575236166248632e-02,-2.863901442216377526e-04,2.820267368719913759e-04
+1.990871143049483677e-02,-2.225295716322156394e-04,-7.233026193554201351e-04,-8.253986400449866024e-04,1.702758144613734231e-02,-3.200706072353001188e-04,3.152782419765611834e-04
+2.200404308894610833e-02,-2.459600997316095097e-04,-7.996227145266170686e-04,-8.257251214432110914e-04,1.702961363082190760e-02,-3.537469354788049317e-04,3.485541598447593876e-04
+2.409927809807633700e-02,-2.693927472683030344e-04,-8.759976171708792848e-04,-8.260842766775744472e-04,1.703184887338748954e-02,-3.874190447262404642e-04,3.818569517359500032e-04
+2.619440725489314420e-02,-2.928276999162520789e-04,-9.524325134240104644e-04,-8.264761114576140051e-04,1.703428713153117002e-02,-4.210864942379833166e-04,4.151892855639960594e-04
+2.828942135686907111e-02,-3.162651192671200873e-04,-1.028932759834279460e-03,-8.269006325171599468e-04,1.703692835584345172e-02,-4.547487956945492479e-04,4.485535222564926362e-04
+3.038431120198204285e-02,-3.397053976543186948e-04,-1.105503618260040904e-03,-8.273578479075439007e-04,1.703977249607827385e-02,-4.884058909130973344e-04,4.819522138639368777e-04
+3.247906758875574590e-02,-3.631483611859548833e-04,-1.182150113134888261e-03,-8.278477651292161059e-04,1.704281949539038568e-02,-5.220566572144001065e-04,5.153883721961254613e-04
+3.457368131630006447e-02,-3.865944971893161175e-04,-1.258877782350703795e-03,-8.283703914880708417e-04,1.704606929266741164e-02,-5.557012086747188054e-04,5.488640620839243004e-04
+3.666814318435148573e-02,-4.100440925282829749e-04,-1.335691870431537139e-03,-8.289257383179742455e-04,1.704952182547377071e-02,-5.893392803272706499e-04,5.823819152602740168e-04
+3.876244399331353618e-02,-4.334970336069394700e-04,-1.412597312319214355e-03,-8.295138130361821458e-04,1.705317702539390495e-02,-6.229698528442382827e-04,6.159451618994692124e-04
+4.085657454429714869e-02,-4.569538290620872253e-04,-1.489599832115865666e-03,-8.301346260045229086e-04,1.705703481971561700e-02,-6.565930700000000659e-04,6.495555715270974690e-04
+4.295052563916110233e-02,-4.804145412806691291e-04,-1.566704387353783524e-03,-8.307881876977906343e-04,1.706109513342030787e-02,-6.902082349550532321e-04,6.832163711073505237e-04
+4.504428808055239986e-02,-5.038793797000867391e-04,-1.643916656124997473e-03,-8.314745097931960623e-04,1.706535788565971198e-02,-7.238149224570855763e-04,7.169294578321427957e-04
+4.713785267194667977e-02,-5.273485869773575276e-04,-1.721241557424899949e-03,-8.321936045630943081e-04,1.706982299225209831e-02,-7.574127652915086089e-04,7.506981745769398737e-04
+4.923121021768859379e-02,-5.508224676715465281e-04,-1.798684588096303039e-03,-8.329454830666352784e-04,1.707449036572368864e-02,-7.910015077561848061e-04,7.845248042714705883e-04
+5.132435152303221199e-02,-5.743009427758151257e-04,-1.876250935175215575e-03,-8.337301576729267728e-04,1.707935991309329077e-02,-8.245801701994903482e-04,8.184122352777997624e-04
+5.341726739418143494e-02,-5.977846781078626891e-04,-1.953946282719737620e-03,-8.345476417744364817e-04,1.708443153828441766e-02,-8.581491645830780684e-04,8.523624498052317531e-04
+5.550994863833031562e-02,-6.212732467479815165e-04,-2.031775678257647516e-03,-8.353979507346151511e-04,1.708970514094903040e-02,-8.917068487425261611e-04,8.863786503939220449e-04
+5.760238606370348541e-02,-6.447673956787128595e-04,-2.109744520400413936e-03,-8.362810970263273190e-04,1.709518061376152573e-02,-9.252537761793988699e-04,9.204634093306978630e-04
+5.969457047959648988e-02,-6.682672646578859425e-04,-2.187858507747315103e-03,-8.371970974508566570e-04,1.710085785111325682e-02,-9.587893561214813013e-04,9.546187655251101674e-04
+6.178649269641621478e-02,-6.917729629069591405e-04,-2.266122661039696694e-03,-8.381459683839253411e-04,1.710673673898317759e-02,-9.923129347471663572e-04,9.888480590850312092e-04
+6.387814352572118715e-02,-7.152847906988312300e-04,-2.344542439041876012e-03,-8.391277245903189025e-04,1.711281716175834522e-02,-1.025824210221334748e-03,1.023153840340472291e-03
+6.596951378026195967e-02,-7.388027708625470721e-04,-2.423123395962070541e-03,-8.401423831655782529e-04,1.711909899692732120e-02,-1.059322354141940727e-03,1.057538513125338454e-03
+6.806059427402150197e-02,-7.623273762864687074e-04,-2.501870810378077365e-03,-8.411899614425580535e-04,1.712558211958213616e-02,-1.092807375452391726e-03,1.092005031892667385e-03
+7.015137582225546009e-02,-7.858586638969757617e-04,-2.580790214510725062e-03,-8.422704765671279892e-04,1.713226640051933661e-02,-1.126278496198232330e-03,1.126555910329965276e-03
+7.224184924153258247e-02,-8.093968705941835366e-04,-2.659887119477254581e-03,-8.433839477767242484e-04,1.713915170540062510e-02,-1.159735268814404176e-03,1.161193737689271898e-03
+7.433200534977503493e-02,-8.329425304231877214e-04,-2.739166696723221127e-03,-8.445303927963160014e-04,1.714623789770090487e-02,-1.193177795445034620e-03,1.195921777364046144e-03
+7.642183496629867401e-02,-8.564953754864834635e-04,-2.818634904452179801e-03,-8.457098305043527246e-04,1.715352483622802071e-02,-1.226604667054521924e-03,1.230741855924761012e-03
+7.851132891185348683e-02,-8.800559340996076977e-04,-2.898297036854388230e-03,-8.469222833571377279e-04,1.716101237262300516e-02,-1.260015959599169186e-03,1.265657084568531758e-03
+8.060047800866380896e-02,-9.036245157497519408e-04,-2.978158486423565770e-03,-8.481677700239463385e-04,1.716870035891358837e-02,-1.293411330642683542e-03,1.300670430901173548e-03
+8.268927308046866709e-02,-9.272011769548336004e-04,-3.058225166814921171e-03,-8.494463131593951288e-04,1.717658864025111559e-02,-1.326789954980086422e-03,1.335783927309642160e-03
+8.477770495256206629e-02,-9.507860916928169816e-04,-3.138502210003116504e-03,-8.507579345421229953e-04,1.718467705762457415e-02,-1.360151218967513374e-03,1.371001115022226933e-03
+8.686576445183338824e-02,-9.743797267002827838e-04,-3.218995444800401475e-03,-8.521026555786789976e-04,1.719296544721874917e-02,-1.393495048923109941e-03,1.406324273012184720e-03
+8.895344240680749803e-02,-9.979822003319570919e-04,-3.299710301711926679e-03,-8.534804992351643785e-04,1.720145364372212965e-02,-1.426820708678483820e-03,1.441756472747350365e-03
+9.104072964768519793e-02,-1.021593686496570638e-03,-3.380652465281302685e-03,-8.548914875902361831e-04,1.721014147364721344e-02,-1.460127556730537839e-03,1.477300356110754784e-03
+9.312761700638348683e-02,-1.045214744061234106e-03,-3.461827792249374831e-03,-8.563356455782954180e-04,1.721902876350414960e-02,-1.493415663612263989e-03,1.512958290261068013e-03
+9.521409531657563929e-02,-1.068845025675288843e-03,-3.543241706310411993e-03,-8.578129985841011565e-04,1.722811533165174991e-02,-1.526683390435436619e-03,1.548733503553458967e-03
+9.730015541373175647e-02,-1.092485436102346536e-03,-3.624899880814387576e-03,-8.593235704638403898e-04,1.723740099335566156e-02,-1.559931435791468961e-03,1.584628806791105529e-03
+9.938578813515881738e-02,-1.116135728919289598e-03,-3.706808064222923658e-03,-8.608673867650154911e-04,1.724688556109108678e-02,-1.593158328996051440e-03,1.620646921788091929e-03
+1.014709843200409523e-01,-1.139796509621428819e-03,-3.788971982674042317e-03,-8.624444729667930885e-04,1.725656884009884456e-02,-1.626364185712595844e-03,1.656790665300192106e-03
+1.035557348094797714e-01,-1.163467988199776842e-03,-3.871397519899393447e-03,-8.640548567125559904e-04,1.726645063588254717e-02,-1.659548368678932491e-03,1.693062612945871962e-03
+1.056400304465344858e-01,-1.187150158548661927e-03,-3.954090295305102802e-03,-8.656985654835196338e-04,1.727653074449738563e-02,-1.692709823815538063e-03,1.729465891682814950e-03
+1.077238620762621668e-01,-1.210843783349012468e-03,-4.037056133862325542e-03,-8.673756270188885068e-04,1.728680896192438116e-02,-1.725848926443527892e-03,1.766003300013821659e-03
+1.098072205457580330e-01,-1.234548365140338130e-03,-4.120300842312509156e-03,-8.690860693325328044e-04,1.729728507593587816e-02,-1.758963676882858247e-03,1.802677728963747363e-03
+1.118900967041955297e-01,-1.258265259207792764e-03,-4.203830268260311080e-03,-8.708299215218726378e-04,1.730795887253334941e-02,-1.792055536681769418e-03,1.839492052576392972e-03
+1.139724814028665600e-01,-1.281993549372596394e-03,-4.287650325313009658e-03,-8.726072125846839759e-04,1.731883013411464681e-02,-1.825121695366286548e-03,1.876449082208582041e-03
+1.160543654952217590e-01,-1.305734262991884798e-03,-4.371766987659804655e-03,-8.744179740981739219e-04,1.732989863220092339e-02,-1.858162973266947729e-03,1.913551577983139962e-03
+1.181357398369105166e-01,-1.329487576841234574e-03,-4.456185824672474934e-03,-8.762622352680194153e-04,1.734116414267574194e-02,-1.891178582121363569e-03,1.950803122764939793e-03
+1.202165952858213066e-01,-1.353253719310698903e-03,-4.540913218635443671e-03,-8.781400270743392085e-04,1.735262643095076759e-02,-1.924167816454224605e-03,1.988205840391111569e-03
+1.222969227021216548e-01,-1.377033049320546942e-03,-4.625954816282097301e-03,-8.800513817672907073e-04,1.736428526241118320e-02,-1.957130201109192735e-03,2.025763300442992836e-03
+1.243767129482984818e-01,-1.400825501776910460e-03,-4.711316842148415354e-03,-8.819963306769395151e-04,1.737614039254336412e-02,-1.990064451149279115e-03,2.063478057114846807e-03
+1.264559568891981123e-01,-1.424631832443916954e-03,-4.797005212231259495e-03,-8.839749074837293741e-04,1.738819157554492845e-02,-2.022970805591873828e-03,2.101353312555555471e-03
+1.285346453920664100e-01,-1.448452074634343006e-03,-4.883025962658399215e-03,-8.859871441889504654e-04,1.740043856070249903e-02,-2.055848133355939883e-03,2.139392114541358112e-03
+1.306127693265889400e-01,-1.472286666890819645e-03,-4.969385473930291487e-03,-8.880330758711095116e-04,1.741288109236072135e-02,-2.088696047438080503e-03,2.177596937551978720e-03
+1.326903195649310474e-01,-1.496136047622796455e-03,-5.056089566354234066e-03,-8.901127378288263342e-04,1.742551891179574056e-02,-2.121514144251921731e-03,2.215971380313272801e-03
+1.347672869817779229e-01,-1.519999962695783510e-03,-5.143144421331029628e-03,-8.922261638115267131e-04,1.743835175350059996e-02,-2.154300705129798786e-03,2.254518439856185389e-03
+1.368436624543747093e-01,-1.543879512218878161e-03,-5.230556216334356599e-03,-8.943733888767927027e-04,1.745137934653739550e-02,-2.187056533739192756e-03,2.293241197522400462e-03
+1.389194368625665810e-01,-1.567774642547745123e-03,-5.318331368167119981e-03,-8.965544488917578936e-04,1.746460142176408867e-02,-2.219780253073588318e-03,2.332142364354276506e-03
+1.409946010888388224e-01,-1.591685369213220779e-03,-5.406476035287094764e-03,-8.987693820186436070e-04,1.747801769614572634e-02,-2.252470598271863814e-03,2.371225215868485222e-03
+1.430691460183568520e-01,-1.615612734605962209e-03,-5.494996462331903052e-03,-9.010182244699065958e-04,1.749162788867856394e-02,-2.285128211712244943e-03,2.410492947539102926e-03
+1.451430625390061624e-01,-1.639556455962637568e-03,-5.583899139892793337e-03,-9.033010155549880873e-04,1.750543171164027287e-02,-2.317751232877247707e-03,2.449948376911329374e-03
+1.472163415414325383e-01,-1.663517026883573700e-03,-5.673189998382753563e-03,-9.056177921493551710e-04,1.751942887080451886e-02,-2.350339238134346344e-03,2.489595456678494762e-03
+1.492889739190818854e-01,-1.687495063769858762e-03,-5.762875941218550783e-03,-9.079685926328134741e-04,1.753361906868500242e-02,-2.382892015126848295e-03,2.529436401616875964e-03
+1.513609505682403933e-01,-1.711490764425884110e-03,-5.852963167555715919e-03,-9.103534580717785639e-04,1.754800200557907197e-02,-2.415408547286646088e-03,2.569474835030804462e-03
+1.534322623880743364e-01,-1.735504242572523175e-03,-5.943458121888278538e-03,-9.127724275359395479e-04,1.756257737196484370e-02,-2.447887640721754885e-03,2.609714009925121446e-03
+1.555029002806702365e-01,-1.759536234522675715e-03,-6.034367384398601174e-03,-9.152255417897550748e-04,1.757734485659526516e-02,-2.480329248830538073e-03,2.650157016257833394e-03
+1.575728551510746644e-01,-1.783587030644808412e-03,-6.125697323312467336e-03,-9.177128411747368514e-04,1.759230414333539522e-02,-2.512732468691502435e-03,2.690807434447071555e-03
+1.596421179073343188e-01,-1.807656822017959493e-03,-6.217454750946396942e-03,-9.202343664784073257e-04,1.760745491139632107e-02,-2.545096190620858238e-03,2.731668106942177366e-03
+1.617106794605358555e-01,-1.831746149675642955e-03,-6.309646348128822128e-03,-9.227901615132020566e-04,1.762279683229555152e-02,-2.577419940171322369e-03,2.772742219060494919e-03
+1.637785307248458277e-01,-1.855855366076121895e-03,-6.402278604146685766e-03,-9.253802690576131643e-04,1.763832957722726025e-02,-2.609702868282315173e-03,2.814033413297590668e-03
+1.658456626175507931e-01,-1.879984547720707673e-03,-6.495358271931490901e-03,-9.280047320824084348e-04,1.765405280566396604e-02,-2.641943587050246418e-03,2.855544937502698961e-03
+1.679120660590968095e-01,-1.904135006279105433e-03,-6.588892209586248369e-03,-9.306635946969717653e-04,1.766996618001399552e-02,-2.674143002888640635e-03,2.897279943730332596e-03
+1.699777319731296532e-01,-1.928306039337004214e-03,-6.682886984360888669e-03,-9.333569015872530831e-04,1.768606935372002420e-02,-2.706298223905227469e-03,2.939242232362043999e-03
+1.720426512865346202e-01,-1.952498782101463231e-03,-6.777349583124012838e-03,-9.360846959477034422e-04,1.770236197376641799e-02,-2.738409781576889822e-03,2.981434921629587138e-03
+1.741068149294762724e-01,-1.976713410110461817e-03,-6.872287066230713115e-03,-9.388470255063776412e-04,1.771884368518670747e-02,-2.770476385594578851e-03,3.023861098255925615e-03
+1.761702138354382385e-01,-2.000950329563719668e-03,-6.967705988441605727e-03,-9.416439354444724628e-04,1.773551412662887142e-02,-2.802497155515635240e-03,3.066524904740322317e-03
+1.782328389412633773e-01,-2.025209875563389189e-03,-7.063613673370591761e-03,-9.444754725564932438e-04,1.775237292947255338e-02,-2.834471054804139391e-03,3.109429151519425404e-03
+1.802946811871931343e-01,-2.049493108528408804e-03,-7.160016925243382728e-03,-9.473416844022148958e-04,1.776941972541854625e-02,-2.866398383670192875e-03,3.152577733824909849e-03
+1.823557315169075932e-01,-2.073799422109403288e-03,-7.256922863855353421e-03,-9.502426185583311971e-04,1.778665413675911688e-02,-2.898276293746507130e-03,3.195974067749538915e-03
+1.844159808775653053e-01,-2.098130258460116724e-03,-7.354338512316174921e-03,-9.531783221932019689e-04,1.780407578037290772e-02,-2.930105753869023319e-03,3.239621864688496840e-03
+1.864754202198428135e-01,-2.122485419202056224e-03,-7.452271373926367196e-03,-9.561488452333196221e-04,1.782168427161016575e-02,-2.961884632922408191e-03,3.283524051067029850e-03
+1.885340404979746198e-01,-2.146865627270279527e-03,-7.550728357205754165e-03,-9.591542378786774075e-04,1.783947921588714680e-02,-2.993612502815207535e-03,3.327684785493958530e-03
+1.905918326697928489e-01,-2.171271178406507674e-03,-7.649716810992390412e-03,-9.621945497985123644e-04,1.785746021620316509e-02,-3.025288109737138322e-03,3.372107519182198157e-03
+1.926487876967668822e-01,-2.195702819109690996e-03,-7.749243896022186412e-03,-9.652698306146706214e-04,1.787562686880703536e-02,-3.056911019873601292e-03,3.416796175639565548e-03
+1.947048965440432433e-01,-2.220160886578347297e-03,-7.849317286717024875e-03,-9.683801316109904656e-04,1.789397876735422666e-02,-3.088480006257450939e-03,3.461753836065103573e-03
+1.967601501804849828e-01,-2.244645877208133362e-03,-7.949944219819397515e-03,-9.715255054084953211e-04,1.791251549739053886e-02,-3.119994114247859069e-03,3.506984524468447720e-03
+1.988145395787117298e-01,-2.269158345450616315e-03,-8.051132186023785692e-03,-9.747060042318636541e-04,1.793123664123097080e-02,-3.151452471079480011e-03,3.552491912032775759e-03
+2.008680557151390489e-01,-2.293698626359588592e-03,-8.152888549440276758e-03,-9.779216799409864123e-04,1.795014177387960208e-02,-3.182853765029721674e-03,3.598280031775772693e-03
+2.029206895700181590e-01,-2.318267497894966299e-03,-8.255221225562077783e-03,-9.811725861428473566e-04,1.796923046602920751e-02,-3.214197486331182158e-03,3.644352008491380784e-03
+2.049724321274755956e-01,-2.342865293761132138e-03,-8.358137602676510985e-03,-9.844587779686429698e-04,1.798850228382483513e-02,-3.245482262847743625e-03,3.690712082302081021e-03
+2.070232743755528182e-01,-2.367492320278018637e-03,-8.461645391049420553e-03,-9.877803092320646024e-04,1.800795678462117216e-02,-3.276706640510883804e-03,3.737364017376975690e-03
+2.090732073062456231e-01,-2.392149622759318171e-03,-8.565752353349141007e-03,-9.911372352381570862e-04,1.802759352232348802e-02,-3.307870519192554915e-03,3.784311608524270308e-03
+2.111222219155439450e-01,-2.416837539702887221e-03,-8.670466233355687141e-03,-9.945296116375629356e-04,1.804741204774613916e-02,-3.338972440332967952e-03,3.831558815968578394e-03
+2.131703092034713254e-01,-2.441556316165357458e-03,-8.775794899297899992e-03,-9.979574942276137495e-04,1.806741190203771733e-02,-3.370010735472601764e-03,3.879109497571863428e-03
+2.152174601741243254e-01,-2.466306909430133337e-03,-8.881746309494162328e-03,-1.001420940546989295e-03,1.808759262135821444e-02,-3.400985035150152352e-03,3.926967474387536156e-03
+2.172636658357122441e-01,-2.491089906953977782e-03,-8.988328254060143005e-03,-1.004920007259298489e-03,1.810795373953872495e-02,-3.431894238210427949e-03,3.975137016030083625e-03
+2.193089172005963650e-01,-2.515905628721141259e-03,-9.095549011529738617e-03,-1.008454752931710677e-03,1.812849478186953633e-02,-3.462736701372624588e-03,4.023621610861814052e-03
+2.213532052853296739e-01,-2.540754913384660543e-03,-9.203416308464020673e-03,-1.012025235981022214e-03,1.814921526861049822e-02,-3.493511710973227699e-03,4.072425917885810938e-03
+2.233965211106962168e-01,-2.565638252760717598e-03,-9.311938572934325489e-03,-1.015631514890056935e-03,1.817011471449143239e-02,-3.524217857592884355e-03,4.121553417737310911e-03
+2.254388557017504569e-01,-2.590556390084795130e-03,-9.421123832513488700e-03,-1.019273650368322792e-03,1.819119262863707828e-02,-3.554854154818778110e-03,4.171008480675500625e-03
+2.274802000878569097e-01,-2.615509795441482450e-03,-9.530980314091377567e-03,-1.022951701801467226e-03,1.821244851382856911e-02,-3.585419052609035030e-03,4.220795243381728241e-03
+2.295205453027292508e-01,-2.640499121738886477e-03,-9.641516454797743885e-03,-1.026665730385236622e-03,1.823388186596795052e-02,-3.615911289009383130e-03,4.270917590499793103e-03
+2.315598823844699505e-01,-2.665525432569231922e-03,-9.752740532640476190e-03,-1.030415798313200071e-03,1.825549217634163043e-02,-3.646330313642716543e-03,4.321379849098842645e-03
+2.335982023756095483e-01,-2.690589347858551601e-03,-9.864660822076577740e-03,-1.034201966547778229e-03,1.827727893408179899e-02,-3.676674682081612112e-03,4.372186499543390288e-03
+2.356354963231459820e-01,-2.715691026984427391e-03,-9.977285948741790658e-03,-1.038024297045116173e-03,1.829924161572108604e-02,-3.706942019998295083e-03,4.423341512875253456e-03
+2.376717552785839183e-01,-2.740832035335226402e-03,-1.009062449432246954e-02,-1.041882852843997080e-03,1.832137969536464656e-02,-3.737132518715644437e-03,4.474849094776863802e-03
+2.397069702979740813e-01,-2.766012567482339316e-03,-1.020468521913722726e-02,-1.045777699222614392e-03,1.834369264148217812e-02,-3.767243724023163019e-03,4.526713271591910337e-03
+2.417411324419524721e-01,-2.791233926658583803e-03,-1.031947646767742213e-02,-1.049708900331064801e-03,1.836617991598521113e-02,-3.797275180545357440e-03,4.578939008802514893e-03
+2.437742327757797534e-01,-2.816496308639028327e-03,-1.043500727161184201e-02,-1.053676520755089359e-03,1.838884097427211789e-02,-3.827224271543272407e-03,4.631530147242841206e-03
+2.458062623693803850e-01,-2.841801285289887770e-03,-1.055128625347605785e-02,-1.057680626501862186e-03,1.841167526479513608e-02,-3.857090870634388019e-03,4.684491463485421746e-03
+2.478372122973819258e-01,-2.867149295806184495e-03,-1.066832239192594227e-02,-1.061721282973660842e-03,1.843468223307234352e-02,-3.886872632610334759e-03,4.737827240461268095e-03
+2.498670736391540859e-01,-2.892541635735385117e-03,-1.078612468349699795e-02,-1.065798557157285331e-03,1.845786131543035100e-02,-3.916568717686886095e-03,4.791541907614442659e-03
+2.518958374788481391e-01,-2.917978812486095655e-03,-1.090470214769686218e-02,-1.069912516774316385e-03,1.848121194389639940e-02,-3.946176702943531532e-03,4.845640038191835128e-03
+2.539234949054359203e-01,-2.943462313617888158e-03,-1.102406374249506604e-02,-1.074063228423305328e-03,1.850473354100248824e-02,-3.975695892639123652e-03,4.900126515073866679e-03
+2.559500370127490432e-01,-2.968993008873421595e-03,-1.114421912014835515e-02,-1.078250761190308266e-03,1.852842552832857995e-02,-4.005124317182461383e-03,4.955005121735659920e-03
+2.579754548995178420e-01,-2.994572099313825859e-03,-1.126517724444831697e-02,-1.082475185676186292e-03,1.855228731627928465e-02,-4.034460507876648852e-03,5.010281143841417185e-03
+2.599997396694109231e-01,-3.020200716451069706e-03,-1.138694752179711389e-02,-1.086736570414962048e-03,1.857631831378343812e-02,-4.063702740883987606e-03,5.065959258291653777e-03
+2.620228824310735782e-01,-3.045879795290064307e-03,-1.150953938541771082e-02,-1.091034985019772403e-03,1.860051791699145890e-02,-4.092848795104304070e-03,5.122044323319654979e-03
+2.640448742981674202e-01,-3.071611315619829884e-03,-1.163296261691963707e-02,-1.095370499871206734e-03,1.862488552245290807e-02,-4.121898276096005141e-03,5.178540786991425726e-03
+2.660657063894090180e-01,-3.097395781345367062e-03,-1.175722690014804894e-02,-1.099743187427946892e-03,1.864942051674068485e-02,-4.150847886720203384e-03,5.235453535989888912e-03
+2.680853698286093101e-01,-3.123235285375726335e-03,-1.188234204798340257e-02,-1.104153120295759110e-03,1.867412228114667672e-02,-4.179697171720024064e-03,5.292787483387090090e-03
+2.701038557447121846e-01,-3.149130436014470173e-03,-1.200831778113166382e-02,-1.108600370743461272e-03,1.869899018988146686e-02,-4.208442753404938001e-03,5.350547997297696451e-03
+2.721211552718337257e-01,-3.175083404029088816e-03,-1.213516438771245105e-02,-1.113085011323392513e-03,1.872402360886679315e-02,-4.237084044396746164e-03,5.408739678929811757e-03
+2.741372595493007380e-01,-3.201095279323006287e-03,-1.226289197127414417e-02,-1.117607116886786987e-03,1.874922190152645413e-02,-4.265618292288584495e-03,5.467367787839788086e-03
+2.761521597216903823e-01,-3.227167731596447455e-03,-1.239151095934471793e-02,-1.122166761913039349e-03,1.877458441951456586e-02,-4.294043695516606106e-03,5.526437304459008715e-03
+2.781658469388685329e-01,-3.253302789931375102e-03,-1.252103172847454475e-02,-1.126764022970456851e-03,1.880011051384279305e-02,-4.322358993440493166e-03,5.585953649868857784e-03
+2.801783123560285804e-01,-3.279501549964902816e-03,-1.265146467726543066e-02,-1.131398974034228096e-03,1.882579952598387868e-02,-4.350561047437315546e-03,5.645922565914341697e-03
+2.821895471337307892e-01,-3.305766133547788626e-03,-1.278282102450566636e-02,-1.136071691905698750e-03,1.885165079028925175e-02,-4.378648522033447461e-03,5.706348636151805688e-03
+2.841995424379405444e-01,-3.332097903868379892e-03,-1.291511139696470412e-02,-1.140782254786974195e-03,1.887766363540739276e-02,-4.406618546958808273e-03,5.767237951372602804e-03
+2.862082894400676536e-01,-3.358498777152338702e-03,-1.304834704288261420e-02,-1.145530739558592824e-03,1.890383737997966551e-02,-4.434469187159195171e-03,5.828595852089274790e-03
+2.882157793170046500e-01,-3.384970890225041853e-03,-1.318253925498061481e-02,-1.150317225658048547e-03,1.893017134023356265e-02,-4.462198832218935811e-03,5.890428027938325768e-03
+2.902220032511660386e-01,-3.411515774733745419e-03,-1.331769948770192974e-02,-1.155141791824070008e-03,1.895666482438981143e-02,-4.489804663335879298e-03,5.952740314918003768e-03
+2.922269524305266541e-01,-3.438135246415233420e-03,-1.345383946563989686e-02,-1.160004518524812784e-03,1.898331713244343424e-02,-4.517284337864835818e-03,6.015538508686067189e-03
+2.942306180486604639e-01,-3.464831225085071489e-03,-1.359097103979253068e-02,-1.164905486047389119e-03,1.901012755658492218e-02,-4.544635671804657905e-03,6.078828649661955838e-03
+2.962329913047793695e-01,-3.491605931472911012e-03,-1.372910626033580633e-02,-1.169844775997911724e-03,1.903709538565484527e-02,-4.571857031038709281e-03,6.142616900994553319e-03
+2.982340634037716764e-01,-3.518460637263128715e-03,-1.386825744853929614e-02,-1.174822469128462702e-03,1.906421989917348944e-02,-4.598945012943618516e-03,6.206909426614936991e-03
+3.002338255562408409e-01,-3.545397750651004942e-03,-1.400843741638303080e-02,-1.179838650169047183e-03,1.909150036944676221e-02,-4.625898384205780721e-03,6.271711990453702687e-03
+3.022322689785442162e-01,-3.572418635563769096e-03,-1.414965848887500544e-02,-1.184893402715382106e-03,1.911893606376952395e-02,-4.652714021962909424e-03,6.337031798382530140e-03
+3.042293848928312450e-01,-3.599525214700919924e-03,-1.429193387496174861e-02,-1.189986810475438363e-03,1.914652623887069458e-02,-4.679389952185961099e-03,6.402874934011033685e-03
+3.062251645270823164e-01,-3.626719179174510768e-03,-1.443527671766181575e-02,-1.195118958997350720e-03,1.917427014729932572e-02,-4.705923902850792269e-03,6.469248142625340717e-03
+3.082195991151471803e-01,-3.654002171358969927e-03,-1.457970039984499838e-02,-1.200289934271880346e-03,1.920216703298226885e-02,-4.732313684253857712e-03,6.536158257539495184e-03
+3.102126798967836940e-01,-3.681375534508722625e-03,-1.472521863331364630e-02,-1.205499822480470083e-03,1.923021613324926715e-02,-4.758556758929216070e-03,6.603612028668063606e-03
+3.122043981176959027e-01,-3.708840972833925970e-03,-1.487184534534790985e-02,-1.210748712710686030e-03,1.925841667713653130e-02,-4.784651520765452470e-03,6.671616326642588850e-03
+3.141947450295727307e-01,-3.736399474374297279e-03,-1.501959430575058006e-02,-1.216036691896171633e-03,1.928676788917902535e-02,-4.810595318158297426e-03,6.740178828370468889e-03
+3.161837118901262844e-01,-3.764052015895661893e-03,-1.516848017255219630e-02,-1.221363849495005855e-03,1.931526898067109296e-02,-4.836385820127860058e-03,6.809306037592514384e-03
+3.181712899631304881e-01,-3.791800204536007607e-03,-1.531851714406738500e-02,-1.226730275383613360e-03,1.934391916159252170e-02,-4.862022264307234651e-03,6.879005787707916791e-03
+3.201574705184592196e-01,-3.819644155207674915e-03,-1.546972012839145476e-02,-1.232136060839244662e-03,1.937271763258678614e-02,-4.887501522273633887e-03,6.949285018025487710e-03
+3.221422448321247800e-01,-3.847584635132070014e-03,-1.562210362223348699e-02,-1.237581296683225047e-03,1.940166358499087126e-02,-4.912822165651816765e-03,7.020151838465325464e-03
+3.241256041863160853e-01,-3.875622004338817508e-03,-1.577568313350490373e-02,-1.243066076190174965e-03,1.943075620335231804e-02,-4.937982523671924882e-03,7.091612817826401828e-03
+3.261075398694370242e-01,-3.903756586170370902e-03,-1.593047333467744603e-02,-1.248590494042535991e-03,1.945999466706565054e-02,-4.962981420494836857e-03,7.163676394317365984e-03
+3.280880431761448723e-01,-3.931987715422747588e-03,-1.608648966275473546e-02,-1.254154644201743544e-03,1.948937814539839636e-02,-4.987816430417719646e-03,7.236349816016004327e-03
+3.300671054073883171e-01,-3.960315118174478725e-03,-1.624374714720637491e-02,-1.259758621882451062e-03,1.951890579879247620e-02,-5.012486502133909998e-03,7.309641271030466701e-03
+3.320447178704457047e-01,-3.988737932947247246e-03,-1.640226125569029317e-02,-1.265402522272325754e-03,1.954857677941507568e-02,-5.036990156094160904e-03,7.383558226080721119e-03
+3.340208718789632880e-01,-4.017255650743878012e-03,-1.656204731466935989e-02,-1.271086441841994680e-03,1.957839023464142195e-02,-5.061327274657201510e-03,7.458108428795543095e-03
+3.359955587529934729e-01,-4.045866304278686841e-03,-1.672312097994621202e-02,-1.276810480809485509e-03,1.960834530297229233e-02,-5.085495728109004285e-03,7.533298933276208822e-03
+3.379687698190326217e-01,-4.074568696544362753e-03,-1.688549667227992601e-02,-1.282574736879086696e-03,1.963844111360040293e-02,-5.109495567007085737e-03,7.609138932779381179e-03
+3.399404964100594673e-01,-4.103360350375113812e-03,-1.704919033393751532e-02,-1.288379308412811869e-03,1.966867678872531722e-02,-5.133325182868378640e-03,7.685634482499853655e-03
+3.419107298655730265e-01,-4.132239445145761622e-03,-1.721421643202647456e-02,-1.294224298136411244e-03,1.969905144030507926e-02,-5.156984934738406343e-03,7.762794002036199278e-03
+3.438794615316307368e-01,-4.161203118349170345e-03,-1.738058999084963335e-02,-1.300109806529761410e-03,1.972956417614721292e-02,-5.180473948776838887e-03,7.840624340887128452e-03
+3.458466827608864258e-01,-4.190248275709954583e-03,-1.754832510781439633e-02,-1.306035936643093693e-03,1.976021409019192848e-02,-5.203791608752486933e-03,7.919133430685142139e-03
+3.478123849126281697e-01,-4.219372295144757472e-03,-1.771743621727813953e-02,-1.312002790075921833e-03,1.979100027247216245e-02,-5.226938833622729423e-03,7.998327778967426541e-03
+3.497765593528163741e-01,-4.248571157034212488e-03,-1.788793673707415877e-02,-1.318010471286545048e-03,1.982192180393331157e-02,-5.249914514288383204e-03,8.078214865056717459e-03
+3.517391974541219102e-01,-4.277841290162474885e-03,-1.805984042013521543e-02,-1.324059083799621582e-03,1.985297775336350087e-02,-5.272718900265168859e-03,8.158800460053221987e-03
+3.537002905959633070e-01,-4.307179325425763115e-03,-1.823315971548822995e-02,-1.330148735832762307e-03,1.988416718559167518e-02,-5.295353045804890302e-03,8.240091551283445712e-03
+3.556598301645456095e-01,-4.336580652612918169e-03,-1.840790686467094761e-02,-1.336279531034468030e-03,1.991548915595168362e-02,-5.317815992167124542e-03,8.322094136532537378e-03
+3.576178075528972378e-01,-4.366041401954122089e-03,-1.858409355007026309e-02,-1.342451578467851458e-03,1.994694270763379754e-02,-5.340108350929254066e-03,8.404813733644118240e-03
+3.595742141609086784e-01,-4.395557631205019265e-03,-1.876173023525806380e-02,-1.348664982998729143e-03,1.997852687901416693e-02,-5.362230625923267158e-03,8.488256469762669315e-03
+3.615290413953694548e-01,-4.425125034595592255e-03,-1.894082753716589893e-02,-1.354919853026546970e-03,2.001024069787052731e-02,-5.384182490755384624e-03,8.572426365037367699e-03
+3.634822806700064302e-01,-4.454739877286584618e-03,-1.912139442817336124e-02,-1.361216297371470926e-03,2.004208318316403475e-02,-5.405964340647305584e-03,8.657328566658677171e-03
+3.654339234055213326e-01,-4.484398517520062945e-03,-1.930343950441492512e-02,-1.367554425048632134e-03,2.007405334638867958e-02,-5.427576171455564952e-03,8.742966842815017015e-03
+3.673839610296283364e-01,-4.514097384088833556e-03,-1.948697015189535323e-02,-1.373934345730439118e-03,2.010615018913394780e-02,-5.449017287520364732e-03,8.829345029662300989e-03
+3.693323849770918654e-01,-4.543833671916912460e-03,-1.967199289172718094e-02,-1.380356168900975355e-03,2.013837270301251092e-02,-5.470287330356697619e-03,8.916466282209559169e-03
+3.712791866897643955e-01,-4.573605128563596085e-03,-1.985851329519715269e-02,-1.386820004212532863e-03,2.017071987230598454e-02,-5.491385577218271981e-03,9.004333149100518172e-03
+3.732243576166232035e-01,-4.603409933465224535e-03,-2.004653585218356404e-02,-1.393325961969171800e-03,2.020319067133567736e-02,-5.512310388014069092e-03,9.092947755833687834e-03
+3.751678892138092802e-01,-4.633247492264936339e-03,-2.023606421503663239e-02,-1.399874152778653180e-03,2.023578406472531857e-02,-5.533060342900301271e-03,9.182311308176845446e-03
+3.771097729446636349e-01,-4.663118278066971309e-03,-2.042710061673507285e-02,-1.406464688575088506e-03,2.026849901132071752e-02,-5.553633568697126377e-03,9.272425171620793036e-03
+3.790500002797654311e-01,-4.693023162667986879e-03,-2.061964618587249637e-02,-1.413097677075070367e-03,2.030133445827233402e-02,-5.574026085385906833e-03,9.363290300777560651e-03
+3.809885626969692352e-01,-4.722965585388918738e-03,-2.081370186527512628e-02,-1.419773228766377194e-03,2.033428934381279612e-02,-5.594235451734704684e-03,9.454905572073086378e-03
+3.829254516814425413e-01,-4.752949485975274148e-03,-2.100926681408528049e-02,-1.426491455620410107e-03,2.036736260054123823e-02,-5.614256448450864745e-03,9.547270874091967272e-03
+3.848606587257031308e-01,-4.782980841344887900e-03,-2.120633957791907484e-02,-1.433252467782582728e-03,2.040055314687393151e-02,-5.634083501271188420e-03,9.640385050788981974e-03
+3.867941753296565421e-01,-4.813067998530910600e-03,-2.140491764137301620e-02,-1.440056375519420569e-03,2.043385989605457642e-02,-5.653710832142023850e-03,9.734246924335646436e-03
+3.887259930006330966e-01,-4.843220916094862806e-03,-2.160499787274338182e-02,-1.446903286550228025e-03,2.046728175221788557e-02,-5.673130552508873041e-03,9.828854695843458958e-03
+3.906561032534255906e-01,-4.873452058477463662e-03,-2.180657696777300175e-02,-1.453793311307852642e-03,2.050081761046589415e-02,-5.692333848099905350e-03,9.924205401943969607e-03
+3.925844976103263773e-01,-4.903776622846215891e-03,-2.200965034432225340e-02,-1.460726559603689858e-03,2.053446635748726670e-02,-5.711310909099772716e-03,1.002029733381388718e-02
+3.945111676011643920e-01,-4.934212233350155491e-03,-2.221421381722009716e-02,-1.467703139844651590e-03,2.056822687311317746e-02,-5.730049863213631731e-03,1.011712730436526916e-02
+3.964361047633429003e-01,-4.964779343952270205e-03,-2.242026243629438934e-02,-1.474723161255414019e-03,2.060209802481694982e-02,-5.748537044475264361e-03,1.021469329878035674e-02
+3.983593006418760796e-01,-4.995502319417923290e-03,-2.262779189527363080e-02,-1.481786729621698039e-03,2.063607867419705449e-02,-5.766758546364767965e-03,1.031299237152089737e-02
+4.002807467894267113e-01,-5.026408383559635012e-03,-2.283679824096510741e-02,-1.488893954815502085e-03,2.067016767689829060e-02,-5.784697795757551969e-03,1.041202179951062658e-02
+4.022004347663427071e-01,-5.057528220010591713e-03,-2.304727755053439381e-02,-1.496044942480974967e-03,2.070436388068603792e-02,-5.802336251492031145e-03,1.051178036659047681e-02
+4.041183561406945790e-01,-5.088896058250748713e-03,-2.325922781155729049e-02,-1.503239799452581790e-03,2.073866612355868000e-02,-5.819653171857695761e-03,1.061226558996369862e-02
+4.060345024883124099e-01,-5.120550241151038996e-03,-2.347264746303964755e-02,-1.510478632478595967e-03,2.077307323597530828e-02,-5.836626326923568218e-03,1.071347728106932309e-02
+4.079488653928228237e-01,-5.152533045745131580e-03,-2.368753737458891701e-02,-1.517761547754844370e-03,2.080758404088690894e-02,-5.853231361140912720e-03,1.081541473312151377e-02
+4.098614364456857340e-01,-5.184891041386194450e-03,-2.390389983310186456e-02,-1.525088652550366354e-03,2.084219735686541203e-02,-5.869442222598809066e-03,1.091807959276704966e-02
+4.117722072462318139e-01,-5.217674061140877353e-03,-2.412174024985065321e-02,-1.532460052625222568e-03,2.087691199332612768e-02,-5.885229060987146391e-03,1.102147370205019787e-02
+4.136811694016988561e-01,-5.250937173458166893e-03,-2.434106663462694276e-02,-1.539875856794813060e-03,2.091172675150293134e-02,-5.900561828910963653e-03,1.112560119684509116e-02
+4.155883145272687984e-01,-5.284739298218006816e-03,-2.456189044601890178e-02,-1.547336170863165914e-03,2.094664043111839000e-02,-5.915407679577401427e-03,1.123046809071205979e-02
+4.174936342461046945e-01,-5.319142808050428446e-03,-2.478422751859379186e-02,-1.554841103325234385e-03,2.098165182211317314e-02,-5.929730309428947119e-03,1.133608177636367606e-02
+4.193971201893874623e-01,-5.354215148315637392e-03,-2.500809788617461282e-02,-1.562390762803617019e-03,2.101675970772012847e-02,-5.943493194200972357e-03,1.144245265342985936e-02
+4.212987639963524100e-01,-5.390027268887524585e-03,-2.523352704703000354e-02,-1.569985260457716679e-03,2.105196286940767478e-02,-5.956656965253691489e-03,1.154959310264423722e-02
+4.231985573143263735e-01,-5.426653576945308401e-03,-2.546054581195857092e-02,-1.577624708771401939e-03,2.108726008060220203e-02,-5.969179765903306727e-03,1.165751915961044625e-02
+4.250964917987639091e-01,-5.464172047514387169e-03,-2.568919140465522263e-02,-1.585309221617238907e-03,2.112265010825969611e-02,-5.981018041508984889e-03,1.176624987426397491e-02
+4.269925591132845422e-01,-5.502664361830934131e-03,-2.591950804700784289e-02,-1.593038914567930899e-03,2.115813171320848549e-02,-5.992127529541242323e-03,1.187580765950705178e-02
+4.288867509297087377e-01,-5.542214985064179700e-03,-2.615154762502591712e-02,-1.600813908749112611e-03,2.119370365748197982e-02,-6.002462417554685348e-03,1.198621850108808930e-02
+4.307790589280949822e-01,-5.582909419594278208e-03,-2.638536965093086356e-02,-1.608634327494432676e-03,2.122936469180241800e-02,-6.011973120580636071e-03,1.209751348678393505e-02
+4.326694747967760879e-01,-5.624836943339354793e-03,-2.662104254773439704e-02,-1.616500297232027284e-03,2.126511356186557011e-02,-6.020612640999317700e-03,1.220972784174256635e-02
+4.345579902323956079e-01,-5.668086888308678482e-03,-2.685864418269194934e-02,-1.624411947509670851e-03,2.130094901223520201e-02,-6.028330996688483628e-03,1.232290134248186994e-02
+4.364445969399446956e-01,-5.712749865396877028e-03,-2.709826247034092736e-02,-1.632369416765538797e-03,2.133686978090784669e-02,-6.035079109234661732e-03,1.243707855465810315e-02
+4.383292866327981319e-01,-5.758915598417672053e-03,-2.733999499872560510e-02,-1.640372845343969054e-03,2.137287460216981405e-02,-6.040806525662572625e-03,1.255231083860073107e-02
+4.402120510327509062e-01,-5.806673513138604567e-03,-2.758395149233434929e-02,-1.648422382802023668e-03,2.140896220178114748e-02,-6.045464500283435731e-03,1.266865295347319696e-02
+4.420928818700546881e-01,-5.856111188039061534e-03,-2.783025168384414133e-02,-1.656518184591339662e-03,2.144513130547208088e-02,-6.049005264033020801e-03,1.278616816724268898e-02
+4.439717708834537979e-01,-5.907312558797056003e-03,-2.807902858522715386e-02,-1.664660414443093006e-03,2.148138063017760932e-02,-6.051381029419427605e-03,1.290492311909763150e-02
+4.458487098202219556e-01,-5.960358683388444594e-03,-2.833042594608248255e-02,-1.672849245481478802e-03,2.151770888736635398e-02,-6.052547992406184275e-03,1.302499343531959321e-02
+4.477236904361980296e-01,-6.015324909314569667e-03,-2.858460144709046649e-02,-1.681084859276921542e-03,2.155411478580117335e-02,-6.052463781524565663e-03,1.314645842302762484e-02
+4.495967044958226744e-01,-6.072280093664014257e-03,-2.884172407942784538e-02,-1.689367450776781783e-03,2.159059702419018414e-02,-6.051088943469162644e-03,1.326940647322343361e-02
+4.514677437721743569e-01,-6.131286216320160250e-03,-2.910197651764229870e-02,-1.697697221782993628e-03,2.162715429642985202e-02,-6.048389346116053722e-03,1.339393088466632117e-02
+4.533368000470053838e-01,-6.192395476053540182e-03,-2.936555379507025179e-02,-1.706074389606007744e-03,2.166378528950165319e-02,-6.044334032813460447e-03,1.352013238003166654e-02
+4.552038651107780942e-01,-6.255650296386309345e-03,-2.963266396410517423e-02,-1.714499182193093916e-03,2.170048868220618432e-02,-6.038898690570141010e-03,1.364811763485383571e-02
+4.570689307627009978e-01,-6.321080458658900544e-03,-2.990352725982366525e-02,-1.722971843406477261e-03,2.173726314557982223e-02,-6.032063875395069635e-03,1.377800032255495181e-02
+4.589319888107645240e-01,-6.388702621438057706e-03,-3.017837623911077960e-02,-1.731492629524881499e-03,2.177410733872935794e-02,-6.023817838455475075e-03,1.390990001608701128e-02
+4.607930310717773814e-01,-6.458518054089089008e-03,-3.045745483372435183e-02,-1.740061812620949909e-03,2.181101991031266646e-02,-6.014156109544320332e-03,1.404394278714514444e-02
+4.626520493714022519e-01,-6.530511995257185821e-03,-3.074101869017712915e-02,-1.748679680710222509e-03,2.184799949799701504e-02,-6.003084210499970430e-03,1.418025903087754246e-02
+4.645090355441917063e-01,-6.604650304558434999e-03,-3.102933280212347850e-02,-1.757346541439927523e-03,2.188504472885508673e-02,-5.990615350608158307e-03,1.431898598322700437e-02
+4.663639814336241196e-01,-6.680879403990288949e-03,-3.132267186821040966e-02,-1.766062715856822488e-03,2.192215421204878403e-02,-5.976774318846795990e-03,1.446026472291815981e-02
+4.682168788921395874e-01,-6.759123691730986420e-03,-3.162131854876392562e-02,-1.774828546364182625e-03,2.195932654537544743e-02,-5.961596633843657678e-03,1.460424068828313632e-02
+4.700677197811755081e-01,-6.839284006040245723e-03,-3.192556240167153392e-02,-1.783644391986261403e-03,2.199656030541717947e-02,-5.945129625428148025e-03,1.475106247887521260e-02
+4.719164959712024987e-01,-6.921236653020058915e-03,-3.223569809379505519e-02,-1.792510634341951776e-03,2.203385405374771663e-02,-5.927434498959946799e-03,1.490088155741225127e-02
+4.737631993417600329e-01,-7.004830707474545178e-03,-3.255202359965397862e-02,-1.801427671194487119e-03,2.207120633008117019e-02,-5.908585047288287921e-03,1.505385150534348411e-02
+4.756078217814919684e-01,-7.089887399943956003e-03,-3.287483881954184756e-02,-1.810395922310852491e-03,2.210861565270205373e-02,-5.888670117441235918e-03,1.521012601091809371e-02
+4.774503551881826846e-01,-7.176198161378245788e-03,-3.320444277767425206e-02,-1.819415825683168546e-03,2.214608051332387154e-02,-5.867793358839409196e-03,1.536985903186826805e-02
+4.792907914687918880e-01,-7.263523996324780518e-03,-3.354113200819298229e-02,-1.828487841031582435e-03,2.218359938018107241e-02,-5.846075213624860913e-03,1.553320223938686913e-02
+4.811291225394909166e-01,-7.351593185708179753e-03,-3.388519727822560113e-02,-1.837612447641295962e-03,2.222117069130850886e-02,-5.823651476787685723e-03,1.570030508855999388e-02
+4.829653403256978783e-01,-7.440101590088470454e-03,-3.423692111234542812e-02,-1.846790143632214476e-03,2.225879285638718980e-02,-5.800676383519312064e-03,1.587131289858685623e-02
+4.847994367621128453e-01,-7.528710139598149970e-03,-3.459657526743380868e-02,-1.856021444705333121e-03,2.229646425094027051e-02,-5.777320019979813662e-03,1.604636454711796251e-02
+4.866314037927538805e-01,-7.617045758319038878e-03,-3.496441707336248733e-02,-1.865306887454019945e-03,2.233418321553738359e-02,-5.753771718368806687e-03,1.622559182488410995e-02
+4.884612333709922316e-01,-7.704699324493669531e-03,-3.534068605180852585e-02,-1.874647022447709392e-03,2.237194805096912736e-02,-5.730237384624308102e-03,1.640911782044719713e-02
+4.902889174595871369e-01,-7.791227726540060280e-03,-3.572560123203804422e-02,-1.884042418437585631e-03,2.240975702048588966e-02,-5.706943940843874075e-03,1.659705357282786203e-02
+4.921144480307220181e-01,-7.876150737006005001e-03,-3.611935637427192425e-02,-1.893493658997974461e-03,2.244760834737983754e-02,-5.684133431282939117e-03,1.678949824975733973e-02
+4.939378170660390643e-01,-7.958953823686033824e-03,-3.652211684529899083e-02,-1.903001340534749139e-03,2.248550020531381657e-02,-5.662067585671452102e-03,1.698653586440346946e-02
+4.957590165566744811e-01,-8.039087674043005755e-03,-3.693401540274449113e-02,-1.912566070061653837e-03,2.252343072367533547e-02,-5.641025491263996529e-03,1.718823371350069670e-02
+4.975780385032944064e-01,-8.115969335281005004e-03,-3.735514827057275639e-02,-1.922188463891480589e-03,2.256139798034636351e-02,-5.621303512922369745e-03,1.739463998973126563e-02
+4.993948749161288836e-01,-8.188983473234772922e-03,-3.778557110213057874e-02,-1.931869145880659886e-03,2.259940000234394314e-02,-5.603214603447381065e-03,1.760578137556912184e-02
+5.012095178150079988e-01,-8.257483989277723466e-03,-3.822529398373108317e-02,-1.941608744594590678e-03,2.263743476126854962e-02,-5.587087404326728478e-03,1.782166226355719890e-02
+5.030219592293964093e-01,-8.320796264870273379e-03,-3.867427806104966476e-02,-1.951407886979981705e-03,2.267550017352020983e-02,-5.573265597326365443e-03,1.804226082300748601e-02
+5.048321911984281485e-01,-8.378219005290508600e-03,-3.913243069433854776e-02,-1.961267199956767335e-03,2.271359409702352131e-02,-5.562105535515687697e-03,1.826752778231116939e-02
+5.066402057709425977e-01,-8.429027447105679263e-03,-3.959960140188734101e-02,-1.971187302270768424e-03,2.275171432591331297e-02,-5.553975425329500844e-03,1.849738376422161573e-02
+5.084459950055177924e-01,-8.472477722810206613e-03,-4.007557769101675882e-02,-1.981168806620262068e-03,2.278985859535497330e-02,-5.549255638216870311e-03,1.873171694133810017e-02
+5.102495509705069487e-01,-8.507808253315127939e-03,-4.056008020055430763e-02,-1.991212309470677400e-03,2.282802457904871754e-02,-5.548332381226748523e-03,1.897038217645119831e-02
+5.120508657440723255e-01,-8.534245734409414558e-03,-4.105275959699721672e-02,-2.001318390149890887e-03,2.286620988118102590e-02,-5.551598891296783377e-03,1.921319715730549285e-02
+5.138499314142201957e-01,-8.551009496902892137e-03,-4.155319186720575486e-02,-2.011487603281723793e-03,2.290441204233718830e-02,-5.559452473392495660e-03,1.945994191405672410e-02
+5.156467400788354860e-01,-8.557316223297129626e-03,-4.206087548787569574e-02,-2.021720477501651788e-03,2.294262853644034664e-02,-5.572291091862373660e-03,1.971035529166049668e-02
+5.174412838457173036e-01,-8.552385578971114996e-03,-4.257522701868181325e-02,-2.032017507785019439e-03,2.298085676944017566e-02,-5.590510559449345281e-03,1.996413495361441803e-02
+5.192335548326121319e-01,-8.535447156746053463e-03,-4.309557878265419389e-02,-2.042379152586382748e-03,2.301909408136680601e-02,-5.614503078189769618e-03,2.022093415213215353e-02
+5.210235451672501350e-01,-8.505745348521527929e-03,-4.362117480364286620e-02,-2.052805823855410969e-03,2.305733774780553344e-02,-5.644650797961844359e-03,2.048036255375485643e-02
+5.228112469873781309e-01,-8.462547804161628731e-03,-4.415117005308154263e-02,-2.063297883258384362e-03,2.309558497518103201e-02,-5.681325028984720964e-03,2.074198175732702709e-02
+5.245966524407953413e-01,-8.405152151789229559e-03,-4.468462694031544863e-02,-2.073855637766952413e-03,2.313383290968825182e-02,-5.724881155989104686e-03,2.100530698755850509e-02
+5.263797536853872527e-01,-8.332893540265624843e-03,-4.522051439945604101e-02,-2.084479331257903245e-03,2.317207863142384944e-02,-5.775654097641230028e-03,2.126980503950222021e-02
+5.281605428891603671e-01,-8.245153950127780129e-03,-4.575770666602817921e-02,-2.095169138448770701e-03,2.321031916287885979e-02,-5.833956139074831897e-03,2.153489403477737082e-02
+5.299390122302762851e-01,-8.141369294101889406e-03,-4.629498289997578675e-02,-2.105925160186611182e-03,2.324855146917602020e-02,-5.900069739483487438e-03,2.179994293188593918e-02
+5.317151538970863456e-01,-8.021039372316574045e-03,-4.683102686977198836e-02,-2.116747414246006407e-03,2.328677246016547936e-02,-5.974244886155550931e-03,2.206427237086706905e-02
+5.334889600881657090e-01,-7.883737398767191179e-03,-4.736442833135730279e-02,-2.127635831227694865e-03,2.332497899814150316e-02,-6.056694921818323550e-03,2.232715389515103899e-02
+5.352604230123479967e-01,-7.729117922246329410e-03,-4.789368374494799852e-02,-2.138590245346406061e-03,2.336316790361677234e-02,-6.147588734034384328e-03,2.258781207203454378e-02
+5.370295348887590414e-01,-7.556927561396148928e-03,-4.841719956775283884e-02,-2.149610392599756673e-03,2.340133595041684125e-02,-6.247047698303609745e-03,2.284542353798750536e-02
+5.387962879468511934e-01,-7.367016037720833938e-03,-4.893329313987086759e-02,-2.160695899210036963e-03,2.343947988501373855e-02,-6.355142773449088886e-03,2.309912240341342737e-02
+5.405606744264380703e-01,-7.159343170136812061e-03,-4.944019964253687505e-02,-2.171846278670488441e-03,2.347759642527044721e-02,-6.471883743663565947e-03,2.334799620406366807e-02
+5.423226865777274197e-01,-6.933992250777284289e-03,-4.993607310440625197e-02,-2.183060929671061610e-03,2.351568226883066909e-02,-6.597220280697330824e-03,2.359109502230406427e-02
+5.440823166613565354e-01,-6.691177452073140616e-03,-5.041899422753531174e-02,-2.194339119598223697e-03,2.355373410378035562e-02,-6.731031815556150558e-03,2.382742990132969557e-02
+5.458395569484247867e-01,-6.431255698261235595e-03,-5.088697627098484610e-02,-2.205679990074687052e-03,2.359174861409204030e-02,-6.873125936389397339e-03,2.405597704072734433e-02
+5.475943997205290348e-01,-6.154735155864461682e-03,-5.133797176666511641e-02,-2.217082545306781061e-03,2.362972249225550256e-02,-7.023230765808836581e-03,2.427568262089667070e-02
+5.493468372697968283e-01,-5.862285733978941926e-03,-5.176988166442862693e-02,-2.228545653051090380e-03,2.366765244550439809e-02,-7.180991596890741978e-03,2.448546529997237126e-02
+5.510968618989195988e-01,-5.554748277989330317e-03,-5.218056318092108292e-02,-2.240068036763580035e-03,2.370553520969506320e-02,-7.345965748236548548e-03,2.468422339439502525e-02
+5.528444659211878554e-01,-5.233143738284640883e-03,-5.256784130853529857e-02,-2.251648273694236736e-03,2.374336756179383154e-02,-7.517618225898701004e-03,2.487083746291443923e-02
+5.545896416605240464e-01,-4.898680754711357520e-03,-5.292951879001729643e-02,-2.263284792800318405e-03,2.378114632910645690e-02,-7.695315459241471555e-03,2.504417803261203140e-02
+5.563323814515162002e-01,-4.552765501506348018e-03,-5.326338894556995052e-02,-2.274975873190434282e-03,2.381886840219986715e-02,-7.878324514641751874e-03,2.520311022559760389e-02
+5.580726776394524524e-01,-4.197007517668454972e-03,-5.356724760321823764e-02,-2.286719642653747483e-03,2.385653075206659729e-02,-8.065806219859252349e-03,2.534650228520387086e-02
+5.598105225803533536e-01,-3.833227398022070846e-03,-5.383890786309698806e-02,-2.298514075651529077e-03,2.389413043905134343e-02,-8.256813415854937291e-03,2.547323095972323093e-02
+5.615459086410068412e-01,-3.463462421118819988e-03,-5.407621442295477976e-02,-2.310356996833883188e-03,2.393166463069095959e-02,-8.450287180827022529e-03,2.558218981629637942e-02
+5.632788281990006585e-01,-3.089972112715807899e-03,-5.427705925165200729e-02,-2.322246082430124824e-03,2.396913061462675743e-02,-8.645054975902576738e-03,2.567229705457151415e-02
+5.650092736427564377e-01,-2.715241927480746467e-03,-5.443939738720886962e-02,-2.334178861645709330e-03,2.400652581629260027e-02,-8.839827504919071585e-03,2.574250501622542611e-02
+5.667372373715630074e-01,-2.341986827127301007e-03,-5.456126508117736806e-02,-2.346152719762767836e-03,2.404384781100373306e-02,-9.033197819509689031e-03,2.579180710977529642e-02
+5.684627117956096987e-01,-1.973153207730181206e-03,-5.464079679312977555e-02,-2.358164906103779948e-03,2.408109434231964219e-02,-9.223639961704757986e-03,2.581924857300049725e-02
+5.701856893360195411e-01,-1.611920242422606271e-03,-5.467624417693709576e-02,-2.370212537279375083e-03,2.411826333676219541e-02,-9.409509329403655323e-03,2.582393503274327262e-02
+5.719061624248831244e-01,-1.261698808045422140e-03,-5.466599489115611665e-02,-2.382292607897670833e-03,2.415535292272042583e-02,-9.589041510925261705e-03,2.580504275845994441e-02
+5.736241235052911280e-01,-9.261304319102735489e-04,-5.460859229190894587e-02,-2.394401996968391951e-03,2.419236144123497120e-02,-9.760354327043948078e-03,2.576182844070237440e-02
+5.753395650313679610e-01,-6.090846435926782539e-04,-5.450275529619649090e-02,-2.406537481181657279e-03,2.422928747061102797e-02,-9.921450212287718715e-03,2.569363962782829358e-02
+5.770524794683048464e-01,-3.146526948866030203e-04,-5.434739836775441535e-02,-2.418695742378199932e-03,2.426612983582441749e-02,-1.007021526515019909e-02,2.559992569287383377e-02
+5.787628592923925730e-01,-4.714379648145546850e-05,-5.414165253601706945e-02,-2.430873382221655429e-03,2.430288762475949632e-02,-1.020442663032882467e-02,2.548024739025515559e-02
+5.804706969910552461e-01,1.889237184981392066e-04,-5.388488603642094948e-02,-2.443066936343243640e-03,2.433956020854500732e-02,-1.032175410265394680e-02,2.533428740036814536e-02
+5.821759850628824839e-01,3.888316357764968426e-04,-5.357672497652296584e-02,-2.455272895199278951e-03,2.437614725285134482e-02,-1.041976480919807983e-02,2.516186084179744195e-02
+5.838787160176629465e-01,5.476719133657354834e-04,-5.321707264114552971e-02,-2.467487715543401853e-03,2.441264873385428844e-02,-1.049593029227778411e-02,2.496292807626504864e-02
+5.855788823764169759e-01,6.603603485409064496e-04,-5.280613137302532512e-02,-2.479707838361745308e-03,2.444906495242986019e-02,-1.054763080760012794e-02,2.473760213799781893e-02
+5.872764766714295703e-01,7.216497454247711597e-04,-5.234442210400475082e-02,-2.491929712823222256e-03,2.448539654248415090e-02,-1.057216459663198709e-02,2.448615972446594943e-02
+5.889714914462828022e-01,7.261455481014917994e-04,-5.183280343239073107e-02,-2.504149818192163545e-03,2.452164449130256330e-02,-1.056675659934020600e-02,2.420905188770005809e-02
+5.906639192558894580e-01,6.683248746010447784e-04,-5.127249076395702654e-02,-2.516364684440150924e-03,2.455781014641653104e-02,-1.052856485659460842e-02,2.390691314159108447e-02
+5.923537526665245689e-01,5.425543765633647963e-04,-5.066507457121285224e-02,-2.528570920106984910e-03,2.459389522554116317e-02,-1.045469316045933607e-02,2.358057043773841358e-02
+5.940409842558590503e-01,3.431120193603384369e-04,-5.001253718769282930e-02,-2.540765235334604619e-03,2.462990182656391025e-02,-1.034220035514689558e-02,2.323105277910470293e-02
+5.957256066129916761e-01,6.420916297703325787e-05,-4.931726941269752584e-02,-2.552944470835794965e-03,2.466583243472990003e-02,-1.018811300656788203e-02,2.285959875345012701e-02
+5.974076123384820525e-01,-2.999850022790464805e-04,-4.858208539585035651e-02,-2.565105622902899649e-03,2.470168993052671361e-02,-9.989437493647902386e-03,2.246766452888225921e-02
+5.990869940443830366e-01,-7.553170291874799396e-04,-4.781023758723422501e-02,-2.577245881538098304e-03,2.473747759139326846e-02,-9.743172083249217880e-03,2.205692863776035836e-02
+6.007637443542724887e-01,-1.307623537035624032e-03,-4.700542602054139096e-02,-2.589362654209268356e-03,2.477319909430468828e-02,-9.446323981150615140e-03,2.162930397511467900e-02
+6.024378559032870228e-01,-1.962700942102987584e-03,-4.617181295464990037e-02,-2.601453595727806593e-03,2.480885852313084033e-02,-9.095921167331030843e-03,2.118693602222371936e-02
+6.041093213381530935e-01,-2.726275492482173324e-03,-4.531402950353675269e-02,-2.613516650528236070e-03,2.484446035563248750e-02,-8.689027999012947229e-03,2.073221205494587757e-02
+6.057781333172199689e-01,-3.603974281557623376e-03,-4.443718302168647716e-02,-2.625550075333210522e-03,2.488000947255432707e-02,-8.222765619996854139e-03,2.026776459945230438e-02
+6.074442845104918165e-01,-4.601289133137083549e-03,-4.354686417610582200e-02,-2.637552478298357990e-03,2.491551114823704199e-02,-7.694321724793521701e-03,1.979647085172149198e-02
+6.091077675996596774e-01,-5.723546295150138200e-03,-4.264914887548826306e-02,-2.649522857904028708e-03,2.495097104072629968e-02,-7.100973657974318266e-03,1.932145684280381012e-02
+6.107685752781343291e-01,-6.975869841777673601e-03,-4.175059915061378468e-02,-2.661460632200288688e-03,2.498639518428082129e-02,-6.440101860814127603e-03,1.884609841650281697e-02
+6.124267002510773716e-01,-8.363148657397264166e-03,-4.085826342918875975e-02,-2.673365676129242901e-03,2.502178997578895225e-02,-5.709211854045681891e-03,1.837401794895763413e-02
+6.140821352354338680e-01,-9.889998828787270255e-03,-3.997967205605180291e-02,-2.685238359575136975e-03,2.505716216482885284e-02,-4.905949085417988381e-03,1.790908437128641006e-02
+6.157348729599646520e-01,-1.156072837392766763e-02,-3.912283179423937246e-02,-2.697079581526661690e-03,2.509251883074142125e-02,-4.028119265645805844e-03,1.745540929425213814e-02
+6.173849061652770809e-01,-1.337930063421793360e-02,-3.829621712077783957e-02,-2.708890804882138625e-03,2.512786736717983679e-02,-3.073706918906662835e-03,1.701734316181776480e-02
+6.190322276038583427e-01,-1.534929758037369314e-02,-3.750875991549935257e-02,-2.720674091906283548e-03,2.516321545949439312e-02,-2.040894088058007195e-03,1.659946828964073764e-02
+6.206768300401065419e-01,-1.747388226285521984e-02,-3.676983554475805882e-02,-2.732432140192114344e-03,2.519857105750356041e-02,-9.280773314672289068e-04,1.620659241587034394e-02
+6.223187062503623412e-01,-1.975576427351993097e-02,-3.608924700475986402e-02,-2.744168318538670655e-03,2.523394234679885925e-02,2.661101732345848379e-04,1.584373955259643293e-02
+6.239578490229409358e-01,-2.219716238003700479e-02,-3.547720480760925554e-02,-2.755886696666568110e-03,2.526933772583676505e-02,1.542782812663559197e-03,1.551614242260830438e-02
+6.255942511581639165e-01,-2.479976908144861797e-02,-3.494430747190726411e-02,-2.767592077439142914e-03,2.530476576545731895e-02,2.902784562649525762e-03,1.522922733699686761e-02
+6.272279054683905786e-01,-2.756471715253102500e-02,-3.450151552026646529e-02,-2.779290032506842069e-03,2.534023517898514877e-02,4.346669192613888434e-03,1.498860482657846278e-02
+6.288588047780496737e-01,-3.049254433229192304e-02,-3.416012524624118668e-02,-2.790986925716468833e-03,2.537575478259691472e-02,5.874686328357048640e-03,1.480005432300485375e-02
+6.304869419236707184e-01,-3.358316197469096709e-02,-3.393173944857825336e-02,-2.802689945941406503e-03,2.541133345084909886e-02,7.486762937802413954e-03,1.466950821658355789e-02
+6.321123097539158575e-01,-3.683582440689327914e-02,-3.382823456333931139e-02,-2.814407131655888121e-03,2.544698007894420633e-02,9.182486815502747049e-03,1.460303646631724503e-02
+6.337349011296107282e-01,-4.024909780343995103e-02,-3.386172638837021270e-02,-2.826147394280910212e-03,2.548270353494160381e-02,1.096109477152863547e-02,1.460682779050210753e-02
+6.353547089237763235e-01,-4.382083454972976466e-02,-3.404453325661354474e-02,-2.837920543019131683e-03,2.551851261005610647e-02,1.282145479078914893e-02,1.468716976060114622e-02
+6.369717260216599675e-01,-4.754814596227195655e-02,-3.438913503952886752e-02,-2.849737302398633234e-03,2.555441597492817338e-02,1.476205599469151801e-02,1.485043104774827830e-02
+6.385859453207668457e-01,-5.142737897440792516e-02,-3.490813328861434861e-02,-2.861609327074971908e-03,2.559042211866505157e-02,1.678099633691516532e-02,1.510303611905997392e-02
+6.401973597308906472e-01,-5.545409589792069799e-02,-3.561420584243248783e-02,-2.873549222947639065e-03,2.562653930144264194e-02,1.887597013631898274e-02,1.545144517802832761e-02
+6.418059621741456500e-01,-5.962305425088464811e-02,-3.652006113705127954e-02,-2.885570550710630806e-03,2.566277549473815894e-02,2.104426150876158308e-02,1.590212984682605468e-02
+6.434117455849966971e-01,-6.392819230187350710e-02,-3.763839128392340583e-02,-2.897687842994271529e-03,2.569913832629884196e-02,2.328273350439414788e-02,1.646154669635844364e-02
+6.450147029102913931e-01,-6.836261402435273027e-02,-3.898182006406720329e-02,-2.909916602683461535e-03,2.573563501910090812e-02,2.558782515390236115e-02,1.713611622768985987e-02
+6.466148271092899691e-01,-7.291858035103639113e-02,-4.056285504375364143e-02,-2.922273306081833461e-03,2.577227232774071536e-02,2.795554386121653842e-02,1.793219045965002006e-02
+6.482121111536969238e-01,-7.758750181879725538e-02,-4.239383246977530828e-02,-2.934775407543647411e-03,2.580905647934581340e-02,3.038146267640845152e-02,1.885603083078868369e-02
+6.498065480276917771e-01,-8.235993496110502465e-02,-4.448686440911468754e-02,-2.947441325227855360e-03,2.584599311341970185e-02,3.286071833137788056e-02,1.991377914019339807e-02
+6.513981307279594901e-01,-8.722558040480082875e-02,-4.685378529879280496e-02,-2.960290440664983206e-03,2.588308721413366820e-02,3.538801413918501310e-02,2.111142733648085565e-02
+6.529868522637218842e-01,-9.217328808452519839e-02,-4.950609405853014650e-02,-2.973343082546300344e-03,2.592034304480566806e-02,3.795761784416495227e-02,2.245479388808599777e-02
+6.545727056567677282e-01,-9.719106240717521883e-02,-5.245490068644492743e-02,-2.986620506876932514e-03,2.595776408929963142e-02,4.056336806543879570e-02,2.394949054972953309e-02
+6.561556839414840470e-01,-1.022660723895481988e-01,-5.571086900884465037e-02,-3.000144883347812159e-03,2.599535298071329117e-02,4.319868008085662320e-02,2.560089570019581304e-02
+6.577357801648857638e-01,-1.073846664679499741e-01,-5.928416067179945181e-02,-3.013939267403014863e-03,2.603311143891909665e-02,4.585655164482158042e-02,2.741412528732436321e-02
+6.593129873866474533e-01,-1.125323895910403244e-01,-6.318437880995432521e-02,-3.028027574021293581e-03,2.607104020792661361e-02,4.852957340697641037e-02,2.939400490152264608e-02
+6.608872986791325399e-01,-1.176940042945176373e-01,-6.742051271649823763e-02,-3.042434539643781161e-03,2.610913899075900657e-02,5.120994079482572847e-02,3.154504109430325554e-02
+6.624587071274248284e-01,-1.228535165236212534e-01,-7.200088395639206440e-02,-3.057185692505725964e-03,2.614740638684929022e-02,5.388946593045026295e-02,3.387139171908003232e-02
+6.640272058293580359e-01,-1.279942037319446169e-01,-7.693309056304671600e-02,-3.072307307910984286e-03,2.618583983212866007e-02,5.655959417890139146e-02,3.637684220462571866e-02
+6.655927878955469890e-01,-1.330986480956236340e-01,-8.222395646391167379e-02,-3.087826363097438079e-03,2.622443553748722220e-02,5.921141992571390261e-02,3.906477497575311375e-02
+6.671554464494171555e-01,-1.381487717665791370e-01,-8.787947912269913897e-02,-3.103770491491329026e-03,2.626318843278601864e-02,6.183570722933440927e-02,4.193814577979520369e-02
+6.687151746272345099e-01,-1.431258768054148034e-01,-9.390478077055465500e-02,-3.120167929239756836e-03,2.630209210280084683e-02,6.442291023123673799e-02,4.499945745149643017e-02
+6.702719655781371744e-01,-1.480106900343777510e-01,-1.003040601326070647e-01,-3.137047458525947692e-03,2.634113874057313412e-02,6.696319266399394787e-02,4.825073751474656669e-02
+6.718258124641639517e-01,-1.527834071611781641e-01,-1.070805481510108403e-01,-3.154438345247920390e-03,2.638031909484446491e-02,6.944645679755100109e-02,5.169351364268310461e-02
+6.733767084602854114e-01,-1.574237443624530997e-01,-1.142364648821332235e-01,-3.172370278462692452e-03,2.641962241687963522e-02,7.186236617625370304e-02,5.532879284053045243e-02
+6.749246467544329775e-01,-1.619109906993773107e-01,-1.217729791647540888e-01,-3.190873300139900667e-03,2.645903641830076383e-02,7.420037423768664764e-02,5.915704191198484230e-02
+6.764696205475297930e-01,-1.662240640010620440e-01,-1.296901720006503611e-01,-3.209977734332061013e-03,2.649854722493517542e-02,7.644975349645005691e-02,6.317816797794566230e-02
+6.780116230535196964e-01,-1.703415705682612691e-01,-1.379870026097376745e-01,-3.229714114413499357e-03,2.653813934107926431e-02,7.859962421837321522e-02,6.739150150467185241e-02
+6.795506474993976420e-01,-1.742418649586419677e-01,-1.466612774544785447e-01,-3.250113106427972558e-03,2.657779561389653905e-02,8.063898914378601901e-02,7.179578177317390286e-02
+6.810866871252394539e-01,-1.779031146187610157e-01,-1.557096231530630537e-01,-3.271205426831820876e-03,2.661749719590360275e-02,8.255676483971248480e-02,7.638914352161418897e-02
+6.826197351842306915e-01,-1.813033664169590442e-01,-1.651274636051876521e-01,-3.293021760892233477e-03,2.665722352208102497e-02,8.434181468410693883e-02,8.116910457707847415e-02
+6.841497849426976252e-01,-1.844206133697644245e-01,-1.749090000306214310e-01,-3.315592678505868696e-03,2.669695228499403827e-02,8.598298613330969398e-02,8.613255730368635232e-02
+6.856768296801356577e-01,-1.872328652133459226e-01,-1.850471955462376084e-01,-3.338948545318284021e-03,2.673665941408543203e-02,8.746914484682105639e-02,9.127576117281753498e-02
+6.872008626892394112e-01,-1.897182194498953089e-01,-1.955337640721674097e-01,-3.363119433907154916e-03,2.677631906441212475e-02,8.878921138267414914e-02,9.659433716205866050e-02
+6.887218772759319263e-01,-1.918549329103002299e-01,-2.063591631941707194e-01,-3.388135033160661735e-03,2.681590360536516629e-02,8.993219975745252170e-02,1.020832649572136486e-01
+6.902398667593944159e-01,-1.936214962194405809e-01,-2.175125916977307672e-01,-3.414024556930381898e-03,2.685538361599640669e-02,9.088725274684049393e-02,1.077368818478899176e-01
+6.917548244720952422e-01,-1.949967062879729984e-01,-2.289819907848657798e-01,-3.440816650441525914e-03,2.689472788802634018e-02,9.164368240303530366e-02,1.135488853595878339e-01
+6.932667437598194482e-01,-1.959597413236340013e-01,-2.407540505798519970e-01,-3.468539292729137455e-03,2.693390342447450972e-02,9.219100668286683842e-02,1.195123367549100202e-01
+6.947756179816976241e-01,-1.964902357299166402e-01,-2.528142217178618867e-01,-3.497219704591135198e-03,2.697287545821702021e-02,9.251898642940542539e-02,1.256196658873245386e-01
+6.962814405102357718e-01,-1.965683522919435822e-01,-2.651467295948212732e-01,-3.526884253439843860e-03,2.701160746092651915e-02,9.261766688210379406e-02,1.318626819996472077e-01
+6.977842047313435048e-01,-1.961748588226544798e-01,-2.777345954535794803e-01,-3.557558356348758822e-03,2.705006116808693697e-02,9.247740955996842926e-02,1.382325826422553805e-01
+6.992839040443636911e-01,-1.952911983035212629e-01,-2.905596604402163607e-01,-3.589266389479725625e-03,2.708819660956256897e-02,9.208893437266563176e-02,1.447199679163332087e-01
+7.007805318621018742e-01,-1.938995612620801057e-01,-3.036026143475309969e-01,-3.622031589847676727e-03,2.712597213555254308e-02,9.144335486295204762e-02,1.513148567307797954e-01
+7.022740816108536954e-01,-1.919829561115501682e-01,-3.168430300677910205e-01,-3.655875964542858614e-03,2.716334445224866806e-02,9.053221389641541439e-02,1.580067030170407560e-01
+7.037645467304356472e-01,-1.895252791675064719e-01,-3.302594005191138438e-01,-3.690820197897301883e-03,2.720026866894137765e-02,8.934751589265543803e-02,1.647844177821697620e-01
+7.052519206742123847e-01,-1.865113793029324063e-01,-3.438291822319352598e-01,-3.726883562056362668e-03,2.723669834821310781e-02,8.788176436701472527e-02,1.716363889173506962e-01
+7.067361969091264795e-01,-1.829271232114020040e-01,-3.575288410159535579e-01,-3.764083833654254235e-03,2.727258555449208963e-02,8.612799292673906093e-02,1.785505070702771424e-01
+7.082173689157260643e-01,-1.787594571194537907e-01,-3.713339014348144285e-01,-3.802437203055265835e-03,2.730788091306861690e-02,8.407979687520285295e-02,1.855141940197172490e-01
+7.096954301881950311e-01,-1.739964665052156356e-01,-3.852190027006928696e-01,-3.841958188324095386e-03,2.734253367452679426e-02,8.173136188155485926e-02,1.925144282881262048e-01
+7.111703742343798984e-01,-1.686274309840504471e-01,-3.991579566986546146e-01,-3.882659561302815969e-03,2.737649178222066604e-02,7.907749444128026051e-02,1.995377756288114479e-01
+7.126421945758196763e-01,-1.626428788332861253e-01,-4.131238085612063959e-01,-3.924552267741201160e-03,2.740970194498439788e-02,7.611364517139845853e-02,2.065704231518987743e-01
+7.141108847477731780e-01,-1.560346345798507284e-01,-4.270889028535198584e-01,-3.967645354531402178e-03,2.744210971818659278e-02,7.283593673004790103e-02,2.135982108660799406e-01
+7.155764382992484407e-01,-1.487958652811644245e-01,-4.410249507215481812e-01,-4.011945900723998260e-03,2.747365957878268594e-02,6.924118529519238119e-02,2.206066688299615497e-01
+7.170388487930305921e-01,-1.409211227450218040e-01,-4.549031012544411667e-01,-4.057458950457584930e-03,2.750429501561537510e-02,6.532692029341159723e-02,2.275810532629677085e-01
+7.184981098057098281e-01,-1.324063805801055094e-01,-4.686940148658860772e-01,-4.104187450074489422e-03,2.753395861897216940e-02,6.109140402136453013e-02,2.345063851481253847e-01
+7.199542149277103897e-01,-1.232490675413011050e-01,-4.823679402931709270e-01,-4.152132193978716988e-03,2.756259217395897951e-02,5.653364801755921681e-02,2.413674877164432575e-01
+7.214071577633178745e-01,-1.134480968538220114e-01,-4.958947910213242216e-01,-4.201291771718547799e-03,2.759013675732365986e-02,5.165342642134634205e-02,2.481490300372869162e-01
+7.228569319307079910e-01,-1.030038898936565667e-01,-5.092442267389474253e-01,-4.251662517623250488e-03,2.761653283785601987e-02,4.645128904814942866e-02,2.548355655424100474e-01
+7.243035310619742040e-01,-9.191839693346641471e-02,-5.223857338881044976e-01,-4.303238470776202772e-03,2.764172037778348315e-02,4.092856883231715120e-02,2.614115759381091530e-01
+7.257469488031560445e-01,-8.019511174273917520e-02,-5.352887095203682222e-01,-4.356011335626478777e-03,2.766563894344057847e-02,3.508738943434068502e-02,2.678615119128830435e-01
+7.271871788142661996e-01,-6.783908150489421918e-02,-5.479225445073671041e-01,-4.409970455976650570e-03,2.768822781101833164e-02,2.893067009233759798e-02,2.741698373424529267e-01
+7.286242147693199334e-01,-5.485691191944318951e-02,-5.602567072279777127e-01,-4.465102782427047938e-03,2.770942607668628460e-02,2.246212783365863705e-02,2.803210741703939801e-01
+7.300580503563610657e-01,-4.125676827333332741e-02,-5.722608300493529088e-01,-4.521392853881620738e-03,2.772917276589709906e-02,1.568627549241219041e-02,2.862998428957011510e-01
+7.314886792774907276e-01,-2.704837086070711535e-02,-5.839047929575642559e-01,-4.578822784315339858e-03,2.774740695242769450e-02,8.608418535231653762e-03,2.920909085230357083e-01
+7.329160952488951164e-01,-1.224298492385302375e-02,-5.951588087742235578e-01,-4.637372251584418945e-03,2.776406786935170859e-02,1.234650382171824249e-03,2.976792229303369952e-01
+7.343402920008724744e-01,3.146592514479088377e-03,-6.059935076168793122e-01,-4.697018497090830157e-03,2.777909502978521267e-02,-6.428158163210730310e-03,3.030499673213165046e-01
+7.357612632778612882e-01,1.910605428002015022e-02,-6.163800195710220642e-01,-4.757736329318987267e-03,2.779242833904160825e-02,-1.437236523404859280e-02,3.081885959808093878e-01
+7.371790028384670457e-01,3.561960494799773502e-02,-6.262900570374296816e-01,-4.819498131125545502e-03,2.780400820697085176e-02,-2.258957517452872890e-02,3.130808778805191372e-01
+7.385935044554904350e-01,5.266998995582020809e-02,-6.356959954188966533e-01,-4.882273874352496129e-03,2.781377566883234953e-02,-3.107065356102963263e-02,3.177129379446340107e-01
+7.400047619159539902e-01,7.023852924339824000e-02,-6.445709525950515939e-01,-4.946031140459415838e-03,2.782167250179699500e-02,-3.980574476308030435e-02,3.220712964542320478e-01
+7.414127690211299582e-01,8.830515587463683147e-02,-6.528888662084167960e-01,-5.010735151278093887e-03,2.782764133699551501e-02,-4.878429091101843529e-02,3.261429077609437344e-01
+7.428175195865669433e-01,1.068484572941059974e-01,-6.606245674450763605e-01,-5.076348801714179911e-03,2.783162577142955091e-02,-5.799505634609512794e-02,3.299152001383475752e-01
+7.442190074421178858e-01,1.258457221751451971e-01,-6.677538534724845043e-01,-5.142832693887981754e-03,2.783357048172168183e-02,-6.742615164674353734e-02,3.333761121002933869e-01
+7.456172264319661513e-01,1.452729904733237043e-01,-6.742535581198620953e-01,-5.210145183406600031e-03,2.783342133559879583e-02,-7.706506130487957018e-02,3.365141253733681670e-01
+7.470121704146531760e-01,1.651051092550770150e-01,-6.801016174619439747e-01,-5.278242432096110458e-03,2.783112549927831483e-02,-8.689866979735268171e-02,3.393183002233632894e-01
+7.484038332631057777e-01,1.853157895241867581e-01,-6.852771334257988789e-01,-5.347078461758018007e-03,2.782663153826232805e-02,-9.691329454095853890e-02,3.417783067562932819e-01
+7.497922088646621352e-01,2.058776675317930127e-01,-6.897604334575986318e-01,-5.416605216043217721e-03,2.781988952683274452e-02,-1.070947186945647722e-01,3.438844553766033374e-01
+7.511772911210995440e-01,2.267623708592703957e-01,-6.935331269737508020e-01,-5.486772624500842535e-03,2.781085114557031723e-02,-1.174282227369681242e-01,3.456277245822504951e-01
+7.525590739486605063e-01,2.479405855587577256e-01,-6.965781584433328177e-01,-5.557528677268242304e-03,2.779946977654421808e-02,-1.278986212309241033e-01,3.469997859565936982e-01
+7.539375512780798205e-01,2.693821268896287036e-01,-6.988798547777975712e-01,-5.628819501607351325e-03,2.778570059516374119e-02,-1.384902994353127137e-01,3.479930303262124824e-01
+7.553127170546113378e-01,2.910560118674273555e-01,-7.004239695626024442e-01,-5.700589437248178466e-03,2.776950066198757838e-02,-1.491872525099322833e-01,3.486005899823371168e-01
+7.566845652380541631e-01,3.129305356704149554e-01,-7.011977250504888293e-01,-5.772781123052851161e-03,2.775082900728581201e-02,-1.599731228602978561e-01,3.488163549318202272e-01
+7.580530898027795228e-01,3.349733484515136639e-01,-7.011898464017448651e-01,-5.845335591651271492e-03,2.772964671058822383e-02,-1.708312414622561448e-01,3.486349932491152326e-01
+7.594182847377570766e-01,3.571515346346019193e-01,-7.003905928673253012e-01,-5.918192355596876668e-03,2.770591697712301502e-02,-1.817446686230491382e-01,3.480519664973741323e-01
+7.607801440465815634e-01,3.794316932673672538e-01,-6.987917859039340929e-01,-5.991289507819665995e-03,2.767960521082256029e-02,-1.926962362007074714e-01,3.470635400540983406e-01
+7.621386617474984471e-01,4.017800202925613240e-01,-6.963868298661858303e-01,-6.064563820302531875e-03,2.765067908020727230e-02,-2.036685890038230640e-01,3.456667963643343944e-01
+7.634938318734313389e-01,4.241623905903087466e-01,-6.931707309925291272e-01,-6.137950847314359824e-03,2.761910858168548089e-02,-2.146442287322393727e-01,3.438596403020371395e-01
+7.648456484720069781e-01,4.465444422992255191e-01,-6.891401092121284266e-01,-6.211385032630302797e-03,2.758486609484049265e-02,-2.256055550896446660e-01,3.416408068434254597e-01
+7.661941056055823207e-01,4.688916592364656966e-01,-6.842932070087707208e-01,-6.284799816218169209e-03,2.754792643454662127e-02,-2.365349112399459108e-01,3.390098630935487467e-01
+7.675391973512698529e-01,4.911694560509112240e-01,-6.786298928036910150e-01,-6.358127746224749403e-03,2.750826689695783436e-02,-2.474146242573231624e-01,3.359672094502713646e-01
+7.688809178009646805e-01,5.133432600705993831e-01,-6.721516599432637573e-01,-6.431300591185603113e-03,2.746586729995142279e-02,-2.582270512391051098e-01,3.325140779358277121e-01
+7.702192610613689538e-01,5.353785960041873571e-01,-6.648616209822654222e-01,-6.504249454066061802e-03,2.742071002136185071e-02,-2.689546188873283006e-01,3.286525283816582044e-01
+7.715542212540194011e-01,5.572411674147623728e-01,-6.567644979783675563e-01,-6.576904889087143921e-03,2.737278002407590727e-02,-2.795798673297605341e-01,3.243854412693722433e-01
+7.728857925153117536e-01,5.788969381334445607e-01,-6.478666070796127441e-01,-6.649197018862942378e-03,2.732206488379270923e-02,-2.900854916237732284e-01,3.197165106325503547e-01
+7.742139689965273908e-01,6.003122124957698258e-01,-6.381758398661425691e-01,-6.721055649251062719e-03,2.726855480564180020e-02,-3.004543824362367888e-01,3.146502326226516266e-01
+7.755387448638585424e-01,6.214537137731178662e-01,-6.277016404267161676e-01,-6.792410388638605266e-03,2.721224263910194605e-02,-3.106696664943078834e-01,3.091918919046592884e-01
+7.768601142984342678e-01,6.422886616135077276e-01,-6.164549776670062986e-01,-6.863190766804795791e-03,2.715312388325948414e-02,-3.207147447187003730e-01,3.033475471124135736e-01
+7.781780714963458800e-01,6.627848464043606391e-01,-6.044483136014263680e-01,-6.933326351262289293e-03,2.709119668758271787e-02,-3.305733314154092328e-01,2.971240142725593003e-01
+7.794926106686721479e-01,6.829107017496691689e-01,-5.916955686941056669e-01,-7.002746864361615933e-03,2.702646185072270224e-02,-3.402294917756508719e-01,2.905288465475783233e-01
+7.808037260415053860e-01,7.026353760733401455e-01,-5.782120824809164894e-01,-7.071382301039361162e-03,2.695892280977461516e-02,-3.496676752995990234e-01,2.835703139890816682e-01
+7.821114118559759909e-01,7.219287987740369283e-01,-5.640145715463171783e-01,-7.139163044625981315e-03,2.688858562509165509e-02,-3.588727532617032590e-01,2.762573798151170545e-01
+7.834156623682784204e-01,7.407617469487944861e-01,-5.491210827089636481e-01,-7.206019982327041030e-03,2.681545896297358664e-02,-3.678300485039894818e-01,2.685996776610638515e-01
+7.847164718496960623e-01,7.591059062550206127e-01,-5.335509440972745487e-01,-7.271884612230110519e-03,2.673955406651178437e-02,-3.765253692135269725e-01,2.606074852244993356e-01
+7.860138345866267695e-01,7.769339308266665078e-01,-5.173247145851060935e-01,-7.336689158868166423e-03,2.666088473049170460e-02,-3.849450370980679326e-01,2.522916938892030836e-01
+7.873077448806070633e-01,7.942194990410431643e-01,-5.004641267854561537e-01,-7.400366681881730135e-03,2.657946726012429262e-02,-3.930759156282569999e-01,2.436637838167442782e-01
+7.885981970483383341e-01,8.109373651814404393e-01,-4.829920313406964594e-01,-7.462851179411697684e-03,2.649532043761961345e-02,-4.009054379575764382e-01,2.347357904948531582e-01
+7.898851854217111557e-01,8.270634103067260323e-01,-4.649323365144044651e-01,-7.524077697055743530e-03,2.640846547294365548e-02,-4.084216280398086374e-01,2.255202744955397687e-01
+7.911687043478299319e-01,8.425746858262657124e-01,-4.463099454863786453e-01,-7.583982423811093387e-03,2.631892595653619049e-02,-4.156131268972171888e-01,2.160302904623360998e-01
+7.924487481890386542e-01,8.574494570257178427e-01,-4.271506941467073659e-01,-7.642502791772386173e-03,2.622672780534866799e-02,-4.224692102495987145e-01,2.062793506153198786e-01
+7.937253113229446599e-01,8.716672396337584727e-01,-4.074812840734240993e-01,-7.699577572238246355e-03,2.613189920651913176e-02,-4.289798102911926425e-01,1.962813932775482328e-01
+7.949983881424437238e-01,8.852088357862697432e-01,-3.873292164002694604e-01,-7.755146964652956561e-03,2.603447055693137654e-02,-4.351355295603863138e-01,1.860507462637016329e-01
+7.962679730557449265e-01,8.980563636017671048e-01,-3.667227233951789733e-01,-7.809152687517999757e-03,2.593447439634885901e-02,-4.409276576493223465e-01,1.756020916350204408e-01
+7.975340604863950800e-01,9.101932842675084512e-01,-3.456906987445367285e-01,-7.861538060409720141e-03,2.583194534056571651e-02,-4.463481833727414028e-01,1.649504304968070767e-01
+7.987966448733025970e-01,9.216044245073047314e-01,-3.242626281287616097e-01,-7.912248085074419929e-03,2.572692001087570693e-02,-4.513898063523661963e-01,1.541110454052248835e-01
+8.000557206707632485e-01,9.322759965423206285e-01,-3.024685180424109832e-01,-7.961229521682744101e-03,2.561943695903641863e-02,-4.560459440535487019e-01,1.430994647560939470e-01
+8.013112823484830338e-01,9.421956124470586280e-01,-2.803388253723940737e-01,-8.008430961425125685e-03,2.550953658800748231e-02,-4.603107401107622287e-01,1.319314249714528542e-01
+8.025633243916036053e-01,9.513522956682081144e-01,-2.579043856538646429e-01,-8.053802895040713949e-03,2.539726107361358828e-02,-4.641790688107446661e-01,1.206228349050475213e-01
+8.038118413007259155e-01,9.597364882003091013e-01,-2.351963425054621470e-01,-8.097297775284317475e-03,2.528265428310484675e-02,-4.676465386451497674e-01,1.091897382866242466e-01
+8.050568275919344208e-01,9.673400551879572928e-01,-2.122460767480701394e-01,-8.138870078094466096e-03,2.516576169155794462e-02,-4.707094917311720472e-01,9.764827750798597827e-02
+8.062982777968216164e-01,9.741562846190556790e-01,-1.890851358044381703e-01,-8.178476355415681756e-03,2.504663029245855418e-02,-4.733650036235998448e-01,8.601465811625019775e-02
+8.075361864625111297e-01,9.801798829841713578e-01,-1.657451651593589981e-01,-8.216075287247385367e-03,2.492530851363480154e-02,-4.756108820482198296e-01,7.430511114793551808e-02
+8.087705481516828110e-01,9.854069694237573840e-01,-1.422578385765292108e-01,-8.251627729125408581e-03,2.480184612900436167e-02,-4.774456600236547943e-01,6.253585995269574094e-02
+8.100013574425958263e-01,9.898350647892654175e-01,-1.186547907638914739e-01,-8.285096750044292774e-03,2.467629416592492797e-02,-4.788685902923340842e-01,5.072308502153977799e-02
+8.112286089291126379e-01,9.934630776021626808e-01,-9.496755173228249580e-02,-8.316447671841311132e-03,2.454870481525637041e-02,-4.798796375820301718e-01,3.888288865364012553e-02
+8.124522972207226523e-01,9.962912870433002777e-01,-7.122748082601915165e-02,-8.345648101495403010e-03,2.441913133952221954e-02,-4.804794687118335017e-01,2.703126364999995992e-02
+8.136724169425666453e-01,9.983213224249321005e-01,-4.746570398731066870e-02,-8.372667957551356219e-03,2.428762798267646711e-02,-4.806694418511772149e-01,1.518405973366668965e-02
+8.148889627354590770e-01,9.995561407968461820e-01,-2.371305185430090667e-02,-8.397479494595546171e-03,2.415424987703311543e-02,-4.804515921344985618e-01,3.356952552691055535e-03
+8.161019292559127392e-01,1.000000000000000000e+00,-0.000000000000000000e+00,-8.420057321244801482e-03,2.401905295009302069e-02,-4.798286189842131400e-01,-8.434586850958382745e-03
+8.173113111761618477e-01,9.996584301630696556e-01,2.364338877968385350e-02,-8.440378415498443060e-03,2.388209383483639453e-02,-4.788038698558251816e-01,-2.017532162331663395e-02
+8.185171031841850242e-01,9.985382024868035833e-01,4.718752034919261595e-02,-8.458422134651026603e-03,2.374342977698206542e-02,-4.773813229131531610e-01,-3.185027703002016319e-02
+8.197192999837294991e-01,9.966472951115886714e-01,7.060332284249659640e-02,-8.474170220738491341e-03,2.360311854458921679e-02,-4.755655693719481603e-01,-4.344476730848053497e-02
+8.209178962943334268e-01,9.939948570511553116e-01,9.386229771623846863e-02,-8.487606800784208011e-03,2.346121833839791976e-02,-4.733617940323752715e-01,-5.494442172933758362e-02
+8.221128868513501997e-01,9.905911698770779594e-01,1.169365681883387104e-01,-8.498718385722645388e-03,2.331778770386953165e-02,-4.707757549635836480e-01,-6.633520976063464469e-02
+8.233042664059700977e-01,9.864476077246173702e-01,1.397989261181688536e-01,-8.507493863227988742e-03,2.317288544305274103e-02,-4.678137616491102602e-01,-7.760346316335207939e-02
+8.244920297252450458e-01,9.815765948427841447e-01,1.624228751616237920e-01,-8.513924486816331141e-03,2.302657052890415626e-02,-4.644826534334213219e-01,-8.873589891290514786e-02
+8.256761715921098199e-01,9.759915622365923360e-01,1.847826722447456216e-01,-8.518003861943058280e-03,2.287890202116781707e-02,-4.607897757542142037e-01,-9.971963897210844863e-02
+8.268566868054064711e-01,9.697069022194109156e-01,2.068533658883802140e-01,-8.519727926831416678e-03,2.272993898342593605e-02,-4.567429567655271194e-01,-1.105422295472391070e-01
+8.280335701799065307e-01,9.627379221583812763e-01,2.286108317444562610e-01,-8.519094930661110301e-03,2.257974040245790473e-02,-4.523504823359265625e-01,-1.211916590559819623e-01
+8.292068165463334362e-01,9.551007966624428525e-01,2.500318053984010302e-01,-8.516105409770656151e-03,2.242836510834482142e-02,-4.476210712166284189e-01,-1.316563746322477746e-01
+8.303764207513859574e-01,9.468125185033209323e-01,2.710939133159477321e-01,-8.510762157691302565e-03,2.227587169890327470e-02,-4.425638502220876069e-01,-1.419252954718103199e-01
+8.315423776577602899e-01,9.378908494068251001e-01,2.917756990706703024e-01,-8.503070193344926600e-03,2.212231846670051696e-02,-4.371883276730921253e-01,-1.519878283605715086e-01
+8.327046821441730362e-01,9.283542701103493222e-01,3.120566491490311956e-01,-8.493036727531416002e-03,2.196776332555731848e-02,-4.315043666127083011e-01,-1.618338773231687289e-01
+8.338633291053832997e-01,9.182219290278059720e-01,3.319172140007431993e-01,-8.480671122081959515e-03,2.181226374125259590e-02,-4.255221594030303423e-01,-1.714538555304901402e-01
+8.350183134522153328e-01,9.075135914260984427e-01,3.513388269876194792e-01,-8.465984853070176314e-03,2.165587666525701019e-02,-4.192522004968812777e-01,-1.808386945265886558e-01
+8.361696301115808527e-01,8.962495876944783779e-01,3.703039213981604294e-01,-8.448991466011327930e-03,2.149865847140559894e-02,-4.127052604018106030e-01,-1.899798505164995710e-01
+8.373172740265014680e-01,8.844507626774944509e-01,3.887959436740527353e-01,-8.429706530115094534e-03,2.134066489395827423e-02,-4.058923574889829466e-01,-1.988693112646179040e-01
+8.384612401561303274e-01,8.721384232088920108e-01,4.067993642195798509e-01,-8.408147589841834593e-03,2.118195096853252649e-02,-3.988247333524813643e-01,-2.074996011247569327e-01
+8.396015234757753243e-01,8.593342876586412826e-01,4.242996857009611045e-01,-8.384334117118013435e-03,2.102257097898150434e-02,-3.915138248978705660e-01,-2.158637844841471976e-01
+8.407381189769198571e-01,8.460604349491780418e-01,4.412834492590722579e-01,-8.358287457069516618e-03,2.086257840216782303e-02,-3.839712382457373674e-01,-2.239554669520552321e-01
+8.418710216672458113e-01,8.323392537394583846e-01,4.577382368071956642e-01,-8.330030776066792847e-03,2.070202586020136390e-02,-3.762087234432471838e-01,-2.317687978603640842e-01
+8.430002265706553199e-01,8.181933930916099884e-01,4.736526729334472052e-01,-8.299589005204071127e-03,2.054096507309825098e-02,-3.682381478089406524e-01,-2.392984676301580238e-01
+8.441257287272921905e-01,8.036457128393650340e-01,4.890164220346039925e-01,-8.266988783307897132e-03,2.037944681549978126e-02,-3.600714715265372901e-01,-2.465397083703161074e-01
+8.452475231935638877e-01,7.887192355263481502e-01,5.038201852643148237e-01,-8.232258399822797376e-03,2.021752087666432371e-02,-3.517207222479979967e-01,-2.534882892935767518e-01
+8.463656050421637378e-01,7.734370990442956950e-01,5.180556939367054747e-01,-8.195427733833830428e-03,2.005523602207719830e-02,-3.431979705854124041e-01,-2.601405134371228556e-01
+8.474799693620920227e-01,7.578225095311232895e-01,5.317157011441935310e-01,-8.156528194229540132e-03,1.989263995992216968e-02,-3.345153075509421092e-01,-2.664932125891539272e-01
+8.485906112586771854e-01,7.418986968634790902e-01,5.447939713290174124e-01,-8.115592658731657361e-03,1.972977931087058906e-02,-3.256848197860306993e-01,-2.725437411177461522e-01
+8.496975258535985898e-01,7.256888699893576655e-01,5.572852681478320802e-01,-8.072655410992547048e-03,1.956669957769072501e-02,-3.167185679482795169e-01,-2.782899682819627851e-01
+8.508007082849067260e-01,7.092161740343645437e-01,5.691853397148720628e-01,-8.027752078067305203e-03,1.940344512236883909e-02,-3.076285648188966104e-01,-2.837302709622501196e-01
+8.519001537070450825e-01,6.925036492309335490e-01,5.804909028824958295e-01,-7.980919567442198645e-03,1.924005914148742627e-02,-2.984267532113398791e-01,-2.888635239287807854e-01
+8.529958572908716841e-01,6.755741898221653274e-01,5.911996253128813938e-01,-7.932196003254468608e-03,1.907658364822786140e-02,-2.891249873044618446e-01,-2.936890902178919127e-01
+8.540878142236794091e-01,6.584505060840007973e-01,6.013101063068617691e-01,-7.881620661588002036e-03,1.891305945655206988e-02,-2.797350116479078719e-01,-2.982068100427276436e-01
+8.551760197092181937e-01,6.411550870164003380e-01,6.108218554524216248e-01,-7.829233907365248588e-03,1.874952616692391466e-02,-2.702684425594826245e-01,-3.024169902365073659e-01
+8.562604689677154601e-01,6.237101645877068945e-01,6.197352710949484145e-01,-7.775077129572419664e-03,1.858602215468417237e-02,-2.607367503682180288e-01,-3.063203907170365947e-01
+8.573411572358974331e-01,6.061376793339033897e-01,6.280516155630997410e-01,-7.719192678374710292e-03,1.842258456411947909e-02,-2.511512429724577222e-01,-3.099182139651301959e-01
+8.584180797670095675e-01,5.884592490089123329e-01,6.357729917294061561e-01,-7.661623803167939492e-03,1.825924930172423757e-02,-2.415230476144548943e-01,-3.132120891754029413e-01
+8.594912318308379762e-01,5.706961367618071934e-01,6.429023167975184139e-01,-7.602414586022354821e-03,1.809605103268063439e-02,-2.318630975475257650e-01,-3.162040590255776329e-01
+8.605606087137299687e-01,5.528692226538561849e-01,6.494432948891505353e-01,-7.541609880598387078e-03,1.793302318213814070e-02,-2.221821165005092025e-01,-3.188965663576120435e-01
+8.616262057186147016e-01,5.349989761450204551e-01,6.554003897809317358e-01,-7.479255251185191328e-03,1.777019793677054801e-02,-2.124906054238398234e-01,-3.212924384791391308e-01
+8.626880181650236068e-01,5.171054307332098032e-01,6.607787966788722933e-01,-7.415396909808485808e-03,1.760760624947969172e-02,-2.027988293258015939e-01,-3.233948714088031862e-01
+8.637460413891117073e-01,4.992081596885938333e-01,6.655844123095930875e-01,-7.350081656059247355e-03,1.744527784591137776e-02,-1.931168061959828153e-01,-3.252074156515417092e-01
+8.648002707436770464e-01,4.813262543246990544e-01,6.698238059534658007e-01,-7.283356817343013961e-03,1.728324123424898953e-02,-1.834542953145038846e-01,-3.267339588119829163e-01
+8.658507015981822263e-01,4.634783032825852689e-01,6.735041884125623213e-01,-7.215270189175378678e-03,1.712152371502409856e-02,-1.738207877936525370e-01,-3.279787109103751708e-01
+8.668973293387736145e-01,4.456823738443556615e-01,6.766333818539053135e-01,-7.145869978233177627e-03,1.696015139586735249e-02,-1.642254974247767674e-01,-3.289461871083230182e-01
+8.679401493683025492e-01,4.279559951575275800e-01,6.792197883625598065e-01,-7.075204744722232535e-03,1.679914920502256770e-02,-1.546773520201885910e-01,-3.296411921199092210e-01
+8.689791571063447684e-01,4.103161424508994792e-01,6.812723591809227708e-01,-7.003323347362268823e-03,1.663854090987183440e-02,-1.451849869307272334e-01,-3.300688028115828510e-01
+8.700143479892213927e-01,3.927792243417290163e-01,6.828005630928510872e-01,-6.930274887012193019e-03,1.647834913339953078e-02,-1.357567367494965371e-01,-3.302343520835478441e-01
+8.710457174700181326e-01,3.753610692926037662e-01,6.838143547792756571e-01,-6.856108654369834875e-03,1.631859537550185354e-02,-1.264006325303649592e-01,-3.301434126785205980e-01
+8.720732610186056055e-01,3.580769167653047313e-01,6.843241434111733135e-01,-6.780874078304592947e-03,1.615930003543106924e-02,-1.171243940494517688e-01,-3.298017806079392988e-01
+8.730969741216594304e-01,3.409414071255714718e-01,6.843407617660495212e-01,-6.704620674934293895e-03,1.600048243372345289e-02,-1.079354281800540688e-01,-3.292154578401711995e-01
+8.741168522826797682e-01,3.239685754130192108e-01,6.838754346675073315e-01,-6.627397996800146913e-03,1.584216083824506113e-02,-9.884082370570680676e-02,-3.283906366747441630e-01
+8.751328910220110835e-01,3.071718449200338541e-01,6.829397481561285677e-01,-6.549255585557381583e-03,1.568435248748404134e-02,-8.984735008392019662e-02,-3.273336833278033819e-01
+8.761450858768620176e-01,2.905640230448373607e-01,6.815456187387771703e-01,-6.470242924642139078e-03,1.552707361919573176e-02,-8.096145543885736873e-02,-3.260511221100576007e-01
+8.771534324013248174e-01,2.741572981056251135e-01,6.797052632885556278e-01,-6.390409394534433442e-03,1.537033949775147047e-02,-7.218926614040521716e-02,-3.245496192729344642e-01
+8.781579261663950975e-01,2.579632383960722697e-01,6.774311690558071364e-01,-6.309804227832551023e-03,1.521416444424086682e-02,-6.353658545850741912e-02,-3.228359676756273333e-01
+8.791585627599906028e-01,2.419927916157117165e-01,6.747360641340477416e-01,-6.228476468120382019e-03,1.505856186375165683e-02,-5.500889467800403054e-02,-3.209170717600384748e-01
+8.801553377869717476e-01,2.262562857598159061e-01,6.716328890409748453e-01,-6.146474928306472352e-03,1.490354427842003129e-02,-4.661135452146469155e-02,-3.187999317193923043e-01
+8.811482468691598235e-01,2.107634320274463791e-01,6.681347682339500160e-01,-6.063848150450397954e-03,1.474912335681848562e-02,-3.834880571002414335e-02,-3.164916291889133815e-01
+8.821372856453569833e-01,1.955233277589504270e-01,6.642549822853418551e-01,-5.980644367811404835e-03,1.459530994617087331e-02,-3.022577226593019598e-02,-3.139993132053370273e-01
+8.831224497713646704e-01,1.805444612706539331e-01,6.600069409806418319e-01,-5.896911468929478053e-03,1.444211410409662437e-02,-2.224646392691237082e-02,-3.113301860397398313e-01
+8.841037349200038253e-01,1.658347174267698620e-01,6.554041571083696338e-01,-5.812696962385888004e-03,1.428954513203991444e-02,-1.441477971765287198e-02,-3.084914894267608565e-01
+8.850811367811323160e-01,1.514013845411945891e-01,6.504602211129256739e-01,-5.728047942392538137e-03,1.413761160685848145e-02,-6.734311436021186807e-03,-3.054904909502892396e-01
+8.860546510616654770e-01,1.372511615142588959e-01,6.451887756674407060e-01,-5.643011056766074340e-03,1.398632141517575783e-02,7.916510255236069776e-04,-3.023344724269155792e-01
+8.870242734855938727e-01,1.233901669215756447e-01,6.396034924006066547e-01,-5.557632476989259572e-03,1.383568178639229748e-02,8.160115875540678923e-03,-2.990307161634236932e-01
+8.879899997940023937e-01,1.098239482501272846e-01,6.337180482847739604e-01,-5.471957867235294377e-03,1.368569932568816823e-02,1.536838388522017082e-02,-2.955864938193450975e-01
+8.889518257450892413e-01,9.655749202434019129e-02,6.275461033836498004e-01,-5.386032357915410650e-03,1.353638004660604084e-02,2.241404264140352959e-02,-2.920090547398457304e-01
+8.899097471141842464e-01,8.359523466665989833e-02,6.211012793912016328e-01,-5.299900517958953255e-03,1.338772940515452030e-02,2.929496043140230807e-02,-2.883056148665352758e-01
+8.908637596937675207e-01,7.094107417804927651e-02,6.143971387814910479e-01,-5.213606331435059922e-03,1.323975233205671766e-02,3.600928005098042589e-02,-2.844833465992374633e-01
+8.918138592934876652e-01,5.859838207520070424e-02,6.074471658699489574e-01,-5.127193171907821687e-03,1.309245326713255782e-02,4.255541160929561528e-02,-2.805493671977438708e-01
+8.927600417401808652e-01,4.657001672368455553e-02,6.002647469495695498e-01,-5.040703780137770901e-03,1.294583619107447965e-02,4.893202659455523490e-02,-2.765107310632766713e-01
+8.937023028778884326e-01,3.485833632416520528e-02,5.928631530274989236e-01,-4.954180243560689714e-03,1.279990465788219574e-02,5.513805005892639943e-02,-2.723744193036329864e-01
+8.946406385678755679e-01,2.346521306986682279e-02,5.852555224457017813e-01,-4.867663976049569664e-03,1.265466182547976895e-02,6.117265389300534373e-02,-2.681473316833188347e-01
+8.955750446886495686e-01,1.239204670997110652e-02,5.774548452165796375e-01,-4.781195698860733409e-03,1.251011048785269711e-02,6.703524800094669278e-02,-2.638362775278939587e-01
+8.965055171359771480e-01,1.639779691705765428e-03,5.694739470033393092e-01,-4.694815422964298754e-03,1.236625310603800543e-02,7.272547356892757053e-02,-2.594479694607622622e-01
+8.974320518229038646e-01,-8.791088251639095699e-03,5.613254753165481947e-01,-4.608562434656101985e-03,1.222309183846632952e-02,7.824319454353050041e-02,-2.549890151416415729e-01
+8.983546446797705531e-01,-1.890048470533193040e-02,5.530218863050053724e-01,-4.522475278352723442e-03,1.208062857058673535e-02,8.358848998503255745e-02,-2.504659100338452737e-01
+8.992732916542326427e-01,-2.868874912548549996e-02,5.445754316786395322e-01,-4.436591743215543941e-03,1.193886494373440528e-02,8.876164598895747160e-02,-2.458850320477788676e-01
+9.001879887112764766e-01,-3.815661744646463294e-02,5.359981474661980050e-01,-4.350948849451385415e-03,1.179780238282519128e-02,9.376314708004128418e-02,-2.412526348447199265e-01
+9.010987318332384088e-01,-4.730520627114222915e-02,5.273018423486995898e-01,-4.265582838014476789e-03,1.165744212515671362e-02,9.859366806577239684e-02,-2.365748440083721704e-01
+9.020055170198213457e-01,-5.613599683507438515e-02,5.184980890092623085e-01,-4.180529159435309242e-03,1.151778524581847582e-02,1.032540660895563678e-01,-2.318576496012124677e-01
+9.029083402881132869e-01,-6.465081943080062954e-02,5.095982138835991604e-01,-4.095822462947809259e-03,1.137883268483947351e-02,1.077453716910684450e-01,-2.271069037244571875e-01
+9.038071976726038681e-01,-7.285183704550857176e-02,5.006132896738594296e-01,-4.011496588339020543e-03,1.124058527092835005e-02,1.120687813016259110e-01,-2.223283148623407535e-01
+9.047020852252026790e-01,-8.074153012307870581e-02,4.915541269367061372e-01,-3.927584557375816098e-03,1.110304374737509751e-02,1.162256476777421277e-01,-2.175274458859590976e-01
+9.055929990152560283e-01,-8.832267968771555233e-02,4.824312681542416437e-01,-3.844118568445401094e-03,1.096620879389233541e-02,1.202174735353882251e-01,-2.127097092800652178e-01
+9.064799351295638186e-01,-9.559835299520672014e-02,4.732549814648944198e-01,-3.761129988424077603e-03,1.083008105059672307e-02,1.240459006517171253e-01,-2.078803648382117342e-01
+9.073628896723984205e-01,-1.025718859916759235e-01,4.640352555751937613e-01,-3.678649348336809032e-03,1.069466113905809604e-02,1.277127052332515666e-01,-2.030445169243938197e-01
+9.082418587655194386e-01,-1.092468693649708555e-01,4.547817950332960213e-01,-3.596706337649027523e-03,1.055994968137847871e-02,1.312197868986738347e-01,-1.982071127850537906e-01
+9.091168385481928071e-01,-1.156271324115511479e-01,4.455040165373365113e-01,-3.515329802126415833e-03,1.042594732180697654e-02,1.345691621734736165e-01,-1.933729405952666902e-01
+9.099878251772063331e-01,-1.217167279804717978e-01,4.362110459512985594e-01,-3.434547739204133268e-03,1.029265474422326812e-02,1.377629564432291676e-01,-1.885466277975703098e-01
+9.108548148268875710e-01,-1.275199174954327330e-01,4.269117152828679207e-01,-3.354387297624682619e-03,1.016007268998418873e-02,1.408033962956322938e-01,-1.837326410741997595e-01
+9.117178036891198101e-01,-1.330411563553189902e-01,4.176145618817269090e-01,-3.274874774742753993e-03,1.002820197467464876e-02,1.436928017419323578e-01,-1.789352836602340902e-01
+9.125767879733592824e-01,-1.382850793048271132e-01,4.083278265464425627e-01,-3.196035614414291428e-03,9.897043503640677239e-03,1.464335791328062186e-01,-1.741586961484841434e-01
+9.134317639066518169e-01,-1.432564864706469543e-01,3.990594524754489947e-01,-3.117894407663666673e-03,9.766598287219596664e-03,1.490282135104962769e-01,-1.694068570974589794e-01
+9.142827277336491587e-01,-1.479603288172466891e-01,3.898170858346209910e-01,-3.040474894305885897e-03,9.636867453124618038e-03,1.514792627749290133e-01,-1.646835819339003693e-01
+9.151296757166257345e-01,-1.524016957007766071e-01,3.806080762961042696e-01,-2.963799961206071802e-03,9.507852259559917626e-03,1.537893486797934639e-01,-1.599925231745967791e-01
+9.159726041354947501e-01,-1.565858005896807736e-01,3.714394770284063507e-01,-2.887891645459275940e-03,9.379554107571934016e-03,1.559611520642976701e-01,-1.553371728818624398e-01
+9.168115092878250660e-01,-1.605179688128360316e-01,3.623180473504115096e-01,-2.812771135703733341e-03,9.251974550949145296e-03,1.579974051121963985e-01,-1.507208612686314897e-01
+9.176463874888565186e-01,-1.642036244413731805e-01,3.532502535990732495e-01,-2.738458773914310713e-03,9.125115306056873904e-03,1.599008860914110930e-01,-1.461467597372574367e-01
+9.184772350715175726e-01,-1.676482787680801667e-01,3.442422718669390092e-01,-2.664974059148935391e-03,8.998978260264193368e-03,1.616744119783241340e-01,-1.416178814367783367e-01
+9.193040483864395318e-01,-1.708575176457699518e-01,3.352999904656112640e-01,-2.592335650865089120e-03,8.873565479796754335e-03,1.633208341222507354e-01,-1.371370832992748201e-01
+9.201268238019744139e-01,-1.738369909496380594e-01,3.264290133254206250e-01,-2.520561372812246074e-03,8.748879215510698901e-03,1.648430308456283466e-01,-1.327070672274082708e-01
+9.209455577042096053e-01,-1.765924010650332387e-01,3.176346632839875395e-01,-2.449668216551350100e-03,8.624921910414723447e-03,1.662439028040637590e-01,-1.283303823389783660e-01
+9.217602464969846254e-01,-1.791294922442286941e-01,3.089219855661096692e-01,-2.379672346599161475e-03,8.501696203480194058e-03,1.675263676612351038e-01,-1.240094276444919374e-01
+9.225708866019060039e-01,-1.814540401883845411e-01,3.002957523103205184e-01,-2.310589105833674769e-03,8.379204933745195219e-03,1.686933552942932180e-01,-1.197464534981968892e-01
+9.233774744583637117e-01,-1.835718424344017496e-01,2.917604668210679875e-01,-2.242433019425932924e-03,8.257451142434370575e-03,1.697478024487538983e-01,-1.155435642444470612e-01
+9.241800065235469264e-01,-1.854887088994196798e-01,2.833203675125431364e-01,-2.175217802204101013e-03,8.136438076057643867e-03,1.706926480601041463e-01,-1.114027220696607035e-01
+9.249784792724585758e-01,-1.872104527034345878e-01,2.749794338113802361e-01,-2.108956364633565901e-03,8.016169187348404224e-03,1.715308290171937977e-01,-1.073257477563118872e-01
+9.257728891979321029e-01,-1.887428813862113419e-01,2.667413904670608016e-01,-2.043660818498028563e-03,7.896648135676349531e-03,1.722652761481464068e-01,-1.033143249465595520e-01
+9.265632328106458981e-01,-1.900917890284878897e-01,2.586097132659423425e-01,-1.979342484318138799e-03,7.777878786451144014e-03,1.728989094692044148e-01,-9.937000224827913231e-02
+9.273495066391393982e-01,-1.912629479080211192e-01,2.505876343377373416e-01,-1.916011897458985832e-03,7.659865210759278162e-03,1.734346352587338436e-01,-9.549419653148710774e-02
+9.281317072298272963e-01,-1.922621010766110972e-01,2.426781475648709940e-01,-1.853678816103258202e-03,7.542611681640473513e-03,1.738753423497217399e-01,-9.168819641133441056e-02
+9.289098311470160851e-01,-1.930949557546425832e-01,2.348840143741670605e-01,-1.792352229336167989e-03,7.426122672621373808e-03,1.742238978256999693e-01,-8.795316534874482450e-02
+9.296838749729178231e-01,-1.937671759197199528e-01,2.272077700047417659e-01,-1.732040365029136053e-03,7.310402853586281217e-03,1.744831451527208421e-01,-8.429014414890752260e-02
+9.304538353076660107e-01,-1.942843767431918778e-01,2.196517292817754541e-01,-1.672750696713515631e-03,7.195457086866678502e-03,1.746558997593916640e-01,-8.070005463540790014e-02
+9.312197087693302455e-01,-1.946521179364291143e-01,2.122179921616098308e-01,-1.614489953378613453e-03,7.081290422668571650e-03,1.747449475595425350e-01,-7.718370396174224768e-02
+9.319814919939308773e-01,-1.948758985990578785e-01,2.049084506665383998e-01,-1.557264128101244717e-03,6.967908092798312525e-03,1.747530415420111005e-01,-7.374178648731349872e-02
+9.327391816354543286e-01,-1.949611519292948070e-01,1.977247945803971008e-01,-1.501078486473088249e-03,6.855315505350186023e-03,1.746828994857199391e-01,-7.037488811005117306e-02
+9.334927743658669730e-01,-1.949132400365332918e-01,1.906685179382310025e-01,-1.445937575858166277e-03,6.743518237176920893e-03,1.745372023319389232e-01,-6.708348920798051784e-02
+9.342422668751307890e-01,-1.947374500825358179e-01,1.837409248795004446e-01,-1.391845235098184471e-03,6.632522026843648034e-03,1.743185908955466878e-01,-6.386796883837286831e-02
+9.349876558712167940e-01,-1.944389894645556782e-01,1.769431365710172344e-01,-1.338804604031453840e-03,6.522332767534042008e-03,1.740296651833854180e-01,-6.072860696336539188e-02
+9.357289380801203649e-01,-1.940229820482553269e-01,1.702760966488337802e-01,-1.286818133162892508e-03,6.412956497103232748e-03,1.736729825342563649e-01,-5.766558945431533856e-02
+9.364661102458753383e-01,-1.934944652275905619e-01,1.637405783463696529e-01,-1.235887594466133403e-03,6.304399390458631167e-03,1.732510549773092767e-01,-5.467900990777905396e-02
+9.371991691305681105e-01,-1.928583857917375854e-01,1.573371901115500071e-01,-1.186014090263509332e-03,6.196667749741303216e-03,1.727663495743467503e-01,-5.176887424051968695e-02
+9.379281115143518477e-01,-1.921195980937540626e-01,1.510663820235492727e-01,-1.137198065186692053e-03,6.089767994697716826e-03,1.722212851710678005e-01,-4.893510370576030499e-02
+9.386529341954613637e-01,-1.912828603200459610e-01,1.449284520008997934e-01,-1.089439315610463751e-03,5.983706653219341559e-03,1.716182334112936192e-01,-4.617753819727464515e-02
+9.393736339902262200e-01,-1.903528328872317321e-01,1.389235515844517987e-01,-1.042737001930395691e-03,5.878490349170595319e-03,1.709595164410261936e-01,-4.349594022468322024e-02
+9.400902077330849371e-01,-1.893340764190848957e-01,1.330516926879910800e-01,-9.970896585274251298e-04,5.774125793507615262e-03,1.702474060552501001e-01,-4.088999692763178873e-02
+9.408026522765996491e-01,-1.882310493485909453e-01,1.273127528634944994e-01,-9.524952050963948714e-04,5.670619771929497760e-03,1.694841241790762121e-01,-3.835932470879149098e-02
+9.415109644914688714e-01,-1.870481069872487467e-01,1.217064814251768018e-01,-9.089509584132232992e-04,5.567979133673583297e-03,1.686718412056879557e-01,-3.590347207235720234e-02
+9.422151412665417114e-01,-1.857894999361514032e-01,1.162325051737153891e-01,-8.664536446487697968e-04,5.466210779670113186e-03,1.678126761520606358e-01,-3.352192301121836165e-02
+9.429151795088319687e-01,-1.844593734610499025e-01,1.108903346716127725e-01,-8.249994102955867650e-04,5.365321651509856633e-03,1.669086955672672290e-01,-3.121409914469278415e-02
+9.436110761435310135e-01,-1.830617658744735643e-01,1.056793692481227953e-01,-7.845838337712304896e-04,5.265318718887734666e-03,1.659619148381835940e-01,-2.897936400805519591e-02
+9.443028281140214419e-01,-1.816006087285474135e-01,1.005989024819246014e-01,-7.452019384723490092e-04,5.166208966993410950e-03,1.649742966125075372e-01,-2.681702620356332320e-02
+9.449904323818909546e-01,-1.800797260202371963e-01,9.564812815936511725e-02,-7.068482052327397036e-04,5.067999385031644703e-03,1.639477515616180880e-01,-2.472634140540753284e-02
+9.456738859269452346e-01,-1.785028336713423980e-01,9.082614541342129422e-02,-6.695165833972835986e-04,4.970696952625352875e-03,1.628841390934254174e-01,-2.270651563624430380e-02
+9.463531857472214925e-01,-1.768735402461454609e-01,8.613196321259919408e-02,-6.332005048330555629e-04,4.874308627026598117e-03,1.617852661861181041e-01,-2.075670948886995659e-02
+9.470283288590011228e-01,-1.751953469978727584e-01,8.156450636364204199e-02,-5.978928970553859610e-04,4.778841332100053005e-03,1.606528878991312359e-01,-1.887603922780110161e-02
+9.476993122968236927e-01,-1.734716476266185137e-01,7.712262028019932492e-02,-5.635861950140156045e-04,4.684301944968938258e-03,1.594887088212467163e-01,-1.706357991575583807e-02
+9.483661331134992656e-01,-1.717057292593879014e-01,7.280507541672472105e-02,-5.302723543867243197e-04,4.590697283240031609e-03,1.582943826009083066e-01,-1.531836887316582151e-02
+9.490287883801215019e-01,-1.699007727913896260e-01,6.861057219489337389e-02,-4.979428654122211844e-04,4.498034092185188883e-03,1.570715130260588732e-01,-1.363940791434748237e-02
+9.496872751860808703e-01,-1.680598540809154495e-01,6.453774552447144597e-02,-4.665887661955643909e-04,4.406319032432155607e-03,1.558216538340264634e-01,-1.202566604012104447e-02
+9.503415906390768608e-01,-1.661859444787519846e-01,6.058516962951127072e-02,-4.362006560660228291e-04,4.315558667604597193e-03,1.545463100784810750e-01,-1.047608125079883302e-02
+9.509917318651310847e-01,-1.642819119853526355e-01,5.675136178585819119e-02,-4.067687086330956225e-04,4.225759451653625029e-03,1.532469386080482221e-01,-8.989564090698830626e-03
+9.516376960085998205e-01,-1.623505224222832610e-01,5.303478686490203237e-02,-3.782826867193092601e-04,4.136927716286425451e-03,1.519249487870483473e-01,-7.564999369694755914e-03
+9.522794802321865593e-01,-1.603944409303059426e-01,4.943386140415667596e-02,-3.507319549721486817e-04,4.049069659495491830e-03,1.505817028528857182e-01,-6.201248459701528930e-03
+9.529170817169542174e-01,-1.584162328479029325e-01,4.594695715361138411e-02,-3.241054951771396770e-04,3.962191333115126952e-03,1.492185176653260947e-01,-4.897152261566090937e-03
+9.535504976623380147e-01,-1.564183657679067418e-01,4.257240554640722568e-02,-2.983919193768773187e-04,3.876298631373199192e-03,1.478366644585673850e-01,-3.651532126763376573e-03
+9.541797252861573542e-01,-1.544032102729479272e-01,3.930850060553650605e-02,-2.735794847527081661e-04,3.791397278846183726e-03,1.464373712640813785e-01,-2.463193400494336719e-03
+9.548047618246282564e-01,-1.523730426095019352e-01,3.615350339585250705e-02,-2.496561072117121742e-04,3.707492819114257925e-03,1.450218218751985477e-01,-1.330925757817993417e-03
+9.554256045323753499e-01,-1.503300455093910803e-01,3.310564446215518569e-02,-2.266093758659202412e-04,3.624590604424660946e-03,1.435911584446021572e-01,-2.535070084402308655e-04
+9.560422506824440836e-01,-1.482763103180773645e-01,3.016312776531379983e-02,-2.044265678404828433e-04,3.542695783927528146e-03,1.421464817751483811e-01,7.702962119774890140e-04
+9.566546975663126062e-01,-1.462138386678963342e-01,2.732413350099176241e-02,-1.830946627703439699e-04,3.461813293416640593e-03,1.406888525914521959e-01,1.741725957274185008e-03
+9.572629424939035347e-01,-1.441445445745539733e-01,2.458682152139844007e-02,-1.626003570539846538e-04,3.381947844870222993e-03,1.392192921307212616e-01,2.662031915022696101e-03
+9.578669827935962777e-01,-1.420702561213416648e-01,2.194933381610261400e-02,-1.429300785943104374e-04,3.303103916805741509e-03,1.377387836047703484e-01,3.532468881081621704e-03
+9.584668158122380266e-01,-1.399927175533140500e-01,1.940979771929949790e-02,-1.240700012066416893e-04,3.225285744408463735e-03,1.362482729777233226e-01,4.354295886363292005e-03
+9.590624389151560791e-01,-1.379135910306555357e-01,1.696632802775262239e-02,-1.060060597153502195e-04,3.148497310155397560e-03,1.347486704536853819e-01,5.128773566867901243e-03
+9.596538494861691637e-01,-1.358344591050542061e-01,1.461703028164529697e-02,-8.872396449260142384e-05,3.072742334900794865e-03,1.332408506672465576e-01,5.857163995006566155e-03
+9.602410449275984305e-01,-1.337568262092010407e-01,1.236000262599368932e-02,-7.220921589903356785e-05,2.998024269611552876e-03,1.317256547705384917e-01,6.540728125426087045e-03
+9.608240226602797751e-01,-1.316821211052982832e-01,1.019333846526914063e-02,-5.644711914481633088e-05,2.924346286577122252e-03,1.302038907566581416e-01,7.180724998522237425e-03
+9.614027801235747184e-01,-1.296116986832824003e-01,8.115128389862894670e-03,-4.142279921594362632e-05,2.851711272022226622e-03,1.286763350261238836e-01,7.778409843797648958e-03
+9.619773147753815090e-01,-1.275468422514836331e-01,6.123462807294144383e-03,-2.712121518670796737e-05,2.780121818141386651e-03,1.271437330431002821e-01,8.335033763987259245e-03
+9.625476240921463367e-01,-1.254887655555265713e-01,4.216433296350016041e-03,-1.352717498000631392e-05,2.709580216676606406e-03,1.256068005044071179e-01,8.851841278263369861e-03
+9.631137055688746562e-01,-1.234386147911452486e-01,2.392135183002803110e-03,-6.253499703793493915e-07,2.640088451834550314e-03,1.240662245138556968e-01,9.330070403927149744e-03
+9.636755567191421790e-01,-1.213974708472432135e-01,6.486686497880124903e-04,1.159971048320689637e-05,2.571648194487479236e-03,1.225226643099234930e-01,9.770950229934861550e-03
+9.642331750751055308e-01,-1.193663512327061416e-01,-1.015858858224933520e-03,2.316356426724240950e-05,2.504260796290594566e-03,1.209767525630558865e-01,1.017570115701082502e-02
+9.647865581875129104e-01,-1.173462120445598889e-01,-2.603331433318089271e-03,3.408186266571769032e-05,2.437927284280141851e-03,1.194290965644017044e-01,1.054553310604178923e-02
+9.653357036257157464e-01,-1.153379502548589897e-01,-4.115622962859768683e-03,4.437033586079226576e-05,2.372648355601379710e-03,1.178802787784468536e-01,1.088164496605883123e-02
+9.658806089776785786e-01,-1.133424057973587140e-01,-5.554596055352411209e-03,5.404477857335806683e-05,2.308424373823773045e-03,1.163308577257676502e-01,1.118522300590746137e-02
+9.664212718499893828e-01,-1.113603631544592271e-01,-6.922100092334411425e-03,6.312103590673340486e-05,2.245255364373369405e-03,1.147813697512120035e-01,1.145744113539892868e-02
+9.669576898678714505e-01,-1.093925539657085444e-01,-8.219970536067893255e-03,7.161498974769894171e-05,2.183141011152558032e-03,1.132323288189845401e-01,1.169945901612072514e-02
+9.674898606751921593e-01,-1.074396585648374342e-01,-9.450027399537114930e-03,7.954254431166474281e-05,2.122080653804607822e-03,1.116842282537611336e-01,1.191242194084298975e-02
+9.680177819344745194e-01,-1.055023079927968205e-01,-1.061407413450522579e-02,8.691961298038537590e-05,2.062073284651960627e-03,1.101375415211612169e-01,1.209746012078949558e-02
+9.685414513269068326e-01,-1.035810860777054143e-01,-1.171389669466195958e-02,9.376210468982722939e-05,2.003117546660173297e-03,1.085927228100868058e-01,1.225568783016748285e-02
+9.690608665523531284e-01,-1.016765311506106634e-01,-1.275126270147943809e-02,1.000859103453454943e-04,1.945211731713915228e-03,1.070502082208778516e-01,1.238820253748421953e-02
+9.695760253293631559e-01,-9.978913807360020871e-02,-1.372792019535836253e-02,1.059068896149217934e-04,1.888353779292404862e-03,1.055104162871660783e-01,1.249608498784900157e-02
+9.700869253951824867e-01,-9.791935983275935895e-02,-1.464559703947219092e-02,1.112408582670870922e-04,1.832541275655264704e-03,1.039737492292384796e-01,1.258039822873017501e-02
+9.705935645057625072e-01,-9.606760959424034962e-02,-1.550600014992538356e-02,1.161035752131961895e-04,1.777771453179710160e-03,1.024405932518081819e-01,1.264218712282936564e-02
+9.710959404357700775e-01,-9.423426245046126470e-02,-1.631081495668240330e-02,1.205107295730647592e-04,1.724041190970057201e-03,1.009113193326008767e-01,1.268247758381741211e-02
+9.715940509785973012e-01,-9.241965692007791944e-02,-1.706170417891769014e-02,1.244779285677251606e-04,1.671347015058695720e-03,9.938628438016111621e-02,1.270227724738715383e-02
+9.720878939463715174e-01,-9.062409684418311351e-02,-1.776030784013840624e-02,1.280206857843619261e-04,1.619685099432326353e-03,9.786583155398347056e-02,1.270257396198082485e-02
+9.725774671699647378e-01,-8.884785286967439299e-02,-1.840824248067103250e-02,1.311544088351213272e-04,1.569051267244078555e-03,9.635029126229538443e-02,1.268433592312958810e-02
+9.730627684990029724e-01,-8.709116432291404142e-02,-1.900710080210995731e-02,1.338943878583291017e-04,1.519440992829274462e-03,9.483998133011815879e-02,1.264851113112413529e-02
+9.735437958018759996e-01,-8.535424043421498130e-02,-1.955845096543746067e-02,1.362557842070165467e-04,1.470849403864231755e-03,9.333520828525362911e-02,1.259602762607485611e-02
+9.740205469657464699e-01,-8.363726218332842577e-02,-2.006383669553925622e-02,1.382536194098737535e-04,1.423271283934669654e-03,9.183626737981248622e-02,1.252779233374866179e-02
+9.744930198965594537e-01,-8.194038345491064246e-02,-2.052477636931990604e-02,1.399027640950918216e-04,1.376701075592391644e-03,9.034344380461951929e-02,1.244469193488291318e-02
+9.749612125190515455e-01,-8.026373278992793292e-02,-2.094276305669904789e-02,1.412179282297627337e-04,1.331132883508943151e-03,8.885701268489278049e-02,1.234759206063162397e-02
+9.754251227767596344e-01,-7.860741471210733189e-02,-2.131926442528314300e-02,1.422136502796456710e-04,1.286560478690885502e-03,8.737723977111563900e-02,1.223733685167596730e-02
+9.758847486320304521e-01,-7.697151093477229422e-02,-2.165572227610442976e-02,1.429042873808277793e-04,1.242977302372752849e-03,8.590438225259276750e-02,1.211474931231634050e-02
+9.763400880660293435e-01,-7.535608187482892295e-02,-2.195355221747949392e-02,1.433040060568270730e-04,1.200376470578684920e-03,8.443868889355787832e-02,1.198063150430874260e-02
+9.767911390787489267e-01,-7.376116795964299255e-02,-2.221414403145884758e-02,1.434267731486816514e-04,1.158750779179484663e-03,8.298040045663601649e-02,1.183576353651922576e-02
+9.772378996890179748e-01,-7.218679057202598282e-02,-2.243886120241352089e-02,1.432863463461592278e-04,1.118092709154497062e-03,8.152975070414629488e-02,1.168090421542125265e-02
+9.776803679345105191e-01,-7.063295342633114926e-02,-2.262904086803003978e-02,1.428962663556743553e-04,1.078394431586197650e-03,8.008696649137046764e-02,1.151679098955783395e-02
+9.781185418717538438e-01,-6.909964419976825922e-02,-2.278599417034530075e-02,1.422698480370362267e-04,1.039647814424772727e-03,7.865226728217923624e-02,1.134413922558374054e-02
+9.785524195761373667e-01,-6.758683461023555494e-02,-2.291100548705004777e-02,1.414201730191362745e-04,1.001844428397207897e-03,7.722586747951006025e-02,1.116364366175887600e-02
+9.789819991419210776e-01,-6.609448244911146630e-02,-2.300533321471956438e-02,1.403600824492869722e-04,9.649755530949449215e-04,7.580797499348909674e-02,1.097597702556824678e-02
+9.794072786822439758e-01,-6.462253207429742574e-02,-2.307020951063511183e-02,1.391021694067329424e-04,9.290321843698426266e-04,7.439879260096343705e-02,1.078179067433761990e-02
+9.798282563291319525e-01,-6.317091560158319408e-02,-2.310684029080126156e-02,1.376587724717365355e-04,8.940050407951710774e-04,7.299851790155574560e-02,1.058171482245758716e-02
+9.802449302335066728e-01,-6.173955399156676876e-02,-2.311640550244647291e-02,1.360419693115274403e-04,8.598845716119039645e-04,7.160734337685585593e-02,1.037635831712776027e-02
+9.806572985651929031e-01,-6.032835754600310990e-02,-2.310005907599839078e-02,1.342635707447663068e-04,8.266609638410530887e-04,7.022545746584983595e-02,1.016630907532106720e-02
+9.810653595129272819e-01,-5.893722737366090952e-02,-2.305892919952682793e-02,1.323351152222306546e-04,7.943241503435526096e-04,6.885304373312502169e-02,9.952133972878968518e-03
+9.814691112843658694e-01,-5.756605566400107427e-02,-2.299411834219272990e-02,1.302678636552249915e-04,7.628638178529504200e-04,6.749028218477173824e-02,9.734379258036812005e-03
+9.818685521060921406e-01,-5.621472699857221061e-02,-2.290670361216727915e-02,1.280727946774648269e-04,7.322694151867419933e-04,6.613734855273267765e-02,9.513570388603729763e-03
+9.822636802236245357e-01,-5.488311851895467247e-02,-2.279773684150825444e-02,1.257606000931753623e-04,7.025301617070970782e-04,6.479441563925587844e-02,9.290212428964453317e-03
+9.826544939014246749e-01,-5.357110130990804658e-02,-2.266824489404798554e-02,1.233416809044216695e-04,6.736350555817300929e-04,6.346165230089345632e-02,9.064790074116403640e-03
+9.830409914229045754e-01,-5.227854093321979839e-02,-2.251922971489836792e-02,1.208261436095893200e-04,6.455728834445788496e-04,6.213922394491317586e-02,8.837768200615974540e-03
+9.834231710904342005e-01,-5.100529752848368542e-02,-2.235166854508515571e-02,1.182237973668139025e-04,6.183322286631732232e-04,6.082729375991757781e-02,8.609592147834726344e-03
+9.838010312253490097e-01,-4.975122715524348677e-02,-2.216651446828321950e-02,1.155441510640530618e-04,5.919014803882638193e-04,5.952602154304467014e-02,8.380687413922355972e-03
+9.841745701679575076e-01,-4.851618216386038157e-02,-2.196469659615397918e-02,1.127964106269628612e-04,5.662688430663631199e-04,5.823556427459311569e-02,8.151460064419050952e-03
+9.845437862775482385e-01,-4.730001186032094934e-02,-2.174712012109456666e-02,1.099894770446563342e-04,5.414223459915674897e-04,5.695607606996401506e-02,7.922297422269410430e-03
+9.849086779323972252e-01,-4.610256243089171785e-02,-2.151466670893004141e-02,1.071319449570685414e-04,5.173498523899328983e-04,5.568770945000853928e-02,7.693568150982014672e-03
+9.852692435297746298e-01,-4.492367816834432620e-02,-2.126819476770326969e-02,1.042321014365203848e-04,4.940390684979584354e-04,5.443061410367644459e-02,7.465622598439858315e-03
+9.856254814859526370e-01,-4.376320216521992906e-02,-2.100854000672876601e-02,1.012979250896689200e-04,4.714775541100035263e-04,5.318493658486505432e-02,7.238792623035209331e-03
+9.859773902362116704e-01,-4.262097597544267646e-02,-2.073651561834519499e-02,9.833708517929503137e-05,4.496527322227478586e-04,5.195082188080083624e-02,7.013392151489614493e-03
+9.863249682348473879e-01,-4.149684030128544809e-02,-2.045291227899684663e-02,9.535694185159839442e-05,4.285518982700010221e-04,5.072841299712994856e-02,6.789718098080991387e-03
+9.866682139551777864e-01,-4.039063542631832898e-02,-2.015849879134840039e-02,9.236454657638758291e-05,4.081622298080385381e-04,4.951785096076506687e-02,6.568050102695554941e-03
+9.870071258895497524e-01,-3.930220196621843354e-02,-1.985402245837172841e-02,8.936664263560296017e-05,3.884707965071546379e-04,4.831927417036931127e-02,6.348650790320617930e-03
+9.873417025493456123e-01,-3.823138066697611553e-02,-1.954020931800482669e-02,8.636966602601870520e-05,3.694645702034762445e-04,4.713281947874953942e-02,6.131766309395721148e-03
+9.876719424649894608e-01,-3.717801305734641515e-02,-1.921776441807956232e-02,8.337974695886821478e-05,3.511304342813406599e-04,4.595862162139083967e-02,5.917626810321809268e-03
+9.879978441859542659e-01,-3.614194165947108572e-02,-1.888737229941587353e-02,8.040271158377787053e-05,3.334551939522057210e-04,4.479681342347893724e-02,5.706446547856398703e-03
+9.883194062807676428e-01,-3.512301007061307406e-02,-1.854969733131955900e-02,7.744408396680040366e-05,3.164255854253827656e-04,4.364752620062732624e-02,5.498424272916540081e-03
+9.886366273370181812e-01,-3.412106405250689423e-02,-1.820538398389700760e-02,7.450908849750016835e-05,3.000282864507256421e-04,4.251088822446034282e-02,5.293743754137957870e-03
+9.889495059613619965e-01,-3.313595020538635111e-02,-1.785505714074498221e-02,7.160265274491005414e-05,2.842499255817324964e-04,4.138702767309830610e-02,5.092574233586316841e-03
+9.892580407795287245e-01,-3.216751780891270035e-02,-1.749932247879869979e-02,6.872941067582534925e-05,2.690770915072855261e-04,4.027606960181813467e-02,4.895070765153091558e-03
+9.895622304363274058e-01,-3.121561811342249762e-02,-1.713876714601679160e-02,6.589370587392432159e-05,2.544963434269734886e-04,3.917813765448904806e-02,4.701374001764525509e-03
+9.898620735956522587e-01,-3.028010465160565698e-02,-1.677395961800718163e-02,6.309959513607583731e-05,2.404942199263213000e-04,3.809335382372301682e-02,4.511611528177928918e-03
+9.901575689404893410e-01,-2.936083377620486018e-02,-1.640545030899743031e-02,6.035085289328305410e-05,2.270572488889371747e-04,3.702183775109069153e-02,4.325897784312627420e-03
+9.904487151729213235e-01,-2.845766416186903611e-02,-1.603377229904767720e-02,5.765097501690136245e-05,2.141719567048303539e-04,3.596370793533271853e-02,4.144333774313019997e-03
+9.907355110141340404e-01,-2.757045737094739046e-02,-1.565944110695459413e-02,5.500318337249199730e-05,2.018248772474946686e-04,3.491908091315302359e-02,3.967008567718174721e-03
+9.910179552044214857e-01,-2.669907814459913931e-02,-1.528295516863957884e-02,5.241043094999326221e-05,1.900025614391593113e-04,3.388807092441101232e-02,3.793999479470020267e-03
+9.912960465031916968e-01,-2.584339398727954554e-02,-1.490479638608829815e-02,4.987540705011484600e-05,1.786915862199740400e-04,3.287079087309786180e-02,3.625372118178188559e-03
+9.915697836889719730e-01,-2.500327568480187682e-02,-1.452543052124221669e-02,4.740054240330185902e-05,1.678785634042842122e-04,3.186735151061943772e-02,3.461180724886131922e-03
+9.918391655594147593e-01,-2.417859702113750106e-02,-1.414530725996708191e-02,4.498801497479015609e-05,1.575501485140583453e-04,3.087786209562524281e-02,3.301469129056599193e-03
+9.921041909313020879e-01,-2.336923525690464201e-02,-1.376486108605283129e-02,4.263975564047607701e-05,1.476930494171602886e-04,2.990242960156964946e-02,3.146270183657264366e-03
+9.923648586405513505e-01,-2.257507075731322865e-02,-1.338451097316083541e-02,4.035745426515153659e-05,1.382940349915145562e-04,2.894115949631782814e-02,2.995607413477657452e-03
+9.926211675422206282e-01,-2.179598737427575900e-02,-1.300466129470104031e-02,3.814256616874578518e-05,1.293399433408379154e-04,2.799415508656714902e-02,2.849494375953142474e-03
+9.928731165105130207e-01,-2.103187228991064764e-02,-1.262570186575545997e-02,3.599631839525270076e-05,1.208176902712320517e-04,2.706151785237928542e-02,2.707935644734240678e-03
+9.931207044387818650e-01,-2.028261608927149282e-02,-1.224800852975619671e-02,3.391971649676000578e-05,1.127142772180906243e-04,2.614334733348551432e-02,2.570926766330971624e-03
+9.933639302395360637e-01,-1.954811268775598007e-02,-1.187194323217670086e-02,3.191355122856113244e-05,1.050167992765286147e-04,2.523974127107253204e-02,2.438455172391381081e-03
+9.936027928444443047e-01,-1.882825942550740705e-02,-1.149785461064750658e-02,2.997840577894910082e-05,9.771245280532341732e-05,2.435079542060543042e-02,2.310500116708639768e-03
+9.938372912043398344e-01,-1.812295713425914750e-02,-1.112607832828713637e-02,2.811466258227638383e-05,9.078854325371596391e-05,2.347660340180397071e-02,2.187033086073249775e-03
+9.940674242892252321e-01,-1.743210990420360901e-02,-1.075693710925728838e-02,2.632251090702755720e-05,8.423249247159166671e-05,2.261725709763925257e-02,2.068018761097285482e-03
+9.942931910882769619e-01,-1.675562527392303672e-02,-1.039074150500869133e-02,2.460195412627679673e-05,7.803184602644162393e-05,2.177284624788227568e-02,1.953414597984882636e-03
+9.945145906098497024e-01,-1.609341404632142966e-02,-1.002778980718211331e-02,2.295281737304863249e-05,7.217428026869416159e-05,2.094345873268208327e-02,1.843172000725134055e-03
+9.947316218814804545e-01,-1.544539034014216425e-02,-9.668368857219264423e-03,2.137475506260408414e-05,6.664760924819830568e-05,2.012918040429068772e-02,1.737235801708923158e-03
+9.949442839498933155e-01,-1.481147155888976746e-02,-9.312753808373548658e-03,1.986725869943623693e-05,6.143979144877438918e-05,1.933009506451538462e-02,1.635545695782528782e-03
+9.951525758810034761e-01,-1.419157832225636899e-02,-8.961209067180165438e-03,1.842966466640209614e-05,5.653893641129894513e-05,1.854628450425669858e-02,1.538035451386865048e-03
+9.953564967599211055e-01,-1.358563432426024113e-02,-8.613987908228518467e-03,1.706116209795179256e-05,5.193331101474173064e-05,1.777782867311672454e-02,1.444634597336275908e-03
+9.955560456909556821e-01,-1.299356643465783730e-02,-8.271333528829966433e-03,1.576080095027484885e-05,4.761134569664633265e-05,1.702480538645283203e-02,1.355267397281080055e-03
+9.957512217976196567e-01,-1.241530457239088669e-02,-7.933478843031950556e-03,1.452749967994392974e-05,4.356164048549677722e-05,1.628729045416770979e-02,1.269854174962800272e-03
+9.959420242226324493e-01,-1.185078167552634960e-02,-7.600646772854617074e-03,1.336005365126459944e-05,3.977297086970644525e-05,1.556535762373962087e-02,1.188311693584624034e-03
+9.961284521279242243e-01,-1.129993355425476173e-02,-7.273051079898272889e-03,1.225714290955972323e-05,3.623429341285483657e-05,1.485907873828575670e-02,1.110552508089381442e-03
+9.963105046946395538e-01,-1.076269897815272311e-02,-6.950895919279506154e-03,1.121734028051062000e-05,3.293475126005698496e-05,1.416852345196205863e-02,1.036486698598667938e-03
+9.964881811231409703e-01,-1.023901936245098267e-02,-6.634376614696700647e-03,1.023911959859312390e-05,2.986367942871721114e-05,1.349375969437385803e-02,9.660212993219736326e-04
+9.966614806330126308e-01,-9.728838812789568358e-03,-6.323679976257461609e-03,9.320863524037732159e-06,2.701060978282888644e-05,1.283485346058796557e-02,8.990605695048242956e-04
+9.968304024630633142e-01,-9.232104349056110063e-03,-6.018984213489051750e-03,8.460871635440704765e-06,2.436527613955787988e-05,1.219186826372438935e-02,8.355070076356367875e-04
+9.969949458713304180e-01,-8.748765309325749404e-03,-5.720459649461205956e-03,7.657368409958847030e-06,2.191761903182578271e-05,1.156486612373027953e-02,7.752608473843208619e-04
+9.971551101350828450e-01,-8.278773609536289185e-03,-5.428268581158579575e-03,6.908511057999187795e-06,1.965779016906932063e-05,1.095390695103116638e-02,7.182211378383504411e-04
+9.973108945508237788e-01,-7.822083523423936646e-03,-5.142565669704866199e-03,6.212397601178030333e-06,1.757615692088821565e-05,1.035904882953972601e-02,6.642858133065830239e-04
+9.974622984342947918e-01,-7.378651898616417348e-03,-4.863498523242124928e-03,5.567074468886495102e-06,1.566330656748850616e-05,9.780347482144428548e-03,6.133513846051392299e-04
+9.976093211204778433e-01,-6.948437646650437206e-03,-4.591207367798139961e-03,4.970544279610468817e-06,1.391005048851608361e-05,9.217857098287691661e-03,5.653143227406439179e-04
+9.977519619635988324e-01,-6.531401874763990457e-03,-4.325825882668780237e-03,4.420773487492777672e-06,1.230742784260145955e-05,8.671629959238518620e-03,5.200702411381425208e-04
+9.978902203371300406e-01,-6.127507989920371288e-03,-4.067480890988649234e-03,3.915699824198285166e-06,1.084670956297098056e-05,8.141716116873198850e-03,4.775152060853734550e-04
+9.980240956337933511e-01,-5.736720986144222545e-03,-3.816292824655071435e-03,3.453239933127513338e-06,9.519401708787280408e-06,7.628164605448173145e-03,4.375455766449899632e-04
+9.981535872655622477e-01,-5.359008368580337432e-03,-3.572376328054645885e-03,3.031296435056826571e-06,8.317249012432557793e-06,7.131021586517123471e-03,4.000575654016282892e-04
+9.982786946636650338e-01,-4.994339185706967582e-03,-3.335839791568366141e-03,2.647765127996628392e-06,7.232238240998270458e-06,6.650332043374117784e-03,3.649487866226967521e-04
+9.983994172785870536e-01,-4.642684465226085307e-03,-3.106785981385238194e-03,2.300542115273978128e-06,6.256601216170443058e-06,6.186138846088796994e-03,3.321177293825311439e-04
+9.985157545800730228e-01,-4.304016884212515624e-03,-2.885312241752144031e-03,1.987530594501292145e-06,5.382817876479706515e-06,5.738483255074413800e-03,3.014640125717156266e-04
+9.986277060571295827e-01,-3.978310819645307014e-03,-2.671510565929782408e-03,1.706647564623055681e-06,4.603619063070790429e-06,5.307404714388958521e-03,2.728888660812052815e-04
+9.987352712180274095e-01,-3.665542210473101119e-03,-2.465467708335562780e-03,1.455830451539447255e-06,3.911989239549590146e-06,4.892941001537783399e-03,2.462955144935107281e-04
+9.988384495903031013e-01,-3.365688646769479046e-03,-2.267265786095052846e-03,1.233043423051855739e-06,3.301169077564892413e-06,4.495127954789562320e-03,2.215886225900322022e-04
+9.989372407207620652e-01,-3.078728972927052485e-03,-2.076981902901714645e-03,1.036283549243232362e-06,2.764657866998271338e-06,4.114000115082276643e-03,1.986755533331593937e-04
+9.990316441754796273e-01,-2.804643691976337668e-03,-1.894688636901552389e-03,8.635869251551509009e-07,2.296215812153098987e-06,3.749589869434042343e-03,1.774659848327026076e-04
+9.991216595398032529e-01,-2.543414396914920120e-03,-1.720454331509790056e-03,7.130343463425631678e-07,1.889866252060460025e-06,3.401928422375527158e-03,1.578718756697975172e-04
+9.992072864183545455e-01,-2.295024133554283193e-03,-1.554342886247533050e-03,5.827569563531444280e-07,1.539897629834340417e-06,3.071045024589097363e-03,1.398083463007169440e-04
+9.992885244350309115e-01,-2.059457107960801104e-03,-1.396414296304939037e-03,4.709416180030933114e-07,1.240865529832216701e-06,2.756967434317333758e-03,1.231931351121621923e-04
+9.993653732330070039e-01,-1.836698607879872847e-03,-1.246724440430505487e-03,3.758360643265014741e-07,9.875943825947473918e-07,2.459721981514412931e-03,1.079474420815683351e-04
+9.994378324747364983e-01,-1.626735172701814685e-03,-1.105325412682650957e-03,2.957538874722902748e-07,7.751792452629376594e-07,2.179333169995226974e-03,9.399573086658502303e-05
+9.995059018419534258e-01,-1.429554247641565872e-03,-9.722656868078960804e-04,2.290792008661241901e-07,5.989873394557187879e-07,1.915824250792590036e-03,8.126582264516812024e-05
+9.995695810356738376e-01,-1.245144488634565647e-03,-8.475901103220174256e-04,1.742711225405102311e-07,4.546595777539535790e-07,1.669216579561803556e-03,6.968928707724419535e-05
+9.996288697761966935e-01,-1.073495244864381207e-03,-7.313401876252177995e-04,1.298679900878838228e-07,3.381119064844291873e-07,1.439530520070086885e-03,5.920126900654717203e-05
+9.996837678031056384e-01,-9.145969749582459141e-04,-6.235540054429734511e-04,9.449133012437933857e-08,2.455365711596457151e-07,1.226784601308217489e-03,4.974096332241033412e-05
+9.997342748752695574e-01,-7.684409899849103552e-04,-5.242663981669936366e-04,6.684961034703529577e-08,1.734033237201767020e-07,1.030995941073458355e-03,4.125161743727475837e-05
+9.997803907708440185e-01,-6.350194250793644076e-04,-4.335092111871701273e-04,4.574167470858940454e-08,1.184604863064570160e-07,8.521802451393129451e-04,3.368032732294278057e-05
+9.998221152872722728e-01,-5.143251664124431173e-04,-3.513110494462930188e-04,3.005995513519191915e-08,7.773592113264225598e-08,6.903518944425371439e-04,2.697877596109041090e-05
+9.998594482412856976e-01,-4.063519102345642081e-04,-2.776976634104294907e-04,1.879342967883480850e-08,4.853792215126924754e-08,5.455237889310121137e-04,2.110275373545859824e-05
+9.998923894689052405e-01,-3.110941117152204484e-04,-2.126918760101745970e-04,1.103026164395963603e-08,2.845601532535955110e-08,4.177074023491449584e-04,1.601251651657159116e-05
+9.999209388254417519e-01,-2.285469527285761157e-04,-1.563135655309518673e-04,5.960228782963606721e-09,1.536167519899422237e-08,3.069128058186614775e-04,1.167301578754831288e-05
+9.999450961854965403e-01,-1.587062173448171873e-04,-1.085800918782262395e-04,2.876816059576935398e-09,7.408930812848458131e-09,2.131488687944680348e-04,8.053274298460492197e-06
+9.999648614429623716e-01,-1.015685083271660865e-04,-6.950580241239230183e-05,1.179067087713578288e-09,3.034911746389828074e-09,1.364228249625791002e-04,5.127464331123271171e-06
+9.999802345110231361e-01,-5.713097211520982576e-05,-3.910252678857763643e-05,3.731588654434305282e-10,9.602973493015872819e-10,7.674076406058295114e-05,2.874108337536535870e-06
+9.999912153221552913e-01,-2.539143461237474631e-05,-1.737940420300304942e-05,7.366828647285503959e-11,1.896798212121883235e-10,3.410735776743149507e-05,1.276508016380705142e-06
+9.999978038281271964e-01,-6.348308155885659235e-06,-4.342936922034114525e-06,4.580574948692394836e-12,1.184942236291030780e-11,8.526018966727354561e-06,3.227257789899316567e-07
+1.000000000000000000e+00,-7.239436301828963784e-10,2.953069315321496946e-09,-1.811720345834967971e-16,3.373818578172117256e-17,-1.357394309818077933e-09,5.537004964380798958e-09
diff --git a/examples/TS_channel/box.nmsh b/examples/TS_channel/box.nmsh
new file mode 100644
index 00000000000..230ce148757
Binary files /dev/null and b/examples/TS_channel/box.nmsh differ
diff --git a/examples/advecting_cone/.gitignore b/examples/advecting_cone/.gitignore
new file mode 100644
index 00000000000..afed0735dc9
--- /dev/null
+++ b/examples/advecting_cone/.gitignore
@@ -0,0 +1 @@
+*.csv
diff --git a/examples/advecting_cone/README.md b/examples/advecting_cone/README.md
new file mode 100644
index 00000000000..60bdf376c34
--- /dev/null
+++ b/examples/advecting_cone/README.md
@@ -0,0 +1,41 @@
+# Advecting cone
+
+This is a classical test case for demonstrating the benefit of a high order
+discretization. It can be found in this classical reference
+https://epubs.siam.org/doi/book/10.1137/1.9781611970425 in chapter 11. We
+consider scalar advection of a cone of diameter 1 and height 1, with its center
+initially positioned at (1, 0). The advecting velocity is a vortex with $u=-x
+\pi$ and $v = y  \pi$, meaning that the cone makes a full rotation in 2 seconds.
+
+The case is run for 1 rotation and the shape of the cone is examined. Ideally it
+should be preserved, but in practice numerical error lead to the distortion of
+the shape. The idea is to see how the order of the polynomial basis affects the
+error, while keeping the total number of DoFs roughly the same.
+
+## Case setup
+To keep the advecting velocity constant, we enable `freeze` for the `fluid`. The
+initial velocity distribution is set in the user file, and similarly for the
+scalar. We output generously: 100 data dumps during one rotation, so that one
+can examine the dynamics during post-processing.
+
+To analyze the results, we use the `probes` simcomp to extract the values of the
+scalar along a line aligned with $y$ in the center of the domain. A Python
+script to create the probes is provided.
+
+The mesh is generated with `genmeshbox` with periodic conditions in all
+directions, and a single element across $z$, to mimic a 2D case.
+
+The script `run.sh` can be used to run the study, computing the results for
+several meshes and polynomial orders. We suggest inspecting the script and
+modifying it to your liking. Note that the `change_order.py` script is used to
+change the polynomial order in the case file.
+
+A Python script to plot the results is also provided for convenience.
+
+
+## Expected results
+The following curves will be obtained with the settings in the `run.sh` script.
+One can see that the results improve with higher order, even if the number of
+DoFs stays roughly the same.
+
+<img src="results.png" alt="Results" width="500"/>
diff --git a/examples/advecting_cone/advecting_cone.case b/examples/advecting_cone/advecting_cone.case
new file mode 100755
index 00000000000..9ad04ed7a66
--- /dev/null
+++ b/examples/advecting_cone/advecting_cone.case
@@ -0,0 +1,61 @@
+{
+  "version": 1.0,
+  "case": {
+    "mesh_file": "box.nmsh",
+    "output_at_end": false,
+    "output_boundary": false,
+    "output_checkpoints": false,
+    "end_time": 2,
+    "timestep": 0.001,
+    "numerics": {
+      "time_order": 2,
+      "polynomial_order": 5,
+      "dealias": true
+    },
+    "fluid": {
+      "scheme": "pnpn",
+      "mu": 1,
+      "rho": 1,
+      "freeze": true,
+      "initial_condition": {
+        "type": "user"
+      },
+      "velocity_solver": {
+        "type": "cg",
+        "preconditioner": "jacobi",
+        "projection_space_size": 0,
+        "absolute_tolerance": 1e-07,
+        "max_iterations": 800
+      },
+      "pressure_solver": {
+        "type": "gmres",
+        "preconditioner": "hsmg",
+        "projection_space_size": 4,
+        "absolute_tolerance": 1e-07,
+        "max_iterations": 800
+      },
+      "output_control": "nsamples",
+      "output_value": 100
+    },
+    "scalar": {
+      "enabled": true,
+      "lambda": 1e-16,
+      "cp": 1.0,
+      "initial_condition": {
+        "type": "user"
+      }
+    },
+    "simulation_components": [
+      {
+        "type": "probes",
+        "compute_control": "nsamples",
+        "compute_value": 1,
+        "points_file": "probes.csv",
+        "output_file": "output.csv",
+        "fields": [
+          "s"
+        ]
+      }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/examples/advecting_cone/advecting_cone.f90 b/examples/advecting_cone/advecting_cone.f90
new file mode 100644
index 00000000000..77ec4f8d66f
--- /dev/null
+++ b/examples/advecting_cone/advecting_cone.f90
@@ -0,0 +1,77 @@
+module user
+  use neko
+  implicit none
+
+contains
+  !> Register user defined functions (see user_intf.f90)
+  subroutine user_setup(u)
+    type(user_t), intent(inout) :: u
+    u%scalar_user_ic => set_s_ic
+    u%fluid_user_ic => set_velocity
+  end subroutine user_setup
+
+  !> User initial condition for the scalar
+  subroutine set_s_ic(s, params)
+    type(field_t), intent(inout) :: s
+    type(json_file), intent(inout) :: params
+    integer :: i, e, k, j
+    real(kind=rp) :: cone_radius, mux, muy, x, y, r, theta
+
+    ! Center of the cone
+    mux = 1
+    muy = 0
+
+    cone_radius = 0.5
+
+    do i = 1, s%dof%size()
+       x = s%dof%x(i,1,1,1) - mux
+       y = s%dof%y(i,1,1,1) - muy
+
+       r = sqrt(x**2 + y**2)
+       theta = atan2(y, x)
+
+       ! Check if the point is inside the cone's base
+       if (r > cone_radius) then
+         s%x(i,1,1,1) = 0.0
+       else
+         s%x(i,1,1,1) = 1.0 - r / cone_radius
+       endif
+    end do
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_memcpy(s%x, s%x_d, s%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+    end if
+  end subroutine set_s_ic
+
+  !> Set the advecting velocity field.
+  subroutine set_velocity(u, v, w, p, params)
+    type(field_t), intent(inout) :: u
+    type(field_t), intent(inout) :: v
+    type(field_t), intent(inout) :: w
+    type(field_t), intent(inout) :: p
+    type(json_file), intent(inout) :: params
+    integer :: i, e, k, j
+    real(kind=rp) :: x, y
+
+    do i = 1, u%dof%size()
+       x = u%dof%x(i,1,1,1)
+       y = u%dof%y(i,1,1,1)
+
+       ! Angular velocity is pi, giving a full rotation in 2 sec
+       u%x(i,1,1,1) = -y*pi
+       v%x(i,1,1,1) = x*pi
+       w%x(i,1,1,1) = 0
+    end do
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_memcpy(u%x, u%x_d, u%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(v%x, v%x_d, v%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(w%x, w%x_d, w%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+    end if
+  end subroutine set_velocity
+
+end module user
diff --git a/examples/advecting_cone/box.nmsh b/examples/advecting_cone/box.nmsh
new file mode 100644
index 00000000000..c9a36ca816f
Binary files /dev/null and b/examples/advecting_cone/box.nmsh differ
diff --git a/examples/advecting_cone/change_order.py b/examples/advecting_cone/change_order.py
new file mode 100644
index 00000000000..c1015afaa7a
--- /dev/null
+++ b/examples/advecting_cone/change_order.py
@@ -0,0 +1,28 @@
+import argparse
+import json
+
+def modify_polynomial_order(json_file_path, new_polynomial_order):
+    # Open the JSON file for reading
+    with open(json_file_path, 'r') as file:
+        data = json.load(file)
+
+    # Update the polynomial_order value under the specified path
+    path = ["case", "numerics", "polynomial_order"]
+    current_node = data
+    for key in path[:-1]:
+        current_node = current_node[key]
+    current_node[path[-1]] = new_polynomial_order
+
+    # Save the modified data back to the JSON file
+    with open(json_file_path, 'w') as file:
+        json.dump(data, file, indent=2)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Modify polynomial order in the case file")
+    parser.add_argument("new_polynomial_order", type=int, help="New polynomial order value")
+
+    args = parser.parse_args()
+
+    new_polynomial_order = args.new_polynomial_order
+
+    modify_polynomial_order("advecting_cone.case", new_polynomial_order)
diff --git a/examples/advecting_cone/create_probes.py b/examples/advecting_cone/create_probes.py
new file mode 100644
index 00000000000..42027cfecc7
--- /dev/null
+++ b/examples/advecting_cone/create_probes.py
@@ -0,0 +1,7 @@
+import numpy as np
+
+x = np.linspace(-0.5, 1.75, 200)
+
+p = np.column_stack((x, np.zeros(x.shape) + 1e-10, 0.05*np.ones(x.shape)))
+
+np.savetxt("probes.csv", p, delimiter=',')
diff --git a/examples/advecting_cone/results.png b/examples/advecting_cone/results.png
new file mode 100755
index 00000000000..5a0d4a19887
Binary files /dev/null and b/examples/advecting_cone/results.png differ
diff --git a/examples/advecting_cone/run.sh b/examples/advecting_cone/run.sh
new file mode 100755
index 00000000000..b1f5aa8d63e
--- /dev/null
+++ b/examples/advecting_cone/run.sh
@@ -0,0 +1,28 @@
+# Compile the case
+makeneko advecting_cone.f90
+# Generate probes
+python create_probes.py
+
+# Remove results if exist
+rm output.csv
+
+# Set order
+python change_order.py 2
+# Generate mesh
+genmeshbox -2 2 -2 2 0 0.1 30 30 1 .true .true. .true.
+# Run the code
+mpirun -n 8 ./neko advecting_cone.case
+# Save result
+mv output.csv output_2.csv
+
+# Order 3
+python change_order.py 3
+genmeshbox -2 2 -2 2 0 0.1 22 22 1 .true .true. .true.
+mpirun -n 8 ./neko advecting_cone.case
+mv output.csv output_3.csv
+
+# Order 5
+python change_order.py 5
+genmeshbox -2 2 -2 2 0 0.1 15 15 1 .true .true. .true.
+mpirun -n 8 ./neko advecting_cone.case
+mv output.csv output_5.csv
diff --git a/examples/cyl_boundary_layer/cyl_bl.f90 b/examples/cyl_boundary_layer/cyl_bl.f90
index a89dc83dab1..d3291b10cbf 100644
--- a/examples/cyl_boundary_layer/cyl_bl.f90
+++ b/examples/cyl_boundary_layer/cyl_bl.f90
@@ -25,8 +25,7 @@ subroutine user_setup(u)
     type(user_t), intent(inout) :: u
     u%fluid_user_ic => user_ic
     u%fluid_user_if => user_inflow_eval
-    !u%user_check => user_do_stuff
-    u%user_mesh_setup => cylinder_deform
+    u%user_dirichlet_update => dirichlet_update
   end subroutine user_setup
  
   subroutine cylinder_deform(msh)
@@ -149,4 +148,84 @@ subroutine user_ic(u, v, w, p, params)
     end do
   end subroutine user_ic
 
+  ! Initial example of using user specified dirichlet bcs
+  ! Note: This subroutine will be called two times, once in the fluid solver, and once
+  ! in the scalar solver (if enabled).
+  !! Parameters:
+  !! -----------
+  !! field_bc_list:     List of fields from which the BC conditions zill be extracted.
+  !!                    If which_solver = "fluid", contains (u,v,w,p).
+  !!                    If which_solver = "scalar", contains (s).
+  !! bc_bc_list:        List of BCs containing field_dirichlet_t BC objects only.
+  !!                    If which_solver = "fluid", contains the bc objects
+  !!                    (d_vel_u, d_vel_v, d_vel_w, d_pres).
+  !!                    If which_solver = "scalar", contains the bc object (d_s).
+  !! coef:              Coef object.
+  !! t:                 Current time.
+  !! tstep:             Current time step.
+  !! which_solver:      Indicates wether the fields provided come from "fluid" or "scalar".
+  subroutine dirichlet_update(field_bc_list, bc_bc_list, coef, t, tstep, which_solver)
+    type(field_list_t), intent(inout) :: field_bc_list
+    type(bc_list_t), intent(inout) :: bc_bc_list
+    type(coef_t), intent(inout) :: coef
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+    character(len=*), intent(in) :: which_solver
+
+    integer :: i
+    real(kind=rp) :: y,z
+
+    ! Only do this at the first time step since our BCs are constants.
+    if (tstep .ne. 1) return
+
+    ! Check that we are being called by `fluid`
+    if (trim(which_solver) .eq. "fluid") then
+
+       associate(u => field_bc_list%fields(1)%f, &
+            v => field_bc_list%fields(2)%f, &
+            w => field_bc_list%fields(3)%f, &
+            p => field_bc_list%fields(4)%f)
+
+         !
+         ! Perform operations on u%x, v%x, w%x and p%x here
+         ! Note that we are checking if fields are allocated. If the
+         ! boundary type only contains e.g. "d_vel_u/d_pres", the fields
+         ! v%x and w%x will not be allocated.
+         !
+         ! Here we are applying very simple uniform boundaries (u,v,w) = (1,0,0)
+         ! and nonsensical pressure outlet of p = -1
+         !
+         if (allocated(u%x)) u = 1.0_rp
+         if (allocated(v%x)) v = 0.0_rp
+         if (allocated(w%x)) w = 0.0_rp
+         if (allocated(p%x)) p = -1.0_rp
+
+       end associate
+
+    ! Check that we are being called by `scalar`
+    else if (trim(which_solver) .eq. "scalar") then
+
+       associate( s => field_bc_list%fields(1)%f, &
+            s_bc => bc_bc_list%bc(1)%bcp)
+
+         !
+         ! Perform operations on the scalar field here
+         ! Note that we are checking if the field is allocated, in
+         ! case the boundary is empty.
+         !
+         if (allocated(s%x)) then
+
+            do i = 1, s_bc%msk(0)
+               y = s_bc%dof%y(s_bc%msk(i), 1, 1, 1)
+               z = s_bc%dof%z(s_bc%msk(i), 1, 1, 1)
+               s%x(s_bc%msk(i), 1, 1, 1) = sin(y)*sin(z)
+            end do
+
+         end if
+       end associate
+
+    end if
+
+  end subroutine dirichlet_update
+
 end module user
diff --git a/examples/cyl_boundary_layer/cyl_bl_user_bc_test.case b/examples/cyl_boundary_layer/cyl_bl_user_bc_test.case
new file mode 100644
index 00000000000..282e3a65793
--- /dev/null
+++ b/examples/cyl_boundary_layer/cyl_bl_user_bc_test.case
@@ -0,0 +1,72 @@
+{
+"version": 1.0,
+"case": {
+    "mesh_file": "cyl.nmsh",
+    "output_at_end": false,
+    "output_boundary": true,
+    "output_checkpoints": false,
+    "end_time": 100,
+    "output_directory": "results_dirichlet_bcs",
+    "timestep": 8e-4,
+    "numerics": {
+        "time_order": 3,
+        "polynomial_order": 5,
+        "dealias": true
+    },
+    "fluid": {
+        "scheme": "pnpn",
+        "Re": 500,
+        "source_term": {
+            "type": "noforce"
+        },
+        "initial_condition": {
+            "type": "uniform",
+            "value": [1.0, 0.0, 0.0]
+        },
+        "inflow_condition": {
+            "type": "user",
+        },
+        "velocity_solver": {
+            "type": "cg",
+            "preconditioner": "jacobi",
+            "projection_space_size": 3,
+            "absolute_tolerance": 1e-8,
+            "max_iterations": 800
+        },
+        "pressure_solver": {
+            "type": "gmres",
+            "preconditioner": "hsmg",
+            "projection_space_size": 10,
+            "absolute_tolerance": 1e-5,
+            "max_iterations": 800
+        },
+        "boundary_types": [
+            "d_vel_u/d_vel_v/d_vel_w", 
+            "d_vel_u/d_vel_v/d_vel_w/d_pres",
+            "sym",
+            "w",
+            "on", 
+            "on", 
+            "w"
+        ],
+        "output_control": "nsamples",
+        "output_value": 20
+    },
+    "scalar": {
+	    "enabled": true,
+            "Pe": 0.71,
+            "boundary_types": [
+                "d_s",
+                "d_s",
+                "",
+                "",
+                "",
+                ""
+            ],
+            "initial_condition": {
+                "type": "uniform",
+		"value": 0.0
+            }
+    }
+}
+}
diff --git a/examples/immersed_bunny/.gitignore b/examples/immersed_bunny/.gitignore
new file mode 100644
index 00000000000..1df57c6ce3e
--- /dev/null
+++ b/examples/immersed_bunny/.gitignore
@@ -0,0 +1,7 @@
+# Only include the files needed for the example
+*
+!.gitignore
+!*.case
+!readme.md
+!run.sh
+!bunny.stl
diff --git a/examples/immersed_bunny/bunny.stl b/examples/immersed_bunny/bunny.stl
new file mode 100644
index 00000000000..e879a37fba4
Binary files /dev/null and b/examples/immersed_bunny/bunny.stl differ
diff --git a/examples/immersed_bunny/immersed_bunny.case b/examples/immersed_bunny/immersed_bunny.case
new file mode 100644
index 00000000000..70b04ea172c
--- /dev/null
+++ b/examples/immersed_bunny/immersed_bunny.case
@@ -0,0 +1,95 @@
+{
+    "version": 1.0,
+    "case": {
+        "mesh_file": "box.nmsh",
+        "output_directory": "fields",
+        "output_at_end": true,
+        "output_boundary": true,
+        "output_checkpoints": false,
+        "end_time": 20.0,
+        "timestep": 1e-4,
+        "numerics": {
+            "time_order": 3,
+            "polynomial_order": 8,
+            "dealias": true
+        },
+        "fluid": {
+            "scheme": "pnpn",
+            "Re": 1000.0,
+            "initial_condition": {
+                "type": "uniform",
+                "value": [
+                    1.0,
+                    0.0,
+                    0.0
+                ]
+            },
+            "inflow_condition": {
+                "type": "uniform",
+                "value": [
+                    1.0
+                ]
+            },
+            "velocity_solver": {
+                "type": "cg",
+                "preconditioner": "jacobi",
+                "projection_space_size": 0,
+                "absolute_tolerance": 1e-8,
+                "max_iterations": 800
+            },
+            "pressure_solver": {
+                "type": "gmres",
+                "preconditioner": "hsmg",
+                "projection_space_size": 0,
+                "absolute_tolerance": 1e-7,
+                "max_iterations": 800
+            },
+            "output_control": "nsamples",
+            "output_value": 250,
+            "boundary_types": [
+                "v",
+                "o",
+                "w",
+                "w",
+                "w",
+                "w"
+            ],
+            "source_terms": [
+                {
+                    "type": "brinkman",
+                    "objects": [
+                        {
+                            "type": "boundary_mesh",
+                            "name": "bunny.stl",
+                            "distance_transform": {
+                                "type": "smooth_step",
+                                "value": 0.05
+                            },
+                            "mesh_transform": {
+                                "type": "bounding_box",
+                                "box_min": [
+                                    0.75,
+                                    0.25,
+                                    0.00
+                                ],
+                                "box_max": [
+                                    1.25,
+                                    0.75,
+                                    0.50
+                                ],
+                                "keep_aspect_ratio": true
+                            }
+                        }
+                    ],
+                    "brinkman": {
+                        "limits": [
+                            0.0,
+                            1000.0
+                        ],
+                        "penalty": 1.0
+                    },
+                }
+            ]
+        }
+    }
+}
\ No newline at end of file
diff --git a/examples/immersed_bunny/readme.md b/examples/immersed_bunny/readme.md
new file mode 100644
index 00000000000..c48f5b1051d
--- /dev/null
+++ b/examples/immersed_bunny/readme.md
@@ -0,0 +1,19 @@
+# Immersed boundary of a Stanford Bunny
+
+This example illustrates the brinkman condition source term used to simulate an
+immersed boundary. A Stanford Bunny model is placed in side a square duct and
+flow is simulated.
+
+```
+Bunny url:              https://www.thingiverse.com/thing:151081
+Bunny domain:           [ [-24, 85], [-41, 45], [5, 112] ]
+Design domain:          [ [-200, 600], [-100, 100], [0, 200] ]
+```
+
+The run script will construct a design domain as above with 16 x 8 x 8 elements.
+The user can decide to specify another resolution from the terminal.
+
+The user can change how sharp the interface between fluid and solid is by
+changing the `distance_transform.value`, the value specifies the width of the
+transition region from no to full resistance. The size of the Brinkman
+resistance limits should be adjusted if the Reynolds number is modified.
diff --git a/examples/immersed_bunny/run.sh b/examples/immersed_bunny/run.sh
new file mode 100755
index 00000000000..e857c2b2b07
--- /dev/null
+++ b/examples/immersed_bunny/run.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/bash
+
+# ============================================================================ #
+# Define the help function
+
+function help() {
+    echo -e "run.sh [Nx] [Ny] [Nz]"
+    echo -e "  Generate a mesh and run the immersed_bunny case."
+    echo -e "  The input arguments are the number of cells in the x, y, and z"
+    echo -e "  directions, respectively."
+    echo -e ""
+    echo -e "  If no input arguments are provided, the default mesh size is"
+    echo -e "  16x8x8."
+    echo -e ""
+    echo -e "  See Readme for additional details."
+    exit 0
+}
+
+# Search for "-h" or "--help" in the input arguments
+for arg in $@; do
+    if [[ $arg == "-h" ]] || [[ $arg == "--help" ]]; then
+        help
+    fi
+done
+
+# ============================================================================ #
+# Ensure Neko can be found and set default mesh size
+
+if [ "$NEKO_DIR" ]; then
+    export PATH=$NEKO_DIR:$PATH
+fi
+
+if [[ -z $(which neko) ]]; then
+    echo -e "Neko not found." >&2
+    echo -e "Please ensure Neko is installed and in your PATH." >&2
+    echo -e "Alternatively, set the NEKO_DIR environment variable." >&2
+    exit 1
+fi
+
+if [ $# == 0 ]; then
+    Nx=16 && Ny=8 && Nz=8
+elif [ $# == 3 ]; then
+    Nx=$1 && Ny=$2 && Nz=$3
+else
+    echo -e "Invalid number of input arguments." >&2
+    help
+fi
+
+# ============================================================================ #
+# Generate mesh and run case
+
+echo "Generating mesh with dimensions: $Nx $Ny $Nz"
+genmeshbox 0 4 0 1 0 1 $Nx $Ny $Nz .false. .false. .false.
+neko immersed_bunny.case
+
+# End of file
+# ============================================================================ #
diff --git a/examples/immersed_zones/.gitignore b/examples/immersed_zones/.gitignore
new file mode 100644
index 00000000000..11a072298f4
--- /dev/null
+++ b/examples/immersed_zones/.gitignore
@@ -0,0 +1,6 @@
+# Only include the files needed for the example
+*
+!.gitignore
+!*.case
+!readme.md
+!run.sh
diff --git a/examples/immersed_zones/block.case b/examples/immersed_zones/block.case
new file mode 100644
index 00000000000..6539de1e7bf
--- /dev/null
+++ b/examples/immersed_zones/block.case
@@ -0,0 +1,95 @@
+{
+    "version": 1.0,
+    "case": {
+        "mesh_file": "box.nmsh",
+        "output_directory": "block_fields",
+        "output_at_end": false,
+        "output_boundary": false,
+        "output_checkpoints": false,
+        "end_time": 0.01,
+        "timestep": 5e-5,
+        "numerics": {
+            "time_order": 3,
+            "polynomial_order": 8,
+            "dealias": true
+        },
+        "fluid": {
+            "scheme": "pnpn",
+            "Re": 2000.0,
+            "initial_condition": {
+                "type": "uniform",
+                "value": [
+                    1.0,
+                    0.0,
+                    0.0
+                ]
+            },
+            "inflow_condition": {
+                "type": "uniform",
+                "value": [
+                    1.0
+                ]
+            },
+            "velocity_solver": {
+                "type": "cg",
+                "preconditioner": "jacobi",
+                "projection_space_size": 0,
+                "absolute_tolerance": 1e-8,
+                "max_iterations": 800
+            },
+            "pressure_solver": {
+                "type": "gmres",
+                "preconditioner": "hsmg",
+                "projection_space_size": 0,
+                "absolute_tolerance": 1e-7,
+                "max_iterations": 800
+            },
+            "output_control": "nsamples",
+            "output_value": 250,
+            "boundary_types": [
+                "v",
+                "o",
+                "w",
+                "w",
+                "w",
+                "w"
+            ],
+            "source_terms": [
+                {
+                    "type": "brinkman",
+                    "objects": [
+                        {
+                            "type": "point_zone",
+                            "name": "block"
+                        }
+                    ],
+                    "brinkman": {
+                        "limits": [
+                            0.0,
+                            1500.0
+                        ],
+                        "penalty": 1.0
+                    }
+                }
+            ]
+        },
+        "point_zones": [
+            {
+                "name": "block",
+                "geometry": "box",
+                "x_bounds": [
+                    0.90,
+                    1.10
+                ],
+                "y_bounds": [
+                    0.0,
+                    1.0
+                ],
+                "z_bounds": [
+                    0.0,
+                    0.5
+                ]
+            }
+        ]
+    }
+}
\ No newline at end of file
diff --git a/examples/immersed_zones/block_sphere.case b/examples/immersed_zones/block_sphere.case
new file mode 100644
index 00000000000..83da59c37dc
--- /dev/null
+++ b/examples/immersed_zones/block_sphere.case
@@ -0,0 +1,109 @@
+{
+    "version": 1.0,
+    "case": {
+        "mesh_file": "box.nmsh",
+        "output_directory": "block_sphere_fields",
+        "output_at_end": false,
+        "output_boundary": false,
+        "output_checkpoints": false,
+        "end_time": 0.01,
+        "timestep": 5e-5,
+        "numerics": {
+            "time_order": 3,
+            "polynomial_order": 8,
+            "dealias": true
+        },
+        "fluid": {
+            "scheme": "pnpn",
+            "Re": 2000.0,
+            "initial_condition": {
+                "type": "uniform",
+                "value": [
+                    1.0,
+                    0.0,
+                    0.0
+                ]
+            },
+            "inflow_condition": {
+                "type": "uniform",
+                "value": [
+                    1.0
+                ]
+            },
+            "velocity_solver": {
+                "type": "cg",
+                "preconditioner": "jacobi",
+                "projection_space_size": 0,
+                "absolute_tolerance": 1e-8,
+                "max_iterations": 800
+            },
+            "pressure_solver": {
+                "type": "gmres",
+                "preconditioner": "hsmg",
+                "projection_space_size": 0,
+                "absolute_tolerance": 1e-7,
+                "max_iterations": 800
+            },
+            "output_control": "nsamples",
+            "output_value": 250,
+            "boundary_types": [
+                "v",
+                "o",
+                "w",
+                "w",
+                "w",
+                "w"
+            ],
+            "source_terms": [
+                {
+                    "type": "brinkman",
+                    "objects": [
+                        {
+                            "type": "point_zone",
+                            "name": "block"
+                        },
+                        {
+                            "type": "point_zone",
+                            "name": "ball"
+                        }
+                    ],
+                    "brinkman": {
+                        "limits": [
+                            0.0,
+                            1500.0
+                        ],
+                        "penalty": 1.0
+                    }
+                },
+            ]
+        },
+        "point_zones": [
+            {
+                "name": "ball",
+                "geometry": "sphere",
+                "center": [
+                    1.0,
+                    0.5,
+                    0.5
+                ],
+                "radius": 0.1
+            },
+            {
+                "name": "block",
+                "geometry": "box",
+                "x_bounds": [
+                    0.90,
+                    1.10
+                ],
+                "y_bounds": [
+                    0.0,
+                    1.0
+                ],
+                "z_bounds": [
+                    0.0,
+                    0.5
+                ]
+            }
+        ]
+    }
+}
\ No newline at end of file
diff --git a/examples/immersed_zones/cylinder.case b/examples/immersed_zones/cylinder.case
new file mode 100644
index 00000000000..b897c1f0030
--- /dev/null
+++ b/examples/immersed_zones/cylinder.case
@@ -0,0 +1,94 @@
+{
+    "version": 1.0,
+    "case": {
+        "mesh_file": "box.nmsh",
+        "output_directory": "cylinder_fields",
+        "output_at_end": false,
+        "output_boundary": false,
+        "output_checkpoints": false,
+        "end_time": 20.0,
+        "timestep": 5e-5,
+        "numerics": {
+            "time_order": 3,
+            "polynomial_order": 8,
+            "dealias": true
+        },
+        "fluid": {
+            "scheme": "pnpn",
+            "Re": 2000.0,
+            "initial_condition": {
+                "type": "uniform",
+                "value": [
+                    1.0,
+                    0.0,
+                    0.0
+                ]
+            },
+            "inflow_condition": {
+                "type": "uniform",
+                "value": [
+                    1.0
+                ]
+            },
+            "velocity_solver": {
+                "type": "cg",
+                "preconditioner": "jacobi",
+                "projection_space_size": 0,
+                "absolute_tolerance": 1e-8,
+                "max_iterations": 800
+            },
+            "pressure_solver": {
+                "type": "gmres",
+                "preconditioner": "hsmg",
+                "projection_space_size": 0,
+                "absolute_tolerance": 1e-7,
+                "max_iterations": 800
+            },
+            "output_control": "nsamples",
+            "output_value": 250,
+            "boundary_types": [
+                "v",
+                "o",
+                "w",
+                "w",
+                "w",
+                "w"
+            ],
+            "source_terms": [
+                {
+                    "type": "brinkman",
+                    "objects": [
+                        {
+                            "type": "point_zone",
+                            "name": "cylinder"
+                        }
+                    ],
+                    "brinkman": {
+                        "limits": [
+                            0.0,
+                            1500.0
+                        ],
+                        "penalty": 1.0
+                    }
+                }
+            ]
+        },
+        "point_zones": [
+            {
+                "name": "cylinder",
+                "geometry": "cylinder",
+                "start": [
+                    1.0,
+                    0.0,
+                    0.5
+                ],
+                "end": [
+                    1.0,
+                    1.0,
+                    0.5
+                ],
+                "radius": 0.1
+            }
+        ]
+    }
+}
\ No newline at end of file
diff --git a/examples/immersed_zones/readme.md b/examples/immersed_zones/readme.md
new file mode 100644
index 00000000000..c3a36b631fc
--- /dev/null
+++ b/examples/immersed_zones/readme.md
@@ -0,0 +1,29 @@
+# Immersed Zones
+
+This set of examples highlights the use of the Brinkman body force to model
+immersed zones in a flow. The Brinkman body force is a penalization method
+that is used to model the presence of solid objects in a flow. The method
+applies a force to the fluid in the vicinity of the solid object to mimic the
+effect of the solid object on the flow. The force is proportional to the fluid 
+velocity and is directed opposite to the fluid velocity.
+
+## Examples
+
+All the examples will be conducted in a 3D domain with a size of 4x1x1. The
+domain is discretized with a uniform grid of 32x8x8 elements. The number of
+elements can be modified by using the `-x`, `-y`, and `-z` command line
+arguments. The domain have an inflow boundary condition at the surface at
+x=0, and an outflow boundary condition at the surface at x=4. All other
+boundaries are treated as walls.
+
+The following examples are included in this directory:
+
+- `block`: A simple example of a flow past a block placed as a half height wall
+  half way through the domain.
+- `sphere`: A flow past a sphere with radius 0.1 placed in the middle of the
+  domain.
+- `block_sphere`: A flow past a block and a sphere both with size and placement
+  as above.
+- `cylinder`: A flow past a cylinder with radius 0.1 placed horizontally in the
+  middle of the domain.
+  
\ No newline at end of file
diff --git a/examples/immersed_zones/run.sh b/examples/immersed_zones/run.sh
new file mode 100755
index 00000000000..e32bd6e2320
--- /dev/null
+++ b/examples/immersed_zones/run.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/bash
+
+# ============================================================================ #
+# Define the help function
+
+function help() {
+    echo -e "run.sh case"
+    echo -e "  Generate a mesh and run all the desired case."
+    echo -e "  The input arguments are the number of cells in the x, y, and z"
+    echo -e "  directions, respectively."
+    echo -e ""
+    echo -e "  If no input arguments are provided, the default mesh size is"
+    echo -e "  32x8x8."
+    echo -e ""
+    echo -e "  Example usage:"
+    echo -e "    run.sh -x32 -y8 -z8 case_name.case"
+    echo -e ""
+    echo -e " Options:"
+    echo -e "  -h, --help  Show this help message and exit."
+    echo -e "  -x#         Number of cells in the x direction."
+    echo -e "  -y#         Number of cells in the y direction."
+    echo -e "  -z#         Number of cells in the z direction."
+    echo -e "  -a, --all   Run all cases."
+    echo -e "  -q, --quiet Suppress output."
+    echo -e ""
+    echo -e "  See Readme for additional details."
+    exit 0
+}
+if [ $# -eq 0 ]; then help; fi
+
+# Handle options
+Nx=32 && Ny=8 && Nz=8
+for arg in "$@"; do
+    if [ "${arg:0:2}" == "--" ]; then
+        case ${arg:2} in
+        help) help ;;
+        all) ALL=1 ;;
+        quiet) QUIET=1 ;;
+        *) echo -e "Invalid option: $arg" >&2 && help ;;
+        esac
+    elif [ "${arg:0:1}" == "-" ]; then
+        case ${arg:1:1} in
+        h) help ;;
+        x) Nx=${arg:2} ;;
+        y) Ny=${arg:2} ;;
+        z) Nz=${arg:2} ;;
+        a) ALL=1 ;;
+        q) QUIET=1 ;;
+        *) echo -e "Invalid option: ${arg:1}" >&2 && help ;;
+        esac
+    else
+        cases="$cases $arg"
+    fi
+done
+
+if [ "$ALL" ]; then
+    cases=$(find $(dirname $0) -name "*.case")
+fi
+
+# ============================================================================ #
+# Ensure Neko can be found and set default mesh size
+
+if [ "$NEKO_DIR" ]; then
+    PATH=$NEKO_DIR/bin:$PATH
+fi
+
+if [[ -z $(which neko) ]]; then
+    echo -e "Neko not found." >&2
+    echo -e "Please ensure Neko is installed and in your PATH." >&2
+    echo -e "Alternatively, set the NEKO_DIR environment variable." >&2
+    exit 1
+fi
+
+# ============================================================================ #
+# Generate mesh and run case
+
+echo "Generating mesh with dimensions: $Nx $Ny $Nz"
+genmeshbox 0 4 0 1 0 1 $Nx $Ny $Nz .false. .false. .false.
+
+for case in $cases; do
+    echo "Running case: $case"
+    if [ "$QUIET" ]; then
+        neko $case >/dev/null
+    else
+        neko $case >${case%.case}.log
+    fi
+done
+
+# End of file
+# ============================================================================ #
diff --git a/examples/immersed_zones/sphere.case b/examples/immersed_zones/sphere.case
new file mode 100644
index 00000000000..b6395db583f
--- /dev/null
+++ b/examples/immersed_zones/sphere.case
@@ -0,0 +1,89 @@
+{
+    "version": 1.0,
+    "case": {
+        "mesh_file": "box.nmsh",
+        "output_directory": "sphere_fields",
+        "output_at_end": false,
+        "output_boundary": false,
+        "output_checkpoints": false,
+        "end_time": 20.0,
+        "timestep": 5e-5,
+        "numerics": {
+            "time_order": 3,
+            "polynomial_order": 8,
+            "dealias": true
+        },
+        "fluid": {
+            "scheme": "pnpn",
+            "Re": 2000.0,
+            "initial_condition": {
+                "type": "uniform",
+                "value": [
+                    1.0,
+                    0.0,
+                    0.0
+                ]
+            },
+            "inflow_condition": {
+                "type": "uniform",
+                "value": [
+                    1.0
+                ]
+            },
+            "velocity_solver": {
+                "type": "cg",
+                "preconditioner": "jacobi",
+                "projection_space_size": 0,
+                "absolute_tolerance": 1e-8,
+                "max_iterations": 800
+            },
+            "pressure_solver": {
+                "type": "gmres",
+                "preconditioner": "hsmg",
+                "projection_space_size": 0,
+                "absolute_tolerance": 1e-7,
+                "max_iterations": 800
+            },
+            "output_control": "nsamples",
+            "output_value": 250,
+            "boundary_types": [
+                "v",
+                "o",
+                "w",
+                "w",
+                "w",
+                "w"
+            ],
+            "source_terms": [
+                {
+                    "type": "brinkman",
+                    "objects": [
+                        {
+                            "type": "point_zone",
+                            "name": "ball"
+                        }
+                    ],
+                    "brinkman": {
+                        "limits": [
+                            0.0,
+                            1500.0
+                        ],
+                        "penalty": 1.0
+                    }
+                }
+            ]
+        },
+        "point_zones": [
+            {
+                "name": "ball",
+                "geometry": "sphere",
+                "center": [
+                    1.0,
+                    0.5,
+                    0.5
+                ],
+                "radius": 0.1
+            }
+        ]
+    }
+}
\ No newline at end of file
diff --git a/examples/poisson/driver.f90 b/examples/poisson/driver.f90
index 128da2a63b3..400f21350f1 100644
--- a/examples/poisson/driver.f90
+++ b/examples/poisson/driver.f90
@@ -52,7 +52,7 @@ program poisson
 
   n = Xh%lx * Xh%ly * Xh%lz * msh%nelv
 
-  call dir_bc%init(dm)
+  call dir_bc%init(coef)
   call dir_bc%set_g(real(0.0d0,rp))
  
   !user specified
@@ -61,7 +61,7 @@ program poisson
   call dir_bc%finalize()
   call bc_list_init(bclst)
   call bc_list_add(bclst,dir_bc)
-  call solver%init(n, abs_tol = tol)
+  call solver%init(n, niter, abs_tol = tol)
 
   allocate(f(n))
 
diff --git a/examples/rayleigh-benard-cylinder/rayleigh.case b/examples/rayleigh-benard-cylinder/rayleigh.case
index caafd9b2070..f60ea65040c 100644
--- a/examples/rayleigh-benard-cylinder/rayleigh.case
+++ b/examples/rayleigh-benard-cylinder/rayleigh.case
@@ -1,66 +1,78 @@
 {
-"version": 1.0,
-"case": {
-    "mesh_file": "cylinder.nmsh",
-    "output_at_end": true,
-    "output_boundary": true,
-    "end_time": 250,
-    "timestep": 2e-3,
-    "output_checkpoints": true,
-    "checkpoint_control": "simulationtime",
-    "checkpoint_value": 50,
-    "job_timelimit": "12:00:00",
-    "numerics": {
-        "time_order": 3,
-        "polynomial_order": 7,
-        "dealias": true
-    },
-    "fluid": {
-        "scheme": "pnpn",
-        "Ra": 1e8,
-        "source_terms": 
-        [
-            {
-                "type": "user_vector"
-            }
-        ],
-        "initial_condition": {
-            "type": "user"
+    "version": 1.0,
+    "case": {
+        "mesh_file": "cylinder.nmsh",
+        "output_at_end": true,
+        "output_boundary": true,
+        "end_time": 250,
+        "timestep": 2e-3,
+        "output_checkpoints": true,
+        "checkpoint_control": "simulationtime",
+        "checkpoint_value": 50,
+        "job_timelimit": "12:00:00",
+        "numerics": {
+            "time_order": 3,
+            "polynomial_order": 7,
+            "dealias": true
         },
-        "velocity_solver": {
-            "type": "cg",
-            "preconditioner": "jacobi",
-            "projection_space_size": 0,
-            "absolute_tolerance": 1e-6,
-            "max_iterations": 800
+        "fluid": {
+            "scheme": "pnpn",
+            "Ra": 1e8,
+            "source_terms": [
+                {
+                    "type": "boussinesq",
+                    "g" : [0, 0, 1],
+                    "reference_value": 0,
+                    "beta": 1
+                }
+            ],
+            "initial_condition": {
+                "type": "uniform",
+                "value": [
+                    0.0,
+                    0.0,
+                    0.0
+                ]
+            },
+            "velocity_solver": {
+                "type": "cg",
+                "preconditioner": "jacobi",
+                "projection_space_size": 0,
+                "absolute_tolerance": 1e-6,
+                "max_iterations": 800
+            },
+            "pressure_solver": {
+                "type": "gmres",
+                "preconditioner": "hsmg",
+                "projection_space_size": 0,
+                "absolute_tolerance": 1e-4,
+                "max_iterations": 800
+            },
+            "output_control": "nsamples",
+            "output_value": 10,
+            "boundary_types": [
+                "w"
+            ]
         },
-        "pressure_solver": {
-            "type": "gmres",
-            "preconditioner": "hsmg",
-            "projection_space_size": 0,
-            "absolute_tolerance": 1e-4,
-            "max_iterations": 800
+        "scalar": {
+            "enabled": true,
+            "Pr": 1.0,
+            "initial_condition": {
+                "type": "user"
+            }
         },
-        "output_control": "simulationtime",
-        "output_value": 1,
-        "boundary_types": [
-            "w"
+        "simulation_components": [
+            {
+                "type": "probes",
+                "compute_control": "simulationtime",
+                "compute_value": 1,
+                "points_file": "probes.csv",
+                "output_file": "output.csv",
+                "fields": [
+                    "w",
+                    "s"
+                ]
+            }
         ]
-    },
-    "scalar": {
-        "enabled": true,
-        "Pr": 1.0
-    },
-    "simulation_components": 
-    [
-      {
-        "type": "probes",
-        "compute_control": "simulationtime",
-        "compute_value"    : 1,
-        "points_file":  "probes.csv",
-        "output_file":  "output.csv",
-        "fields": ["w","s"]
-       }
-     ]
-}
-}
+    }
+}
\ No newline at end of file
diff --git a/examples/rayleigh-benard-cylinder/rayleigh.f90 b/examples/rayleigh-benard-cylinder/rayleigh.f90
index 19cd856aa86..1c85d608963 100644
--- a/examples/rayleigh-benard-cylinder/rayleigh.f90
+++ b/examples/rayleigh-benard-cylinder/rayleigh.f90
@@ -13,9 +13,8 @@ module user
   ! Register user defined functions (see user_intf.f90)
   subroutine user_setup(u)
     type(user_t), intent(inout) :: u
-    u%fluid_user_ic => set_initial_conditions_for_u_and_s
+    u%scalar_user_ic => set_initial_conditions_for_s
     u%scalar_user_bc => set_scalar_boundary_conditions
-    u%fluid_user_f_vector => set_bousinesq_forcing_term
     u%material_properties => set_material_properties
   end subroutine user_setup
 
@@ -36,8 +35,8 @@ subroutine set_material_properties(t, tstep, rho, mu, cp, lambda, params)
     rho = 1.0_rp
     cp = 1.0_rp
   end subroutine set_material_properties
-   
- 
+
+
   subroutine set_scalar_boundary_conditions(s, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep)
     real(kind=rp), intent(inout) :: s
     real(kind=rp), intent(in) :: x
@@ -55,33 +54,23 @@ subroutine set_scalar_boundary_conditions(s, x, y, z, nx, ny, nz, ix, iy, iz, ie
 
     !> Variables for bias
     real(kind=rp) :: arg, bias
-    
+
     ! This will be used on all zones without labels
     ! e.g. the ones hardcoded to 'v', 'w', etcetc
     s = 1.0_rp - z
 
   end subroutine set_scalar_boundary_conditions
 
-  subroutine set_initial_conditions_for_u_and_s(u, v, w, p, params)
-    type(field_t), intent(inout) :: u
-    type(field_t), intent(inout) :: v
-    type(field_t), intent(inout) :: w
-    type(field_t), intent(inout) :: p
+  subroutine set_initial_conditions_for_s(s, params)
+    type(field_t), intent(inout) :: s
     type(json_file), intent(inout) :: params
-    type(field_t), pointer :: s
     integer :: i, j, k, e
     real(kind=rp) :: rand, r,z
-    s => neko_field_registry%get_field('s')
-
-    !> Initialize with zero velocity
-    call rzero(u%x,u%dof%size())
-    call rzero(v%x,v%dof%size())
-    call rzero(w%x,w%dof%size())
 
     !> Initialize with rand perturbations on temperature
-    call rzero(s%x,w%dof%size())
+    call rzero(s%x,s%dof%size())
     do i = 1, s%dof%size()
-       s%x(i,1,1,1) = 1-s%dof%z(i,1,1,1) 
+       s%x(i,1,1,1) = 1-s%dof%z(i,1,1,1)
     end do
     ! perturb not on element boundaries
     ! Maybe not necessary, but lets be safe
@@ -102,30 +91,9 @@ subroutine set_initial_conditions_for_u_and_s(u, v, w, p, params)
        end do
     end do
     if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_memcpy(s%x,s%x_d,s%dof%size(),HOST_TO_DEVICE)
+       call device_memcpy(s%x, s%x_d, s%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
     end if
 
-  end subroutine set_initial_conditions_for_u_and_s
-
-  subroutine set_bousinesq_forcing_term(f, t)
-    class(fluid_user_source_term_t), intent(inout) :: f
-    real(kind=rp), intent(in) :: t
-    integer :: i
-    type(field_t), pointer :: u, v, w, s
-    real(kind=rp) :: rapr, ta2pr
-    u => neko_field_registry%get_field('u')
-    v => neko_field_registry%get_field('v')
-    w => neko_field_registry%get_field('w')
-    s => neko_field_registry%get_field('s')
-
-    if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_rzero(f%u_d,f%dm%size())
-       call device_rzero(f%v_d,f%dm%size())
-       call device_copy(f%w_d,s%x_d,f%dm%size())
-    else
-       call rzero(f%u,f%dm%size())
-       call rzero(f%v,f%dm%size())
-       call copy(f%w,s%x,f%dm%size())
-    end if
-  end subroutine set_bousinesq_forcing_term
+  end subroutine set_initial_conditions_for_s
 end module user
diff --git a/examples/rayleigh-benard/rayleigh.case b/examples/rayleigh-benard/rayleigh.case
index fc93138b7ea..0681be28e8b 100644
--- a/examples/rayleigh-benard/rayleigh.case
+++ b/examples/rayleigh-benard/rayleigh.case
@@ -1,64 +1,71 @@
 {
-"version": 1.0,
-"case": {
-    "mesh_file": "box.nmsh",
-    "output_at_end": true,
-    "output_boundary": true,
-    "end_time": 100,
-    "timestep": 0.01,
-    "numerics": {
-        "time_order": 3,
-        "polynomial_order": 7,
-        "dealias": true
-    },
-    "fluid": {
-        "scheme": "pnpn",
-        "Ra": 1750,
-        "source_terms": 
-        [
-            {
-                "type": "user_vector"
-            }
-        ],
-        "initial_condition": {
-            "type": "user"
-        },
-        "velocity_solver": {
-            "type": "cg",
-            "preconditioner": "jacobi",
-            "projection_space_size": 4,
-            "absolute_tolerance": 1e-6,
-            "max_iterations": 800
+    "version": 1.0,
+    "case": {
+        "mesh_file": "box.nmsh",
+        "output_at_end": true,
+        "output_boundary": true,
+        "end_time": 100,
+        "timestep": 0.01,
+        "numerics": {
+            "time_order": 3,
+            "polynomial_order": 7,
+            "dealias": true
         },
-        "pressure_solver": {
-            "type": "gmres",
-            "preconditioner": "hsmg",
-            "projection_space_size": 4,
-            "absolute_tolerance": 1e-6,
-            "max_iterations": 800
+        "fluid": {
+            "scheme": "pnpn",
+            "Ra": 1750,
+            "source_terms": [
+                {
+                    "type": "user_vector"
+                }
+            ],
+            "initial_condition": {
+                "type": "uniform",
+                "value": [
+                    0.0,
+                    0.0,
+                    0.0
+                ]
+            },
+            "velocity_solver": {
+                "type": "cg",
+                "preconditioner": "jacobi",
+                "projection_space_size": 4,
+                "absolute_tolerance": 1e-6,
+                "max_iterations": 800
+            },
+            "pressure_solver": {
+                "type": "gmres",
+                "preconditioner": "hsmg",
+                "projection_space_size": 4,
+                "absolute_tolerance": 1e-6,
+                "max_iterations": 800
+            },
+            "output_control": "simulationtime",
+            "output_value": 0.1,
+            "boundary_types": [
+                "",
+                "",
+                "",
+                "",
+                "w",
+                "w"
+            ]
         },
-        "output_control": "simulationtime",
-        "output_value": 0.1,
-        "boundary_types": [
-            "",
-            "",
-            "",
-            "",
-            "w",
-            "w"
-        ]
-    },
-    "scalar": {
-        "enabled": true,
-        "Pr": 0.71,
-        "boundary_types": [
-            "",
-            "",
-            "",
-            "",
-            "d=1",
-            "d=0"
-        ]
+        "scalar": {
+            "enabled": true,
+            "Pr": 0.71,
+            "boundary_types": [
+                "",
+                "",
+                "",
+                "",
+                "d=1",
+                "d=0"
+            ],
+            "initial_condition": {
+                "type": "user"
+            }
+        }
     }
-}
-}
+}
\ No newline at end of file
diff --git a/examples/rayleigh-benard/rayleigh.f90 b/examples/rayleigh-benard/rayleigh.f90
index f580f5099cf..9b43b10f225 100644
--- a/examples/rayleigh-benard/rayleigh.f90
+++ b/examples/rayleigh-benard/rayleigh.f90
@@ -10,7 +10,7 @@ module user
   ! Register user defined functions (see user_intf.f90)
   subroutine user_setup(u)
     type(user_t), intent(inout) :: u
-    u%fluid_user_ic => set_ic
+    u%scalar_user_ic => set_ic
     u%fluid_user_f_vector => forcing
     u%scalar_user_bc => scalar_bc
     u%material_properties => set_material_properties
@@ -27,7 +27,7 @@ subroutine set_material_properties(t, tstep, rho, mu, cp, lambda, params)
     call json_get(params, "case.scalar.Pr", Pr)
 
     Re = 1.0_rp / Pr
-    
+
     mu = 1.0_rp / Re
     lambda = mu / Pr
     rho = 1.0_rp
@@ -48,27 +48,18 @@ subroutine scalar_bc(s, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep)
     integer, intent(in) :: ie
     real(kind=rp), intent(in) :: t
     integer, intent(in) :: tstep
-    ! If we set scalar_bcs(*) = 'user' instead 
+    ! If we set scalar_bcs(*) = 'user' instead
     ! this will be used instead on that zone
     s = 1.0_rp-z
   end subroutine scalar_bc
- 
+
   !> User initial condition
-  subroutine set_ic(u, v, w, p, params)
-    type(field_t), intent(inout) :: u
-    type(field_t), intent(inout) :: v
-    type(field_t), intent(inout) :: w
-    type(field_t), intent(inout) :: p
+  subroutine set_ic(s, params)
+    type(field_t), intent(inout) :: s
     type(json_file), intent(inout) :: params
-    type(field_t), pointer :: s
     integer :: i, e, k, j
     real(kind=rp) :: rand, z
-    s => neko_field_registry%get_field('s')
 
-    call rzero(u%x,u%dof%size())
-    call rzero(v%x,v%dof%size())
-    call rzero(w%x,w%dof%size())
-    
     do i = 1, s%dof%size()
        s%x(i,1,1,1) = 1-s%dof%z(i,1,1,1)
     end do
@@ -92,9 +83,10 @@ subroutine set_ic(u, v, w, p, params)
        end do
     end do
 
-    if ((NEKO_BCKND_CUDA .eq. 1) .or. (NEKO_BCKND_HIP .eq. 1) &
+    if ((NEKO_BCKND_DEVICE .eq. 1) .or. (NEKO_BCKND_HIP .eq. 1) &
        .or. (NEKO_BCKND_OPENCL .eq. 1)) then
-       call device_memcpy(s%x,s%x_d,s%dof%size(),HOST_TO_DEVICE)
+       call device_memcpy(s%x, s%x_d, s%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
     end if
 
 
diff --git a/examples/scalar_mms/.gitignore b/examples/scalar_mms/.gitignore
new file mode 100644
index 00000000000..00aa28d964f
--- /dev/null
+++ b/examples/scalar_mms/.gitignore
@@ -0,0 +1,2 @@
+*.csv
+*.nmsh
diff --git a/examples/scalar_mms/README.md b/examples/scalar_mms/README.md
new file mode 100644
index 00000000000..784b0e27f61
--- /dev/null
+++ b/examples/scalar_mms/README.md
@@ -0,0 +1,53 @@
+# Method of manufactured solutions for the scalar
+
+
+## The method of manufactured solutions
+
+The MMS is a verification technique based on the following. Consider some
+equation $L(s) = 0$. We come up with some desired analytical solution $\hat
+s(x,t)$, and  then analytically compute $\hat f = L(\hat s)$. If we now set
+$\hat f$ as the source term: $L(s) = \hat f$, we expect the solution to follow
+$\hat s(x, t)$.
+Initial conditions are obtained as $\hat s(x, 0)$ and boundary conditions should
+be selected to be compatible with the behaviour of $\hat s$.
+
+## The case
+
+Here we provide a very simple example, where $\hat s(x, t) = \sin(x)$ is used
+for the scalar advection-diffusion equation. The domain is fully periodic with
+10 elements across $x$ and 1 in the other directions. The idea is to make this
+example more interesting later. An additional purpose is to provide a test case
+for scalar source terms.
+
+The advecting velocity is set to 1 in $x$. The source term $\hat f$ is easily
+obtained as $\rho c_p u \cdot \cos(x) - \lambda \sin(x)$.
+Here, we set $\rho = c_p =u = 1$, and $\lambda = 0.01$.
+
+
+## Case setup
+To keep the advecting velocity constant, we enable `freeze` for the `fluid`. The
+initial velocity distribution is set in the user file, and similarly for the
+scalar.
+
+The source term is split into two components for testing purposes. The user
+source term is used to set the source to $\hat f - 1$ and a constant source term
+is additionally added, equal to $1$. Therefore, the sum is equal to $\hat f$.
+
+To analyze the results, we use the `probes` simcomp to extract the values of the
+scalar along a line across $x$ in the center of the domain. A Python script to
+create the probes is provided.
+
+The mesh is generated with `genmeshbox` with periodic conditions in all
+directions, and a single element across $z$ and $y$, to mimic a 1D case.
+
+The script `run.sh` can be used to run the study. We suggest inspecting the
+script and the case file and modifying them to your liking.
+
+A Python script to plot the results is also provided for convenience.
+
+## Expected results
+The following curves will be obtained with the settings in the `run.sh` script.
+One can see that the solution in the end is the same as the initial conditions,
+which is expected based on our form of $\hat s$.
+
+<img src="results.png" alt="Results" width="500"/>
diff --git a/examples/scalar_mms/create_probes.py b/examples/scalar_mms/create_probes.py
new file mode 100644
index 00000000000..22881c6cde7
--- /dev/null
+++ b/examples/scalar_mms/create_probes.py
@@ -0,0 +1,7 @@
+import numpy as np
+
+x = np.linspace(1e-6, 2*np.pi - 1e-6, 200)
+
+p = np.column_stack((x, np.zeros(x.shape) + 1e-10, 0.05*np.ones(x.shape)))
+
+np.savetxt("probes.csv", p, delimiter=',')
diff --git a/examples/scalar_mms/plot.py b/examples/scalar_mms/plot.py
new file mode 100644
index 00000000000..a8e3bea214d
--- /dev/null
+++ b/examples/scalar_mms/plot.py
@@ -0,0 +1,32 @@
+import matplotlib.pyplot as plt
+import numpy as np
+from os.path import join
+
+DATA_PATH = "."
+
+n_probes = 200
+
+
+# %% read data
+data = np.genfromtxt(join(DATA_PATH, "output.csv"), skip_header=n_probes+1, delimiter=",")
+
+
+x = np.linspace(0, np.pi * 2, n_probes)
+ic = np.sin(x)
+fig, (ax1, ax2) = plt.subplots(1, 2)
+
+ax1.plot(x, ic, '--k', label="ICs")
+ax1.plot(x, data[200:, 1], label=r"Final solution")
+
+ax1.set_xlabel(r"$x$")
+ax1.legend()
+ax1.set_xlim(0, np.pi * 2)
+
+
+
+ax2.plot(x, ic - data[200:, 1], '--k', label="Absolute error")
+ax2.set_xlabel(r"$x$")
+ax2.set_ylabel(r"$absolute error$")
+
+plt.tight_layout()
+plt.savefig(join(DATA_PATH, "results.png"), dpi=300)
diff --git a/examples/scalar_mms/results.png b/examples/scalar_mms/results.png
new file mode 100644
index 00000000000..fdefb219b4f
Binary files /dev/null and b/examples/scalar_mms/results.png differ
diff --git a/examples/scalar_mms/run.sh b/examples/scalar_mms/run.sh
new file mode 100755
index 00000000000..985d5f2e385
--- /dev/null
+++ b/examples/scalar_mms/run.sh
@@ -0,0 +1,12 @@
+# Compile the case
+makeneko scalar_mms.f90
+# Generate probes
+python create_probes.py
+
+# Remove results if exist
+rm output.csv
+
+# Generate mesh
+genmeshbox 0 1 0 1 0 1 20 1 1 .false. .true. .true.
+# Run the code
+./neko scalar_mms.case
diff --git a/examples/scalar_mms/scalar_mms.case b/examples/scalar_mms/scalar_mms.case
new file mode 100755
index 00000000000..0a7da6072f9
--- /dev/null
+++ b/examples/scalar_mms/scalar_mms.case
@@ -0,0 +1,75 @@
+{
+  "version": 1,
+  "case": {
+    "mesh_file": "box.nmsh",
+    "output_at_end": false,
+    "output_boundary": false,
+    "output_checkpoints": false,
+    "end_time": 0.1,
+    "timestep": 0.001,
+    "numerics": {
+      "time_order": 2,
+      "polynomial_order": 5,
+      "dealias": true
+    },
+    "fluid": {
+      "scheme": "pnpn",
+      "mu": 1,
+      "rho": 1,
+      "freeze": true,
+      "initial_condition": {
+        "type": "user"
+      },
+      "velocity_solver": {
+        "type": "cg",
+        "preconditioner": "jacobi",
+        "projection_space_size": 0,
+        "absolute_tolerance": 1e-7,
+        "max_iterations": 800
+      },
+      "pressure_solver": {
+        "type": "gmres",
+        "preconditioner": "hsmg",
+        "projection_space_size": 4,
+        "absolute_tolerance": 1e-7,
+        "max_iterations": 800
+      },
+      "output_control": "nsamples",
+      "output_value": 1
+    },
+    "scalar": {
+      "enabled": true,
+      "lambda": 0.01,
+      "cp": 1,
+      "initial_condition": {
+        "type": "user"
+      },
+      "source_terms": [
+        {
+          "type": "user_vector"
+        },
+        {
+          "type": "constant",
+          "values": [
+            "1.0"
+          ]
+        }
+      ],
+      "boundary_types": [
+          "d=0", "n=0.01"
+      ]
+    },
+    "simulation_components": [
+      {
+        "type": "probes",
+        "compute_control": "nsamples",
+        "compute_value": 1,
+        "points_file": "probes.csv",
+        "output_file": "output.csv",
+        "fields": [
+          "s"
+        ]
+      }
+    ]
+  }
+}
diff --git a/examples/scalar_mms/scalar_mms.f90 b/examples/scalar_mms/scalar_mms.f90
new file mode 100644
index 00000000000..0e6eba35c4a
--- /dev/null
+++ b/examples/scalar_mms/scalar_mms.f90
@@ -0,0 +1,97 @@
+module user
+  use neko
+  implicit none
+
+contains
+  !> Register user defined functions (see user_intf.f90)
+  subroutine user_setup(u)
+    type(user_t), intent(inout) :: u
+    u%scalar_user_ic => set_s_ic
+    u%fluid_user_ic => set_velocity
+    u%scalar_user_f_vector => set_source
+    u%user_mesh_setup => user_mesh_scale
+  end subroutine user_setup
+
+  ! Stretch bounds to 2pi
+  subroutine user_mesh_scale(msh)
+    type(mesh_t), intent(inout) :: msh
+    integer :: i, p, nvert
+
+    nvert = size(msh%points)
+    do i = 1, nvert
+       msh%points(i)%x(1) = msh%points(i)%x(1)* pi *2
+       msh%points(i)%x(2) = msh%points(i)%x(2)* pi *2
+    end do
+  end subroutine user_mesh_scale
+
+  !> Set source term
+  subroutine set_source(f, t)
+    class(scalar_user_source_term_t), intent(inout) :: f
+    real(kind=rp), intent(in) :: t
+    real(kind=rp) :: x, y
+    integer :: i
+
+    do i = 1, f%dm%size()
+       x = f%dm%x(i,1,1,1)
+       y = f%dm%y(i,1,1,1)
+
+       ! 0.01 is the viscosity
+       f%s(i,1,1,1) = cos(x) - 0.01 * sin(x) - 1.0_rp
+    end do
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_memcpy(f%s, f%s_d, f%dm%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+    end if
+
+  end subroutine set_source
+
+  !> User initial condition for the scalar
+  subroutine set_s_ic(s, params)
+    type(field_t), intent(inout) :: s
+    type(json_file), intent(inout) :: params
+    integer :: i, e, k, j
+    real(kind=rp) :: x, y
+
+    do i = 1, s%dof%size()
+       x = s%dof%x(i,1,1,1)
+       y = s%dof%y(i,1,1,1)
+
+       s%x(i,1,1,1) = sin(x)
+    end do
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_memcpy(s%x, s%x_d, s%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+    end if
+  end subroutine set_s_ic
+
+  !> Set the advecting velocity field.
+  subroutine set_velocity(u, v, w, p, params)
+    type(field_t), intent(inout) :: u
+    type(field_t), intent(inout) :: v
+    type(field_t), intent(inout) :: w
+    type(field_t), intent(inout) :: p
+    type(json_file), intent(inout) :: params
+    integer :: i, e, k, j
+    real(kind=rp) :: x, y
+
+    do i = 1, u%dof%size()
+
+      ! Simple advection in x
+       u%x(i,1,1,1) = 1
+       v%x(i,1,1,1) = 0
+       w%x(i,1,1,1) = 0
+    end do
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_memcpy(u%x, u%x_d, u%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(v%x, v%x_d, v%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(w%x, w%x_d, w%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+    end if
+  end subroutine set_velocity
+
+end module user
diff --git a/examples/turb_channel/README.md b/examples/turb_channel/README.md
index 30fb9b5b115..4c00979885d 100644
--- a/examples/turb_channel/README.md
+++ b/examples/turb_channel/README.md
@@ -1,3 +1,9 @@
 #Simple turbulent channel case at Re_tau=180
 The channel starts with a slightly perturbed initial condition which becomes turbulent after around 10 time units.
-If one wants to change the mesh and play around, please do so, by generating a new mesh with genmeshbox. 
+If one wants to change the mesh and play around, please do so, by generating a new mesh with genmeshbox.
+To compute thte spatial and temporal averages of the output fields, mean_fields and stats one can use the following contrib scripts:
+- average_fields_in_time, averages a fld series in time
+- average_field_in_space, for channelflow we can directly average in xz
+- postprocess_fluid_stats, computes the mean gradients and the reynolds stresses. 
+
+To see the options for each postprocessing script, run it without any command line arguments, for example: ./average_field_in_space
diff --git a/m4/ax_compiler_vendor.m4 b/m4/ax_compiler_vendor.m4
index 1dd168697b4..616ba69a0f0 100644
--- a/m4/ax_compiler_vendor.m4
+++ b/m4/ax_compiler_vendor.m4
@@ -58,7 +58,7 @@
 #   modified version of the Autoconf Macro, you may extend this special
 #   exception to the GPL to apply to your modified version as well.
 
-#serial 30
+#serial 33
 
 AC_DEFUN([AX_COMPILER_VENDOR], [dnl
     AC_CACHE_CHECK([for _AC_LANG compiler vendor], ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor, [dnl
@@ -79,6 +79,7 @@ AC_DEFUN([AX_COMPILER_VENDOR], [dnl
 		sdcc:		SDCC,__SDCC
 		sx:		_SX
 		sx-aurora:	__ve__
+                nvhpc:          __NVCOMPILER
 		portland:	__PGI
 		gnu:		__GNUC__
 		sun:		__SUNPRO_C,__SUNPRO_CC,__SUNPRO_F90,__SUNPRO_F95
diff --git a/m4/ax_cray.m4 b/m4/ax_cray.m4
index 8fcc747904f..612d4e00cb6 100644
--- a/m4/ax_cray.m4
+++ b/m4/ax_cray.m4
@@ -195,8 +195,12 @@ AC_DEFUN([AX_CRAY_ROCM],[
 	    hip_bcknd="1"
 	  else
 	    AC_MSG_RESULT([no])
-            AC_MSG_ERROR([Cray ROCm Toolkit not found])
-	    have_hip="no"
+            if test "${ROCM_PATH}"; then
+              AX_HIP
+            else
+              AC_MSG_ERROR([Cray ROCm Toolkit not found])
+              have_hip="no"
+            fi
 	  fi
 	fi
 	AC_SUBST(hip_bcknd)
diff --git a/makeneko.in b/makeneko.in
index 2d9b88067c5..a9af13d260a 100644
--- a/makeneko.in
+++ b/makeneko.in
@@ -22,7 +22,7 @@ cat >> usr_driver.f90 << _ACEOF
 program usrneko
   use neko
   use user
-  type(case_t) :: C
+  type(case_t), target :: C
   
   call user_setup(C%usr)
   call neko_init(C)
@@ -33,7 +33,7 @@ program usrneko
 end program usrneko
 _ACEOF
 
-$FC $FCFLAGS -I$includedir_pkg -L$libdir $1 usr_driver.f90\
+$FC $FCFLAGS -I$includedir_pkg -L$libdir $@ usr_driver.f90\
     -lneko @LDFLAGS@ @LIBS@ -o neko
 
 rm -f usr_driver.f90
diff --git a/patches/nvhpc_bge.patch b/patches/nvhpc_bge.patch
index 8fb242baf61..3deed99186b 100644
--- a/patches/nvhpc_bge.patch
+++ b/patches/nvhpc_bge.patch
@@ -3,9 +3,9 @@ index 0793c48a0..1419e9378 100644
 --- a/src/common/signal.f90
 +++ b/src/common/signal.f90
 @@ -99,11 +99,11 @@ contains
- 
+
      usr12 = sighdl_usr()
-     
+
 -    if (bge(usr12, usr)) then
 -       raised = .true.
 -    else
@@ -16,6 +16,6 @@ index 0793c48a0..1419e9378 100644
 +  !  else
 +  !     raised = .false.
 +  !  end if
-     
+
    end function signal_usr
-   
+
diff --git a/reframe/src/rayleigh.case.template b/reframe/src/rayleigh.case.template
index d6c002118ba..d7d6ddd51a3 100644
--- a/reframe/src/rayleigh.case.template
+++ b/reframe/src/rayleigh.case.template
@@ -14,14 +14,15 @@
     "fluid": {
         "scheme": "pnpn",
         "Ra": 1750,
-        "source_terms": 
+        "source_terms":
         [
             {
                 "type": "user_vector"
             }
         ],
         "initial_condition": {
-            "type": "user"
+            "type": "uniform",
+            "value": [0,0,0]
         },
         "velocity_solver": {
             "type": "cg",
@@ -51,6 +52,9 @@
     "scalar": {
         "enabled": true,
         "Pr": 0.71,
+        "initial_condition": {
+            "type": "user"
+        },
         "boundary_types": [
             "",
             "",
diff --git a/reframe/src/rayleigh.f90 b/reframe/src/rayleigh.f90
index 7e846a5e69a..1b1bc7db137 100644
--- a/reframe/src/rayleigh.f90
+++ b/reframe/src/rayleigh.f90
@@ -12,8 +12,8 @@ module user
   ! Register user defined functions (see user_intf.f90)
   subroutine user_setup(u)
     type(user_t), intent(inout) :: u
-    u%fluid_user_ic => set_ic
     u%fluid_user_f_vector => forcing
+    u%scalar_user_ic => set_ic
     u%scalar_user_bc => scalar_bc
     u%material_properties => set_material_properties
   end subroutine user_setup
@@ -29,7 +29,7 @@ subroutine set_material_properties(t, tstep, rho, mu, cp, lambda, params)
     call json_get(params, "case.scalar.Pr", Pr)
 
     Re = 1.0_rp / Pr
-    
+
     mu = 1.0_rp / Re
     lambda = mu / Pr
     rho = 1.0_rp
@@ -50,27 +50,18 @@ subroutine scalar_bc(s, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep)
     integer, intent(in) :: ie
     real(kind=rp), intent(in) :: t
     integer, intent(in) :: tstep
-    ! If we set scalar_bcs(*) = 'user' instead 
+    ! If we set scalar_bcs(*) = 'user' instead
     ! this will be used instead on that zone
     s = 1.0_rp-z
   end subroutine scalar_bc
- 
-  !> User initial condition
-  subroutine set_ic(u, v, w, p, params)
-    type(field_t), intent(inout) :: u
-    type(field_t), intent(inout) :: v
-    type(field_t), intent(inout) :: w
-    type(field_t), intent(inout) :: p
+
+  !> User initial condition for the scalar field
+  subroutine set_ic(s, params)
+    type(field_t), intent(inout) :: s
     type(json_file), intent(inout) :: params
-    type(field_t), pointer :: s
     integer :: i, e, k, j
     real(kind=rp) :: rand, z
-    s => neko_field_registry%get_field('s')
 
-    call rzero(u%x,u%dof%size())
-    call rzero(v%x,v%dof%size())
-    call rzero(w%x,w%dof%size())
-    
     do i = 1, s%dof%size()
        s%x(i,1,1,1) = 1-s%dof%z(i,1,1,1)
     end do
@@ -96,15 +87,14 @@ subroutine set_ic(u, v, w, p, params)
 
     if ((NEKO_BCKND_CUDA .eq. 1) .or. (NEKO_BCKND_HIP .eq. 1) &
        .or. (NEKO_BCKND_OPENCL .eq. 1)) then
-       call device_memcpy(s%x,s%x_d,s%dof%size(),HOST_TO_DEVICE)
+       call device_memcpy(s%x, s%x_d, s%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
     end if
 
-
   end subroutine set_ic
 
 
 
-
   !> Forcing
   subroutine forcing(f, t)
     class(fluid_user_source_term_t), intent(inout) :: f
diff --git a/src/.depends b/src/.depends
index ce1e11eaecd..c96c2625ed2 100644
--- a/src/.depends
+++ b/src/.depends
@@ -15,18 +15,20 @@ common/json_utils.o : common/json_utils.f90 common/utils.o config/num_types.o
 common/system.o : common/system.f90 
 math/mxm_wrapper.o : math/mxm_wrapper.F90 common/utils.o config/num_types.o 
 sem/speclib.o : sem/speclib.f90 common/utils.o config/num_types.o 
+qoi/drag_torque.o : qoi/drag_torque.f90 math/operators.o config/num_types.o sem/space.o math/math.o comm/comm.o mesh/facet_zone.o mesh/mesh.o sem/coef.o field/field.o 
 sem/local_interpolation.o : sem/local_interpolation.f90 config/neko_config.o math/bcknd/device/device_math.o device/device.o field/field_list.o field/field.o common/utils.o math/fast3d.o math/math.o mesh/point.o config/num_types.o sem/space.o math/tensor.o 
 math/math.o : math/math.f90 comm/comm.o config/num_types.o 
 math/mathops.o : math/mathops.f90 config/num_types.o 
 math/fast3d.o : math/fast3d.f90 math/math.o sem/speclib.o config/num_types.o 
 sem/space.o : sem/space.f90 math/mxm_wrapper.o math/tensor.o math/math.o math/fast3d.o common/utils.o device/device.o sem/speclib.o config/num_types.o config/neko_config.o 
+sem/map_1d.o : sem/map_1d.f90 comm/mpi_types.o math/math.o common/utils.o common/log.o comm/comm.o device/device.o mesh/mesh.o gs/gather_scatter.o sem/dofmap.o sem/space.o config/num_types.o 
 sem/dofmap.o : sem/dofmap.f90 mesh/hex.o mesh/quad.o mesh/element.o math/math.o device/device.o math/tensor.o math/fast3d.o common/utils.o config/num_types.o adt/tuple.o sem/space.o mesh/mesh.o config/neko_config.o 
 sem/coef.o : sem/coef.f90 device/device.o math/mxm_wrapper.o sem/bcknd/device/device_coef.o math/bcknd/device/device_math.o mesh/mesh.o math/math.o sem/space.o sem/dofmap.o config/num_types.o config/neko_config.o gs/gs_ops.o gs/gather_scatter.o 
 sem/cpr.o : sem/cpr.f90 sem/dofmap.o common/log.o math/mxm_wrapper.o math/tensor.o sem/coef.o mesh/mesh.o math/math.o sem/space.o field/field.o config/num_types.o config/neko_config.o gs/gather_scatter.o 
-sem/spectral_error_indicator.o : sem/spectral_error_indicator.f90 common/utils.o comm/comm.o device/device.o config/neko_config.o gs/gather_scatter.o math/bcknd/device/device_math.o math/tensor.o io/file.o math/math.o field/field_list.o sem/coef.o field/field.o common/log.o config/num_types.o 
+sem/spectral_error_indicator.o : sem/spectral_error_indicator.f90 common/utils.o comm/comm.o device/device.o config/neko_config.o gs/gather_scatter.o math/bcknd/device/device_math.o math/tensor.o io/file.o math/math.o field/field_list.o sem/coef.o field/field.o config/num_types.o 
 common/time_interpolator.o : common/time_interpolator.f90 common/utils.o math/math.o math/bcknd/device/device_math.o config/neko_config.o field/field.o config/num_types.o 
-sem/interpolation.o : sem/interpolation.f90 mesh/point.o sem/coef.o math/mxm_wrapper.o sem/space.o math/bcknd/cpu/tensor_cpu.o math/tensor.o field/field.o math/fast3d.o device/device.o config/num_types.o config/neko_config.o 
-sem/point_interpolator.o : sem/point_interpolator.f90 config/neko_config.o math/bcknd/device/device_math.o device/device.o field/field_list.o field/field.o common/utils.o math/fast3d.o math/math.o mesh/point.o config/num_types.o sem/space.o math/tensor.o 
+sem/interpolation.o : sem/interpolation.f90 sem/space.o math/bcknd/cpu/tensor_cpu.o math/tensor.o math/fast3d.o device/device.o config/num_types.o config/neko_config.o 
+sem/point_interpolator.o : sem/point_interpolator.f90 config/neko_config.o math/bcknd/device/device_math.o device/device.o common/utils.o math/fast3d.o math/math.o mesh/point.o config/num_types.o sem/space.o math/tensor.o 
 sem/bcknd/device/device_coef.o : sem/bcknd/device/device_coef.F90 common/utils.o config/num_types.o 
 gs/gs_bcknd.o : gs/gs_bcknd.f90 config/num_types.o 
 gs/bcknd/cpu/gs_cpu.o : gs/bcknd/cpu/gs_cpu.f90 gs/gs_ops.o gs/gs_bcknd.o config/num_types.o 
@@ -40,7 +42,7 @@ gs/gather_scatter.o : gs/gather_scatter.f90 device/device.o common/profiler.o co
 mesh/entity.o : mesh/entity.f90 
 mesh/point.o : mesh/point.f90 mesh/entity.o math/math.o config/num_types.o 
 mesh/element.o : mesh/element.f90 mesh/point.o adt/tuple.o mesh/entity.o config/num_types.o 
-math/ax.o : math/ax.f90 mesh/mesh.o field/field.o sem/space.o sem/coef.o config/num_types.o 
+math/ax.o : math/ax.f90 mesh/mesh.o sem/space.o sem/coef.o config/num_types.o 
 mesh/quad.o : mesh/quad.f90 mesh/point.o adt/tuple.o mesh/element.o config/num_types.o 
 mesh/hex.o : mesh/hex.f90 mesh/point.o adt/tuple.o mesh/element.o config/num_types.o 
 mesh/tet.o : mesh/tet.f90 mesh/point.o adt/tuple.o mesh/element.o config/num_types.o 
@@ -53,14 +55,17 @@ mesh/facet_zone.o : mesh/facet_zone.f90 common/utils.o adt/stack.o adt/tuple.o
 mesh/point_zone.o : mesh/point_zone.f90 device/device.o config/neko_config.o sem/dofmap.o common/utils.o config/num_types.o adt/stack.o 
 mesh/point_zones/sphere_point_zone.o : mesh/point_zones/sphere_point_zone.f90 math/math.o common/json_utils.o config/num_types.o mesh/point_zone.o 
 mesh/point_zones/box_point_zone.o : mesh/point_zones/box_point_zone.f90 math/math.o common/json_utils.o config/num_types.o mesh/point_zone.o 
-mesh/point_zone_factory.o : mesh/point_zone_factory.f90 common/utils.o sem/dofmap.o common/json_utils.o mesh/point_zones/sphere_point_zone.o mesh/point_zones/box_point_zone.o mesh/point_zone.o 
-mesh/point_zone_registry.o : mesh/point_zone_registry.f90 common/json_utils.o common/utils.o sem/dofmap.o mesh/point_zone_factory.o mesh/point_zone.o 
+mesh/point_zones/cylinder_point_zone.o : mesh/point_zones/cylinder_point_zone.f90 common/utils.o common/json_utils.o config/num_types.o mesh/point_zone.o 
+mesh/point_zone_fctry.o : mesh/point_zone_fctry.f90 common/utils.o sem/dofmap.o common/json_utils.o mesh/point_zones/cylinder_point_zone.o mesh/point_zones/sphere_point_zone.o mesh/point_zones/box_point_zone.o mesh/point_zone.o 
+mesh/point_zone_registry.o : mesh/point_zone_registry.f90 common/json_utils.o common/utils.o sem/space.o mesh/mesh.o sem/dofmap.o mesh/point_zone_fctry.o mesh/point_zone.o 
 mesh/mesh.o : mesh/mesh.f90 mesh/curve.o adt/uset.o math/math.o mesh/facet_zone.o comm/comm.o common/distdata.o common/datadist.o adt/htable.o adt/tuple.o adt/stack.o common/utils.o mesh/quad.o mesh/hex.o mesh/element.o mesh/point.o config/num_types.o 
 mesh/octree.o : mesh/octree.f90 common/utils.o mesh/point.o config/num_types.o 
+mesh/search_tree/aabb.o : mesh/search_tree/aabb.f90 common/utils.o mesh/tet_mesh.o mesh/tri_mesh.o mesh/mesh.o mesh/hex.o mesh/tet.o mesh/quad.o mesh/tri.o mesh/point.o mesh/element.o config/num_types.o 
+mesh/aabb_tree.o : mesh/aabb_tree.f90 adt/stack.o common/utils.o config/num_types.o mesh/tri.o mesh/search_tree/aabb.o 
 mesh/tet_mesh.o : mesh/tet_mesh.f90 common/utils.o mesh/point.o mesh/tet.o mesh/mesh.o 
 mesh/tri_mesh.o : mesh/tri_mesh.f90 mesh/point.o mesh/tri.o 
 field/field_registry.o : field/field_registry.f90 adt/htable.o common/utils.o sem/dofmap.o field/field.o 
-field/scratch_registry.o : field/scratch_registry.f90 common/utils.o sem/dofmap.o field/field.o config/num_types.o 
+field/scratch_registry.o : field/scratch_registry.f90 sem/dofmap.o field/field.o 
 field/field.o : field/field.f90 sem/dofmap.o sem/space.o mesh/mesh.o math/math.o device/device.o config/num_types.o math/bcknd/device/device_math.o config/neko_config.o 
 field/field_list.o : field/field_list.f90 field/field.o 
 field/field_series.o : field/field_series.f90 field/field.o config/num_types.o 
@@ -77,9 +82,10 @@ math/bcknd/sx/sx_conv1.o : math/bcknd/sx/sx_conv1.f90 config/num_types.o
 math/bcknd/sx/sx_dudxyz.o : math/bcknd/sx/sx_dudxyz.f90 math/math.o config/num_types.o 
 math/bcknd/sx/sx_opgrad.o : math/bcknd/sx/sx_opgrad.f90 config/num_types.o 
 math/bcknd/sx/sx_cfl.o : math/bcknd/sx/sx_cfl.f90 config/num_types.o 
-math/operators.o : math/operators.f90 math/bcknd/device/device_math.o device/device.o comm/comm.o math/math.o field/field.o sem/coef.o sem/space.o math/bcknd/device/opr_device.o math/bcknd/xsmm/opr_xsmm.o math/bcknd/sx/opr_sx.o math/bcknd/cpu/opr_cpu.o config/num_types.o config/neko_config.o 
-math/bcknd/cpu/opr_cpu.o : math/bcknd/cpu/opr_cpu.f90 math/mathops.o gs/gather_scatter.o field/field.o mesh/mesh.o math/math.o sem/coef.o sem/space.o config/num_types.o math/bcknd/cpu/conv1.o math/bcknd/cpu/cdtp.o math/bcknd/cpu/opgrad.o math/bcknd/cpu/dudxyz.o 
-math/bcknd/sx/opr_sx.o : math/bcknd/sx/opr_sx.f90 math/mathops.o field/field.o mesh/mesh.o math/math.o sem/coef.o sem/space.o config/num_types.o gs/gather_scatter.o math/bcknd/sx/sx_cfl.o math/bcknd/sx/sx_cdtp.o math/bcknd/sx/sx_conv1.o math/bcknd/sx/sx_opgrad.o math/bcknd/sx/sx_dudxyz.o 
+math/bcknd/sx/sx_lambda2.o : math/bcknd/sx/sx_lambda2.f90 math/math.o config/num_types.o 
+math/operators.o : math/operators.f90 comm/comm.o math/bcknd/device/device_math.o device/device.o math/math.o field/field.o sem/coef.o sem/space.o math/bcknd/device/opr_device.o math/bcknd/xsmm/opr_xsmm.o math/bcknd/sx/opr_sx.o math/bcknd/cpu/opr_cpu.o config/num_types.o config/neko_config.o 
+math/bcknd/cpu/opr_cpu.o : math/bcknd/cpu/opr_cpu.f90 math/mathops.o gs/gather_scatter.o field/field.o math/math.o sem/coef.o sem/space.o config/num_types.o math/bcknd/cpu/conv1.o math/bcknd/cpu/cdtp.o math/bcknd/cpu/opgrad.o math/bcknd/cpu/dudxyz.o 
+math/bcknd/sx/opr_sx.o : math/bcknd/sx/opr_sx.f90 math/mathops.o field/field.o math/math.o sem/coef.o sem/space.o config/num_types.o gs/gather_scatter.o math/bcknd/sx/sx_lambda2.o math/bcknd/sx/sx_cfl.o math/bcknd/sx/sx_cdtp.o math/bcknd/sx/sx_conv1.o math/bcknd/sx/sx_opgrad.o math/bcknd/sx/sx_dudxyz.o 
 math/bcknd/xsmm/opr_xsmm.o : math/bcknd/xsmm/opr_xsmm.F90 math/mathops.o gs/gather_scatter.o field/field.o mesh/mesh.o math/math.o sem/coef.o sem/space.o math/mxm_wrapper.o config/num_types.o 
 math/bcknd/device/opr_device.o : math/bcknd/device/opr_device.F90 comm/comm.o common/utils.o field/field.o mesh/mesh.o sem/coef.o sem/space.o device/device.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o config/num_types.o gs/gather_scatter.o 
 math/tensor.o : math/tensor.f90 device/device.o config/neko_config.o math/mxm_wrapper.o config/num_types.o math/bcknd/device/tensor_device.o math/bcknd/sx/tensor_sx.o math/bcknd/cpu/tensor_cpu.o math/bcknd/xsmm/tensor_xsmm.o 
@@ -90,49 +96,54 @@ math/fdm.o : math/fdm.f90 comm/comm.o common/utils.o device/device.o math/bcknd/
 math/bcknd/cpu/fdm_cpu.o : math/bcknd/cpu/fdm_cpu.f90 math/bcknd/cpu/tensor_cpu.o config/num_types.o 
 math/bcknd/sx/fdm_sx.o : math/bcknd/sx/fdm_sx.f90 math/bcknd/sx/tensor_sx.o config/num_types.o 
 math/bcknd/xsmm/fdm_xsmm.o : math/bcknd/xsmm/fdm_xsmm.f90 math/bcknd/xsmm/tensor_xsmm.o config/num_types.o 
-math/schwarz.o : math/schwarz.f90 config/neko_config.o device/device.o math/fdm.o math/bcknd/device/device_math.o math/bcknd/device/device_schwarz.o gs/gather_scatter.o bc/dirichlet.o bc/bc.o sem/dofmap.o sem/space.o mesh/mesh.o math/math.o config/num_types.o 
+math/schwarz.o : math/schwarz.f90 config/neko_config.o device/device.o math/fdm.o math/bcknd/device/device_math.o math/bcknd/device/device_schwarz.o gs/gather_scatter.o bc/bc.o sem/dofmap.o sem/space.o mesh/mesh.o math/math.o config/num_types.o 
 math/vector.o : math/vector.f90 common/utils.o math/bcknd/device/device_math.o device/device.o config/num_types.o config/neko_config.o 
 math/matrix.o : math/matrix.f90 common/utils.o math/bcknd/device/device_math.o device/device.o config/num_types.o config/neko_config.o 
+math/signed_distance.o : math/signed_distance.f90 adt/stack.o mesh/search_tree/aabb.o mesh/point.o common/utils.o mesh/aabb_tree.o mesh/tri_mesh.o mesh/tri.o field/field.o config/num_types.o 
 common/checkpoint.o : common/checkpoint.f90 mesh/mesh.o common/utils.o field/field.o device/device.o sem/space.o field/field_series.o config/num_types.o config/neko_config.o 
-io/generic_file.o : io/generic_file.f90 config/num_types.o 
+io/generic_file.o : io/generic_file.f90 comm/comm.o common/utils.o config/num_types.o 
 io/map_file.o : io/map_file.f90 io/format/map.o comm/comm.o common/utils.o io/generic_file.o 
 io/re2_file.o : io/re2_file.f90 common/log.o adt/htable.o io/map_file.o io/format/map.o io/format/re2.o common/datadist.o comm/mpi_types.o comm/comm.o mesh/point.o mesh/mesh.o common/utils.o config/num_types.o io/generic_file.o 
 io/rea_file.o : io/rea_file.f90 common/log.o adt/htable.o common/datadist.o comm/comm.o io/map_file.o io/re2_file.o io/format/rea.o io/format/map.o mesh/point.o mesh/mesh.o common/utils.o config/num_types.o io/generic_file.o 
-io/fld_file.o : io/fld_file.f90 comm/mpi_types.o common/datadist.o comm/comm.o common/utils.o mesh/mesh.o fluid/mean_sqr_flow.o fluid/mean_flow.o io/fld_file_data.o math/vector.o common/structs.o sem/space.o sem/dofmap.o field/field_list.o field/field.o io/generic_file.o 
+io/fld_file.o : io/fld_file.f90 comm/mpi_types.o math/math.o common/datadist.o comm/comm.o common/utils.o mesh/mesh.o fluid/mean_sqr_flow.o fluid/mean_flow.o io/fld_file_data.o math/vector.o common/structs.o sem/space.o sem/dofmap.o field/field_list.o field/field.o io/generic_file.o 
 io/fld_file_data.o : io/fld_file_data.f90 math/vector.o math/math.o config/num_types.o 
 io/vtk_file.o : io/vtk_file.f90 comm/comm.o common/log.o mesh/tri_mesh.o mesh/tet_mesh.o field/mesh_field.o sem/dofmap.o field/field.o mesh/mesh.o common/utils.o io/generic_file.o config/num_types.o 
-io/stl_file.o : io/stl_file.f90 io/format/stl.o comm/comm.o comm/mpi_types.o mesh/point.o common/log.o mesh/tri_mesh.o io/generic_file.o config/num_types.o 
+io/stl_file.o : io/stl_file.f90 io/format/stl.o comm/comm.o common/utils.o comm/mpi_types.o mesh/point.o common/log.o mesh/tri_mesh.o io/generic_file.o config/num_types.o 
 io/nmsh_file.o : io/nmsh_file.f90 common/log.o comm/mpi_types.o common/datadist.o mesh/element.o io/format/nmsh.o adt/tuple.o mesh/point.o common/utils.o mesh/mesh.o comm/comm.o io/generic_file.o 
-io/chkp_file.o : io/chkp_file.f90 common/global_interpolation.o comm/comm.o comm/mpi_types.o sem/interpolation.o math/math.o mesh/mesh.o sem/space.o common/utils.o sem/dofmap.o field/field.o config/num_types.o common/checkpoint.o field/field_series.o io/generic_file.o 
+io/chkp_file.o : io/chkp_file.f90 common/global_interpolation.o comm/comm.o comm/mpi_types.o sem/interpolation.o math/math.o mesh/mesh.o sem/space.o common/utils.o math/bcknd/device/device_math.o sem/dofmap.o field/field.o config/num_types.o common/checkpoint.o field/field_series.o io/generic_file.o 
 io/csv_file.o : io/csv_file.f90 comm/comm.o common/log.o config/num_types.o common/utils.o io/generic_file.o math/matrix.o math/vector.o 
-io/file.o : io/file.f90 io/csv_file.o io/stl_file.o io/vtk_file.o io/fld_file_data.o io/fld_file.o io/re2_file.o io/rea_file.o io/map_file.o io/chkp_file.o io/nmsh_file.o io/generic_file.o common/utils.o 
+io/file.o : io/file.f90 io/csv_file.o io/stl_file.o io/vtk_file.o io/fld_file_data.o io/fld_file.o io/re2_file.o io/rea_file.o io/map_file.o io/chkp_file.o io/nmsh_file.o io/generic_file.o config/num_types.o common/utils.o 
 io/output.o : io/output.f90 io/file.o config/num_types.o 
-io/fluid_output.o : io/fluid_output.f90 io/output.o device/device.o config/neko_config.o field/field_list.o scalar/scalar_scheme.o fluid/fluid_scheme.o 
+io/fluid_output.o : io/fluid_output.f90 io/output.o device/device.o config/neko_config.o field/field_list.o scalar/scalar_scheme.o fluid/fluid_scheme.o config/num_types.o 
+io/fld_file_output.o : io/fld_file_output.f90 io/output.o device/device.o config/neko_config.o field/field_list.o config/num_types.o 
 io/chkp_output.o : io/chkp_output.f90 config/num_types.o io/output.o common/checkpoint.o 
 io/mean_flow_output.o : io/mean_flow_output.f90 io/output.o device/device.o config/num_types.o fluid/mean_flow.o 
 io/fluid_stats_output.o : io/fluid_stats_output.f90 io/output.o device/device.o config/num_types.o config/neko_config.o fluid/fluid_stats.o 
 io/mean_sqr_flow_output.o : io/mean_sqr_flow_output.f90 io/output.o config/num_types.o fluid/mean_sqr_flow.o 
 io/data_streamer.o : io/data_streamer.F90 config/neko_config.o comm/mpi_types.o comm/comm.o device/device.o common/utils.o sem/coef.o field/field.o config/num_types.o 
-common/sampler.o : common/sampler.f90 common/time_based_controller.o config/num_types.o common/profiler.o common/utils.o common/log.o comm/comm.o io/output.o 
+common/sampler.o : common/sampler.f90 common/time_based_controller.o config/num_types.o common/profiler.o common/utils.o common/log.o comm/comm.o io/fld_file.o io/output.o 
 common/global_interpolation.o : common/global_interpolation.F90 comm/mpi_types.o math/math.o comm/comm.o sem/local_interpolation.o common/utils.o common/log.o mesh/mesh.o sem/dofmap.o sem/space.o config/num_types.o 
 common/profiler.o : common/profiler.F90 common/craypat.o device/hip/roctx.o device/cuda/nvtx.o device/device.o config/neko_config.o 
 common/craypat.o : common/craypat.F90 common/utils.o adt/stack.o 
-bc/bc.o : bc/bc.f90 common/utils.o adt/tuple.o adt/stack.o mesh/facet_zone.o mesh/mesh.o sem/space.o sem/dofmap.o device/device.o config/num_types.o config/neko_config.o 
+bc/bc.o : bc/bc.f90 common/utils.o adt/tuple.o adt/stack.o mesh/facet_zone.o mesh/mesh.o sem/space.o sem/coef.o sem/dofmap.o device/device.o config/num_types.o config/neko_config.o 
 bc/dirichlet.o : bc/dirichlet.f90 bc/bc.o config/num_types.o bc/bcknd/device/device_dirichlet.o 
-bc/dong_outflow.o : bc/dong_outflow.f90 bc/bcknd/device/device_dong_outflow.o common/utils.o sem/coef.o sem/dofmap.o field/field.o bc/bc.o config/num_types.o device/device.o bc/dirichlet.o config/neko_config.o 
+bc/neumann.o : bc/neumann.f90 sem/coef.o common/utils.o bc/bc.o config/num_types.o 
+bc/dong_outflow.o : bc/dong_outflow.f90 field/field_registry.o bc/bcknd/device/device_dong_outflow.o common/utils.o sem/coef.o sem/dofmap.o field/field.o bc/bc.o config/num_types.o device/device.o bc/dirichlet.o config/neko_config.o 
 bc/wall.o : bc/wall.f90 bc/dirichlet.o config/num_types.o bc/bcknd/device/device_wall.o 
 bc/inflow.o : bc/inflow.f90 bc/dirichlet.o config/num_types.o bc/bcknd/device/device_inflow.o 
+bc/field_dirichlet.o : bc/field_dirichlet.f90 sem/dofmap.o math/bcknd/device/device_math.o math/math.o field/field_list.o field/field.o common/utils.o device/device.o bc/bc.o bc/dirichlet.o sem/coef.o config/num_types.o 
+bc/field_dirichlet_vector.o : bc/field_dirichlet_vector.f90 bc/field_dirichlet.o sem/dofmap.o math/bcknd/device/device_math.o math/math.o field/field_list.o field/field.o common/utils.o device/device.o bc/bc.o bc/dirichlet.o sem/coef.o config/num_types.o 
 bc/usr_inflow.o : bc/usr_inflow.f90 common/utils.o bc/bcknd/device/device_inhom_dirichlet.o device/device.o bc/inflow.o sem/coef.o config/num_types.o 
 bc/usr_scalar.o : bc/usr_scalar.f90 common/utils.o bc/bcknd/device/device_inhom_dirichlet.o device/device.o bc/dirichlet.o sem/coef.o config/num_types.o 
 bc/facet_normal.o : bc/facet_normal.f90 common/utils.o bc/dirichlet.o sem/coef.o math/math.o config/num_types.o bc/bcknd/device/device_facet_normal.o 
-bc/symmetry.o : bc/symmetry.f90 adt/tuple.o adt/stack.o common/utils.o math/math.o sem/coef.o bc/bc.o bc/dirichlet.o config/num_types.o config/neko_config.o bc/bcknd/device/device_symmetry.o 
+bc/symmetry.o : bc/symmetry.f90 sem/coef.o adt/tuple.o adt/stack.o common/utils.o math/math.o bc/bc.o bc/dirichlet.o config/num_types.o config/neko_config.o bc/bcknd/device/device_symmetry.o 
 bc/non_normal.o : bc/non_normal.f90 adt/stack.o common/utils.o math/math.o sem/coef.o device/device.o adt/tuple.o bc/dirichlet.o config/num_types.o config/neko_config.o bc/symmetry.o 
 bc/blasius.o : bc/blasius.f90 fluid/flow_profile.o bc/bcknd/device/device_inhom_dirichlet.o device/device.o bc/inflow.o common/utils.o sem/coef.o config/num_types.o 
 krylov/precon.o : krylov/precon.f90 config/num_types.o 
 krylov/krylov.o : krylov/krylov.f90 config/neko_config.o krylov/bcknd/device/pc_identity_device.o krylov/pc_identity.o bc/bc.o common/utils.o field/field.o mesh/mesh.o sem/coef.o krylov/precon.o config/num_types.o math/ax.o gs/gather_scatter.o 
 krylov/pc_identity.o : krylov/pc_identity.f90 config/num_types.o krylov/precon.o math/math.o 
 krylov/precon_fctry.o : krylov/precon_fctry.f90 config/neko_config.o common/utils.o krylov/pc_hsmg.o krylov/bcknd/device/pc_jacobi_device.o krylov/bcknd/sx/pc_jacobi_sx.o krylov/bcknd/cpu/pc_jacobi.o krylov/bcknd/device/pc_identity_device.o krylov/pc_identity.o krylov/precon.o 
-krylov/krylov_fctry.o : krylov/krylov_fctry.f90 config/neko_config.o common/utils.o krylov/precon.o krylov/krylov.o config/num_types.o krylov/bcknd/device/gmres_device.o krylov/bcknd/sx/gmres_sx.o krylov/bcknd/cpu/gmres.o krylov/bcknd/cpu/bicgstab.o krylov/bcknd/device/pipecg_device.o krylov/bcknd/sx/pipecg_sx.o krylov/bcknd/cpu/pipecg.o krylov/bcknd/cpu/cacg.o krylov/bcknd/device/cg_device.o krylov/bcknd/sx/cg_sx.o krylov/bcknd/cpu/cg.o 
+krylov/krylov_fctry.o : krylov/krylov_fctry.f90 config/neko_config.o common/utils.o krylov/precon.o krylov/krylov.o config/num_types.o krylov/bcknd/device/gmres_device.o krylov/bcknd/sx/gmres_sx.o krylov/bcknd/cpu/gmres.o krylov/bcknd/cpu/bicgstab.o krylov/bcknd/device/fusedcg_device.o krylov/bcknd/device/pipecg_device.o krylov/bcknd/sx/pipecg_sx.o krylov/bcknd/cpu/pipecg.o krylov/bcknd/cpu/cacg.o krylov/bcknd/device/cg_device.o krylov/bcknd/sx/cg_sx.o krylov/bcknd/cpu/cg.o 
 krylov/bcknd/cpu/cg.o : krylov/bcknd/cpu/cg.f90 comm/comm.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o math/ax.o krylov/precon.o krylov/krylov.o config/num_types.o 
 krylov/bcknd/cpu/cacg.o : krylov/bcknd/cpu/cacg.f90 math/mxm_wrapper.o comm/comm.o common/utils.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o math/ax.o krylov/precon.o krylov/krylov.o config/num_types.o 
 krylov/bcknd/cpu/pipecg.o : krylov/bcknd/cpu/pipecg.f90 comm/comm.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o config/num_types.o math/ax.o krylov/precon.o krylov/krylov.o 
@@ -143,12 +154,13 @@ krylov/bcknd/sx/cg_sx.o : krylov/bcknd/sx/cg_sx.f90 math/math.o bc/bc.o gs/gathe
 krylov/bcknd/sx/pipecg_sx.o : krylov/bcknd/sx/pipecg_sx.f90 comm/comm.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o config/num_types.o math/ax.o krylov/precon.o krylov/krylov.o 
 krylov/bcknd/sx/gmres_sx.o : krylov/bcknd/sx/gmres_sx.f90 comm/comm.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o config/num_types.o math/ax.o krylov/precon.o krylov/krylov.o 
 krylov/bcknd/sx/pc_jacobi_sx.o : krylov/bcknd/sx/pc_jacobi_sx.f90 gs/gather_scatter.o config/num_types.o sem/dofmap.o sem/coef.o krylov/precon.o math/math.o 
-krylov/bcknd/device/cg_device.o : krylov/bcknd/device/cg_device.f90 math/bcknd/device/device_math.o device/device.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o math/ax.o krylov/precon.o krylov/krylov.o config/num_types.o 
+krylov/bcknd/device/cg_device.o : krylov/bcknd/device/cg_device.f90 math/bcknd/device/device_math.o device/device.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o math/ax.o krylov/precon.o krylov/krylov.o config/num_types.o 
 krylov/bcknd/device/pipecg_device.o : krylov/bcknd/device/pipecg_device.F90 comm/comm.o device/device.o math/bcknd/device/device_math.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o config/num_types.o math/ax.o krylov/precon.o krylov/krylov.o 
+krylov/bcknd/device/fusedcg_device.o : krylov/bcknd/device/fusedcg_device.F90 comm/comm.o device/device.o math/bcknd/device/device_math.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o config/num_types.o math/ax.o krylov/precon.o krylov/krylov.o 
 krylov/bcknd/device/gmres_device.o : krylov/bcknd/device/gmres_device.F90 comm/comm.o device/device.o math/bcknd/device/device_math.o math/math.o krylov/bcknd/device/pc_identity_device.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o config/num_types.o math/ax.o krylov/precon.o krylov/krylov.o 
 krylov/bcknd/device/pc_jacobi_device.o : krylov/bcknd/device/pc_jacobi_device.F90 gs/gather_scatter.o device/device.o math/bcknd/device/device_math.o config/num_types.o sem/dofmap.o sem/coef.o krylov/precon.o 
 krylov/bcknd/device/pc_identity_device.o : krylov/bcknd/device/pc_identity_device.f90 config/num_types.o krylov/precon.o math/bcknd/device/device_math.o device/device.o 
-time_schemes/time_scheme.o : time_schemes/time_scheme.f90 common/utils.o config/num_types.o config/neko_config.o 
+time_schemes/time_scheme.o : time_schemes/time_scheme.f90 config/num_types.o config/neko_config.o 
 time_schemes/bdf_time_scheme.o : time_schemes/bdf_time_scheme.f90 common/utils.o math/math.o time_schemes/time_scheme.o config/num_types.o config/neko_config.o 
 time_schemes/ext_time_scheme.o : time_schemes/ext_time_scheme.f90 common/utils.o math/math.o time_schemes/time_scheme.o config/num_types.o config/neko_config.o 
 time_schemes/ab_time_scheme.o : time_schemes/ab_time_scheme.f90 common/utils.o math/math.o time_schemes/time_scheme.o config/num_types.o config/neko_config.o 
@@ -158,20 +170,21 @@ common/stats_quant.o : common/stats_quant.f90 config/num_types.o
 common/statistics.o : common/statistics.f90 comm/comm.o common/log.o common/stats_quant.o config/num_types.o 
 common/rhs_maker.o : common/rhs_maker.f90 field/field.o field/field_series.o config/num_types.o 
 common/rhs_maker_fctry.o : common/rhs_maker_fctry.f90 config/neko_config.o common/bcknd/device/rhs_maker_device.o common/bcknd/sx/rhs_maker_sx.o common/bcknd/cpu/rhs_maker_cpu.o common/rhs_maker.o 
-simulation_components/probes.o : simulation_components/probes.F90 case.o io/csv_file.o io/file.o device/device.o comm/comm.o math/tensor.o math/math.o common/global_interpolation.o common/json_utils.o sem/dofmap.o field/field_registry.o simulation_components/simulation_component.o field/field_list.o field/field.o common/utils.o common/log.o math/matrix.o config/num_types.o 
+simulation_components/probes.o : simulation_components/probes.F90 case.o io/csv_file.o io/file.o device/device.o comm/comm.o math/tensor.o common/global_interpolation.o common/json_utils.o sem/dofmap.o field/field_registry.o simulation_components/simulation_component.o field/field_list.o common/utils.o common/log.o math/matrix.o config/num_types.o 
+simulation_components/field_writer.o : simulation_components/field_writer.f90 common/json_utils.o io/fld_file_output.o case.o math/operators.o field/field.o field/field_registry.o simulation_components/simulation_component.o config/num_types.o 
 common/bcknd/cpu/rhs_maker_cpu.o : common/bcknd/cpu/rhs_maker_cpu.f90 field/scratch_registry.o config/num_types.o field/field.o field/field_series.o common/rhs_maker.o 
 common/bcknd/sx/rhs_maker_sx.o : common/bcknd/sx/rhs_maker_sx.f90 field/scratch_registry.o config/num_types.o field/field.o field/field_series.o common/rhs_maker.o 
 common/bcknd/device/rhs_maker_device.o : common/bcknd/device/rhs_maker_device.F90 config/num_types.o field/field.o field/field_series.o common/utils.o device/device.o common/rhs_maker.o 
 common/material_properties.o : common/material_properties.f90 comm/comm.o common/utils.o common/user_intf.o common/log.o common/json_utils.o config/num_types.o 
 config/neko_config.o : config/neko_config.f90 
-case.o : case.f90 common/material_properties.o mesh/point_zone_registry.o field/scratch_registry.o common/json_utils.o scalar/scalar_pnpn.o common/user_intf.o common/jobctrl.o common/log.o time_schemes/time_scheme_controller.o comm/comm.o mesh/mesh.o common/utils.o io/file.o common/statistics.o fluid/flow_ic.o common/sampler.o comm/redist.o comm/parmetis.o field/mesh_field.o io/fluid_stats_output.o io/mean_flow_output.o io/mean_sqr_flow_output.o io/chkp_output.o io/fluid_output.o fluid/fluid_fctry.o config/num_types.o 
-common/user_intf.o : common/user_intf.f90 config/num_types.o bc/usr_scalar.o bc/usr_inflow.o mesh/mesh.o sem/coef.o scalar/source_scalar.o fluid/fluid_user_source_term.o field/field.o 
-fluid/fluid_scheme.o : fluid/fluid_scheme.f90 common/material_properties.o common/utils.o common/user_intf.o source_terms/const_source_term.o source_terms/source_term_factory.o source_terms/source_term.o field/scratch_registry.o common/json_utils.o field/field_registry.o common/log.o math/operators.o math/mathops.o time_schemes/time_scheme_controller.o math/math.o mesh/mesh.o bc/bc.o fluid/fluid_stats.o krylov/precon_fctry.o krylov/krylov_fctry.o bc/non_normal.o bc/symmetry.o bc/dong_outflow.o bc/dirichlet.o bc/blasius.o bc/usr_inflow.o bc/inflow.o bc/wall.o sem/coef.o krylov/krylov.o sem/dofmap.o sem/space.o field/field.o field/field_list.o fluid/fluid_source_term.o fluid/fluid_user_source_term.o config/num_types.o fluid/mean_flow.o common/checkpoint.o config/neko_config.o fluid/mean_sqr_flow.o gs/gather_scatter.o 
+case.o : case.f90 common/material_properties.o mesh/point_zone_registry.o field/scratch_registry.o common/json_utils.o scalar/scalar_pnpn.o common/user_intf.o common/jobctrl.o common/log.o time_schemes/time_scheme_controller.o comm/comm.o mesh/mesh.o common/utils.o io/file.o common/statistics.o scalar/scalar_ic.o fluid/flow_ic.o common/sampler.o comm/redist.o comm/parmetis.o field/mesh_field.o io/fluid_stats_output.o io/mean_flow_output.o io/mean_sqr_flow_output.o io/chkp_output.o io/fluid_output.o fluid/fluid_fctry.o config/num_types.o 
+common/user_intf.o : common/user_intf.f90 common/utils.o config/num_types.o bc/field_dirichlet.o bc/usr_scalar.o bc/usr_inflow.o mesh/mesh.o bc/bc.o sem/coef.o scalar/scalar_user_source_term.o fluid/fluid_user_source_term.o field/field_list.o field/field.o 
+fluid/fluid_scheme.o : fluid/fluid_scheme.f90 common/time_step_controller.o field/field_series.o common/material_properties.o common/utils.o common/user_intf.o field/scratch_registry.o common/json_utils.o field/field_registry.o common/log.o math/operators.o math/mathops.o time_schemes/time_scheme_controller.o math/math.o mesh/mesh.o bc/bc.o fluid/fluid_stats.o krylov/precon_fctry.o krylov/krylov_fctry.o bc/field_dirichlet_vector.o bc/field_dirichlet.o bc/non_normal.o bc/symmetry.o bc/dong_outflow.o bc/dirichlet.o bc/blasius.o bc/usr_inflow.o bc/inflow.o bc/wall.o sem/coef.o krylov/krylov.o sem/dofmap.o sem/space.o field/field.o field/field_list.o fluid/fluid_source_term.o fluid/fluid_user_source_term.o comm/comm.o config/num_types.o fluid/mean_flow.o common/checkpoint.o config/neko_config.o fluid/mean_sqr_flow.o gs/gather_scatter.o 
 fluid/fluid_aux.o : fluid/fluid_aux.f90 krylov/krylov.o config/num_types.o common/log.o 
-fluid/fluid_pnpn.o : fluid/fluid_pnpn.f90 common/material_properties.o common/json_utils.o common/profiler.o fluid/advection.o common/log.o common/projection.o time_schemes/time_scheme_controller.o fluid/fluid_aux.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o bc/facet_normal.o field/field_series.o fluid/fluid_scheme.o fluid/fluid_volflow.o common/rhs_maker_fctry.o math/ax_helm_fctry.o fluid/pnpn_res_fctry.o 
+fluid/fluid_pnpn.o : fluid/fluid_pnpn.f90 bc/bc.o math/mathops.o math/math.o config/neko_config.o gs/gather_scatter.o common/time_step_controller.o sem/coef.o common/user_intf.o mesh/mesh.o bc/non_normal.o bc/facet_normal.o bc/dirichlet.o field/field.o math/ax.o fluid/advection_fctry.o common/material_properties.o common/json_utils.o common/profiler.o fluid/advection.o common/log.o device/device.o common/projection.o time_schemes/time_scheme_controller.o fluid/fluid_aux.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o field/field_series.o fluid/fluid_scheme.o fluid/fluid_volflow.o common/rhs_maker.o common/rhs_maker_fctry.o math/ax_helm_fctry.o fluid/pnpn_res.o fluid/pnpn_res_fctry.o krylov/krylov.o config/num_types.o 
 fluid/fluid_fctry.o : fluid/fluid_fctry.f90 config/neko_config.o common/utils.o fluid/fluid_pnpn.o fluid/fluid_scheme.o 
 fluid/fluid_volflow.o : fluid/fluid_volflow.f90 math/ax.o bc/bc.o field/scratch_registry.o common/json_utils.o gs/gather_scatter.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o config/neko_config.o comm/comm.o math/math.o time_schemes/time_scheme_controller.o sem/coef.o field/field.o sem/dofmap.o krylov/precon.o krylov/krylov.o math/mathops.o config/num_types.o math/operators.o 
-fluid/pnpn_res.o : fluid/pnpn_res.f90 field/scratch_registry.o config/num_types.o mesh/mesh.o sem/space.o bc/facet_normal.o sem/coef.o field/field.o math/ax.o gs/gather_scatter.o 
+fluid/pnpn_res.o : fluid/pnpn_res.f90 config/num_types.o mesh/mesh.o sem/space.o bc/facet_normal.o sem/coef.o field/field.o math/ax.o gs/gather_scatter.o 
 fluid/pnpn_res_fctry.o : fluid/pnpn_res_fctry.f90 fluid/bcknd/sx/pnpn_res_sx.o fluid/bcknd/cpu/pnpn_res_cpu.o fluid/bcknd/device/pnpn_res_device.o fluid/pnpn_res.o common/utils.o config/neko_config.o 
 fluid/mean_flow.o : fluid/mean_flow.f90 field/field.o field/mean_field.o 
 fluid/fluid_stats.o : fluid/fluid_stats.f90 common/utils.o config/neko_config.o device/device.o common/stats_quant.o gs/gather_scatter.o field/field_list.o field/field_registry.o field/field.o sem/coef.o math/operators.o math/math.o math/mathops.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o field/mean_field.o 
@@ -179,18 +192,20 @@ fluid/mean_sqr_flow.o : fluid/mean_sqr_flow.f90 field/field.o field/mean_sqr_fie
 fluid/flow_profile.o : fluid/flow_profile.f90 config/num_types.o 
 fluid/flow_ic.o : fluid/flow_ic.f90 common/json_utils.o common/user_intf.o math/math.o sem/coef.o common/utils.o field/field.o device/device.o math/bcknd/device/device_math.o fluid/flow_profile.o config/neko_config.o gs/gather_scatter.o 
 fluid/advection.o : fluid/advection.f90 device/device.o sem/interpolation.o math/operators.o config/neko_config.o math/bcknd/device/device_math.o sem/coef.o field/field.o sem/space.o common/utils.o math/math.o config/num_types.o 
+fluid/advection_fctry.o : fluid/advection_fctry.f90 common/json_utils.o device/device.o sem/coef.o fluid/advection.o config/num_types.o 
 fluid/bcknd/cpu/pnpn_res_cpu.o : fluid/bcknd/cpu/pnpn_res_cpu.f90 sem/space.o config/num_types.o mesh/mesh.o field/scratch_registry.o fluid/pnpn_res.o bc/facet_normal.o sem/coef.o math/ax.o field/field.o math/operators.o gs/gather_scatter.o 
 fluid/bcknd/sx/pnpn_res_sx.o : fluid/bcknd/sx/pnpn_res_sx.f90 sem/space.o config/num_types.o mesh/mesh.o field/scratch_registry.o fluid/pnpn_res.o bc/facet_normal.o sem/coef.o math/ax.o field/field.o math/operators.o gs/gather_scatter.o 
 fluid/bcknd/device/pnpn_res_device.o : fluid/bcknd/device/pnpn_res_device.F90 field/scratch_registry.o fluid/pnpn_res.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o sem/space.o config/num_types.o mesh/mesh.o bc/facet_normal.o sem/coef.o math/ax.o field/field.o math/operators.o gs/gather_scatter.o 
-fluid/fluid_user_source_term.o : fluid/fluid_user_source_term.f90 sem/dofmap.o math/math.o math/bcknd/device/device_math.o device/device.o sem/coef.o field/field_list.o field/field.o source_terms/source_term.o common/utils.o config/num_types.o config/neko_config.o 
-fluid/fluid_source_term.o : fluid/fluid_source_term.f90 common/user_intf.o sem/coef.o common/json_utils.o field/field_list.o field/field.o source_terms/source_term_factory.o source_terms/source_term.o fluid/fluid_user_source_term.o config/num_types.o config/neko_config.o 
-simulation.o : simulation.f90 common/json_utils.o simulation_components/simulation_component_global.o common/profiler.o common/jobctrl.o common/log.o math/math.o io/file.o time_schemes/time_scheme_controller.o gs/gather_scatter.o case.o 
+fluid/fluid_user_source_term.o : fluid/fluid_user_source_term.f90 sem/dofmap.o math/math.o math/bcknd/device/device_math.o device/device.o sem/coef.o field/field_list.o source_terms/source_term.o common/utils.o config/num_types.o config/neko_config.o 
+fluid/fluid_source_term.o : fluid/fluid_source_term.f90 common/utils.o common/user_intf.o sem/coef.o common/json_utils.o field/field_list.o field/field.o source_terms/source_term_fctry.o source_terms/source_term.o fluid/fluid_user_source_term.o config/num_types.o config/neko_config.o 
+common/time_step_controller.o : common/time_step_controller.f90 common/json_utils.o common/log.o config/num_types.o 
+simulation.o : simulation.f90 common/time_step_controller.o common/json_utils.o simulation_components/simcomp_executor.o common/profiler.o field/field.o common/jobctrl.o math/bcknd/device/device_math.o device/device.o common/log.o math/math.o io/file.o time_schemes/time_scheme_controller.o gs/gather_scatter.o case.o 
 math/ax_helm_fctry.o : math/ax_helm_fctry.f90 math/bcknd/cpu/ax_helm.o math/bcknd/sx/ax_helm_sx.o math/bcknd/xsmm/ax_helm_xsmm.o math/bcknd/device/ax_helm_device.o math/ax.o config/neko_config.o 
-math/bcknd/cpu/ax_helm.o : math/bcknd/cpu/ax_helm.f90 math/math.o mesh/mesh.o field/field.o sem/space.o sem/coef.o config/num_types.o math/ax.o 
-math/bcknd/sx/ax_helm_sx.o : math/bcknd/sx/ax_helm_sx.f90 math/math.o mesh/mesh.o field/field.o sem/space.o sem/coef.o config/num_types.o math/ax.o 
-math/bcknd/xsmm/ax_helm_xsmm.o : math/bcknd/xsmm/ax_helm_xsmm.F90 math/mxm_wrapper.o mesh/mesh.o field/field.o sem/space.o sem/coef.o config/num_types.o math/ax.o 
-math/bcknd/device/ax_helm_device.o : math/bcknd/device/ax_helm_device.F90 device/device.o math/bcknd/device/device_math.o mesh/mesh.o field/field.o sem/space.o sem/coef.o config/num_types.o math/ax.o 
-common/projection.o : common/projection.f90 common/log.o common/profiler.o common/bcknd/device/device_projection.o math/bcknd/device/device_math.o device/device.o config/neko_config.o gs/gather_scatter.o comm/comm.o bc/bc.o math/ax.o sem/coef.o math/math.o config/num_types.o 
+math/bcknd/cpu/ax_helm.o : math/bcknd/cpu/ax_helm.f90 math/math.o mesh/mesh.o sem/space.o sem/coef.o config/num_types.o math/ax.o 
+math/bcknd/sx/ax_helm_sx.o : math/bcknd/sx/ax_helm_sx.f90 math/math.o mesh/mesh.o sem/space.o sem/coef.o config/num_types.o math/ax.o 
+math/bcknd/xsmm/ax_helm_xsmm.o : math/bcknd/xsmm/ax_helm_xsmm.F90 math/mxm_wrapper.o mesh/mesh.o sem/space.o sem/coef.o config/num_types.o math/ax.o 
+math/bcknd/device/ax_helm_device.o : math/bcknd/device/ax_helm_device.F90 device/device.o math/bcknd/device/device_math.o mesh/mesh.o sem/space.o sem/coef.o config/num_types.o math/ax.o 
+common/projection.o : common/projection.f90 common/time_step_controller.o common/log.o common/profiler.o common/bcknd/device/device_projection.o math/bcknd/device/device_math.o device/device.o config/neko_config.o gs/gather_scatter.o comm/comm.o bc/bc.o math/ax.o sem/coef.o math/math.o config/num_types.o 
 common/bcknd/device/device_projection.o : common/bcknd/device/device_projection.F90 common/utils.o config/num_types.o 
 comm/parmetis.o : comm/parmetis.F90 mesh/mesh.o field/mesh_field.o config/num_types.o common/utils.o mesh/point.o comm/comm.o 
 comm/redist.o : comm/redist.f90 mesh/element.o mesh/facet_zone.o io/format/nmsh.o mesh/mesh.o comm/comm.o mesh/curve.o adt/stack.o mesh/point.o adt/htable.o comm/mpi_types.o field/mesh_field.o 
@@ -218,23 +233,39 @@ bc/bcknd/device/device_facet_normal.o : bc/bcknd/device/device_facet_normal.F90
 bc/bcknd/device/device_inhom_dirichlet.o : bc/bcknd/device/device_inhom_dirichlet.F90 common/utils.o config/num_types.o 
 bc/bcknd/device/device_dong_outflow.o : bc/bcknd/device/device_dong_outflow.F90 common/utils.o config/num_types.o 
 scalar/bcknd/device/scalar_residual_device.o : scalar/bcknd/device/scalar_residual_device.F90 math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o math/operators.o gs/gather_scatter.o scalar/scalar_residual.o 
-scalar/scalar_scheme.o : scalar/scalar_scheme.f90 common/material_properties.o common/user_intf.o common/json_utils.o bc/usr_scalar.o field/field_registry.o common/log.o time_schemes/time_scheme_controller.o mesh/facet_zone.o mesh/mesh.o bc/bc.o krylov/precon_fctry.o krylov/krylov_fctry.o bc/dirichlet.o sem/coef.o krylov/krylov.o sem/dofmap.o sem/space.o field/field.o scalar/source_scalar.o config/num_types.o common/checkpoint.o config/neko_config.o gs/gather_scatter.o 
-scalar/scalar_pnpn.o : scalar/scalar_pnpn.f90 common/material_properties.o common/user_intf.o common/json_utils.o common/profiler.o fluid/advection.o common/log.o math/math.o common/projection.o time_schemes/time_scheme_controller.o scalar/scalar_aux.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o bc/facet_normal.o field/field_series.o math/ax.o scalar/scalar_residual.o gs/gather_scatter.o sem/coef.o mesh/mesh.o bc/bc.o field/field.o bc/dirichlet.o scalar/scalar_scheme.o common/rhs_maker_fctry.o math/ax_helm_fctry.o scalar/scalar_residual_fctry.o 
+scalar/scalar_scheme.o : scalar/scalar_scheme.f90 common/time_step_controller.o field/field_series.o scalar/scalar_source_term.o comm/comm.o common/utils.o common/material_properties.o common/user_intf.o common/json_utils.o bc/usr_scalar.o field/field_registry.o common/log.o time_schemes/time_scheme_controller.o mesh/facet_zone.o mesh/mesh.o bc/field_dirichlet.o bc/bc.o krylov/precon_fctry.o krylov/pc_hsmg.o krylov/bcknd/sx/pc_jacobi_sx.o krylov/bcknd/device/pc_jacobi_device.o krylov/bcknd/cpu/pc_jacobi.o krylov/krylov_fctry.o bc/neumann.o bc/dirichlet.o sem/coef.o krylov/krylov.o sem/dofmap.o sem/space.o field/field_list.o field/field.o config/num_types.o common/checkpoint.o gs/gather_scatter.o 
+scalar/scalar_pnpn.o : scalar/scalar_pnpn.f90 common/time_step_controller.o config/neko_config.o common/material_properties.o common/user_intf.o common/json_utils.o common/profiler.o fluid/advection_fctry.o fluid/advection.o common/log.o math/math.o common/projection.o time_schemes/time_scheme_controller.o scalar/scalar_aux.o math/bcknd/device/device_math.o krylov/krylov.o bc/facet_normal.o field/field_series.o math/ax.o scalar/scalar_residual.o gs/gather_scatter.o device/device.o sem/coef.o common/checkpoint.o mesh/mesh.o bc/bc.o field/field.o bc/neumann.o bc/dirichlet.o scalar/scalar_scheme.o common/rhs_maker.o common/rhs_maker_fctry.o math/ax_helm_fctry.o scalar/scalar_residual_fctry.o config/num_types.o 
 scalar/scalar_aux.o : scalar/scalar_aux.f90 krylov/krylov.o config/num_types.o common/log.o 
 scalar/scalar_residual.o : scalar/scalar_residual.f90 config/num_types.o mesh/mesh.o sem/space.o bc/facet_normal.o scalar/source_scalar.o sem/coef.o field/field.o math/ax.o gs/gather_scatter.o 
 scalar/scalar_residual_fctry.o : scalar/scalar_residual_fctry.f90 scalar/bcknd/sx/scalar_residual_sx.o scalar/bcknd/cpu/scalar_residual_cpu.o scalar/bcknd/device/scalar_residual_device.o scalar/scalar_residual.o config/neko_config.o 
+scalar/scalar_ic.o : scalar/scalar_ic.f90 common/json_utils.o common/user_intf.o math/math.o sem/coef.o common/utils.o field/field.o device/device.o math/bcknd/device/device_math.o config/num_types.o config/neko_config.o gs/gather_scatter.o 
+scalar/scalar_source_term.o : scalar/scalar_source_term.f90 common/utils.o common/user_intf.o sem/coef.o common/json_utils.o field/field_list.o field/field.o source_terms/source_term_fctry.o source_terms/source_term.o scalar/scalar_user_source_term.o config/num_types.o config/neko_config.o 
+scalar/scalar_user_source_term.o : scalar/scalar_user_source_term.f90 sem/dofmap.o math/math.o math/bcknd/device/device_math.o device/device.o sem/coef.o field/field_list.o source_terms/source_term.o common/utils.o config/num_types.o config/neko_config.o 
 scalar/bcknd/cpu/scalar_residual_cpu.o : scalar/bcknd/cpu/scalar_residual_cpu.f90 math/operators.o scalar/scalar_residual.o gs/gather_scatter.o 
 scalar/bcknd/sx/scalar_residual_sx.o : scalar/bcknd/sx/scalar_residual_sx.f90 math/operators.o scalar/scalar_residual.o gs/gather_scatter.o 
 scalar/source_scalar.o : scalar/source_scalar.f90 math/bcknd/device/device_math.o device/device.o common/utils.o sem/dofmap.o config/num_types.o config/neko_config.o 
 simulation_components/simulation_component.o : simulation_components/simulation_component.f90 common/json_utils.o common/time_based_controller.o case.o config/num_types.o 
-simulation_components/simulation_component_global.o : simulation_components/simulation_component_global.f90 case.o simulation_components/simulation_component_factory.o simulation_components/simulation_component.o 
-simulation_components/vorticity.o : simulation_components/vorticity.f90 case.o math/operators.o field/field.o field/field_registry.o simulation_components/simulation_component.o config/num_types.o 
-simulation_components/lambda2.o : simulation_components/lambda2.f90 device/device.o case.o math/operators.o field/field_list.o field/field.o field/field_registry.o simulation_components/simulation_component.o config/num_types.o 
-simulation_components/simulation_component_factory.o : simulation_components/simulation_component_factory.f90 common/json_utils.o case.o simulation_components/probes.o simulation_components/lambda2.o simulation_components/vorticity.o simulation_components/simulation_component.o 
+simulation_components/simcomp_executor.o : simulation_components/simcomp_executor.f90 case.o common/json_utils.o simulation_components/simulation_component_fctry.o simulation_components/simulation_component.o config/num_types.o 
+simulation_components/vorticity.o : simulation_components/vorticity.f90 simulation_components/field_writer.o common/json_utils.o io/fld_file_output.o case.o math/operators.o field/field.o field/field_registry.o simulation_components/simulation_component.o config/num_types.o 
+simulation_components/lambda2.o : simulation_components/lambda2.f90 device/device.o simulation_components/field_writer.o case.o math/operators.o field/field.o field/field_registry.o simulation_components/simulation_component.o config/num_types.o 
+simulation_components/les_simcomp.o : simulation_components/les_simcomp.f90 common/json_utils.o les/les_model_fctry.o les/les_model.o case.o math/operators.o field/field.o field/field_registry.o simulation_components/simulation_component.o config/num_types.o 
+simulation_components/simulation_component_fctry.o : simulation_components/simulation_component_fctry.f90 simulation_components/field_writer.o common/log.o common/json_utils.o case.o simulation_components/les_simcomp.o simulation_components/probes.o simulation_components/lambda2.o simulation_components/vorticity.o simulation_components/simulation_component.o 
 source_terms/source_term.o : source_terms/source_term.f90 field/field_list.o sem/coef.o config/num_types.o config/neko_config.o 
 source_terms/const_source_term.o : source_terms/const_source_term.f90 source_terms/bcknd/device/const_source_term_device.o source_terms/bcknd/cpu/const_source_term_cpu.o common/utils.o config/neko_config.o sem/coef.o source_terms/source_term.o common/json_utils.o field/field_list.o config/num_types.o 
 source_terms/bcknd/cpu/const_source_term_cpu.o : source_terms/bcknd/cpu/const_source_term_cpu.f90 math/math.o field/field_list.o config/num_types.o 
 source_terms/bcknd/device/const_source_term_device.o : source_terms/bcknd/device/const_source_term_device.f90 math/bcknd/device/device_math.o field/field_list.o config/num_types.o 
-source_terms/source_term_factory.o : source_terms/source_term_factory.f90 sem/coef.o common/utils.o field/field_list.o common/json_utils.o source_terms/const_source_term.o source_terms/source_term.o 
-neko.o : neko.f90 mesh/point_zone_registry.o mesh/point_zones/sphere_point_zone.o mesh/point_zones/box_point_zone.o mesh/point_zone.o sem/point_interpolator.o common/time_interpolator.o io/data_streamer.o simulation_components/simulation_component_global.o field/scratch_registry.o field/field_registry.o common/system.o sem/spectral_error_indicator.o simulation_components/probes.o simulation_components/simulation_component.o math/tensor.o math/vector.o fluid/fluid_user_source_term.o field/field_list.o fluid/fluid_stats.o sem/cpr.o math/bcknd/device/device_math.o device/device.o common/jobctrl.o common/signal.o comm/parmetis.o common/user_intf.o common/projection.o math/mathops.o math/operators.o simulation.o io/output.o common/sampler.o case.o config/neko_config.o math/ax.o math/ax_helm_fctry.o krylov/precon_fctry.o krylov/krylov_fctry.o bc/dirichlet.o bc/wall.o bc/bc.o sem/coef.o gs/gather_scatter.o comm/mpi_types.o field/field.o io/file.o common/global_interpolation.o math/mxm_wrapper.o io/format/map.o field/mesh_field.o mesh/point.o mesh/mesh.o adt/tuple.o adt/stack.o adt/uset.o adt/htable.o sem/space.o sem/dofmap.o sem/speclib.o math/math.o common/log.o common/utils.o comm/comm.o config/num_types.o 
+source_terms/boussinesq_source_term.o : source_terms/boussinesq_source_term.f90 field/field_registry.o source_terms/bcknd/device/boussinesq_source_term_device.o source_terms/bcknd/cpu/boussinesq_source_term_cpu.o common/utils.o config/neko_config.o sem/coef.o source_terms/source_term.o common/json_utils.o field/field.o field/field_list.o config/num_types.o 
+source_terms/bcknd/cpu/boussinesq_source_term_cpu.o : source_terms/bcknd/cpu/boussinesq_source_term_cpu.f90 math/math.o field/field.o field/field_list.o config/num_types.o 
+source_terms/bcknd/device/boussinesq_source_term_device.o : source_terms/bcknd/device/boussinesq_source_term_device.f90 math/bcknd/device/device_math.o field/field.o field/field_list.o config/num_types.o 
+source_terms/source_term_fctry.o : source_terms/source_term_fctry.f90 sem/coef.o common/utils.o field/field_list.o common/json_utils.o source_terms/brinkman_source_term.o source_terms/boussinesq_source_term.o source_terms/const_source_term.o source_terms/source_term.o 
+source_terms/brinkman_source_term.o : source_terms/brinkman_source_term.f90 mesh/point_zone_registry.o mesh/point_zone.o mesh/search_tree/aabb.o common/profiler.o math/signed_distance.o source_terms/brinkman/filters.o device/device.o mesh/tri_mesh.o io/file.o source_terms/bcknd/device/brinkman_source_term_device.o source_terms/bcknd/cpu/brinkman_source_term_cpu.o common/utils.o config/neko_config.o sem/coef.o source_terms/source_term.o field/field_registry.o common/json_utils.o field/field_list.o field/field.o config/num_types.o 
+source_terms/bcknd/cpu/brinkman_source_term_cpu.o : source_terms/bcknd/cpu/brinkman_source_term_cpu.f90 field/field_registry.o math/math.o field/field_list.o field/field.o config/num_types.o 
+source_terms/bcknd/device/brinkman_source_term_device.o : source_terms/bcknd/device/brinkman_source_term_device.f90 field/field_registry.o math/bcknd/device/device_math.o field/field_list.o field/field.o config/num_types.o 
+source_terms/brinkman/filters.o : source_terms/brinkman/filters.f90 source_terms/bcknd/cpu/filters_cpu.o config/num_types.o field/field.o 
+source_terms/bcknd/cpu/filters_cpu.o : source_terms/bcknd/cpu/filters_cpu.f90 config/num_types.o 
+les/les_model.o : les/les_model.f90 device/device.o config/neko_config.o gs/gs_ops.o sem/coef.o sem/dofmap.o field/field_registry.o field/field.o config/num_types.o 
+les/les_model_fctry.o : les/les_model_fctry.f90 sem/coef.o sem/dofmap.o les/vreman.o les/les_model.o 
+les/vreman.o : les/vreman.f90 sem/coef.o les/bcknd/cpu/vreman_cpu.o config/neko_config.o common/utils.o common/json_utils.o sem/dofmap.o les/les_model.o field/field.o config/num_types.o 
+les/bcknd/cpu/vreman_cpu.o : les/bcknd/cpu/vreman_cpu.f90 sem/coef.o math/operators.o field/field.o field/field_registry.o field/scratch_registry.o math/math.o field/field_list.o config/num_types.o 
+neko.o : neko.f90 mesh/point_zone_registry.o mesh/point_zones/sphere_point_zone.o mesh/point_zones/box_point_zone.o mesh/point_zone.o sem/point_interpolator.o common/time_interpolator.o io/data_streamer.o simulation_components/simcomp_executor.o field/scratch_registry.o field/field_registry.o qoi/drag_torque.o common/system.o sem/spectral_error_indicator.o simulation_components/probes.o simulation_components/simulation_component.o math/tensor.o math/matrix.o math/vector.o scalar/scalar_user_source_term.o fluid/fluid_user_source_term.o field/field_list.o fluid/fluid_stats.o sem/cpr.o sem/map_1d.o math/bcknd/device/device_math.o device/device.o common/jobctrl.o common/signal.o comm/parmetis.o common/user_intf.o common/projection.o math/mathops.o math/operators.o simulation.o io/output.o common/sampler.o case.o config/neko_config.o math/ax.o math/ax_helm_fctry.o krylov/precon_fctry.o krylov/krylov_fctry.o bc/dirichlet.o bc/wall.o bc/bc.o sem/coef.o gs/gather_scatter.o comm/mpi_types.o field/field.o io/file.o common/global_interpolation.o math/mxm_wrapper.o io/format/map.o field/mesh_field.o mesh/point.o mesh/mesh.o adt/tuple.o adt/stack.o adt/uset.o adt/htable.o sem/space.o sem/dofmap.o sem/speclib.o math/math.o common/log.o common/utils.o comm/comm.o config/num_types.o 
 driver.o : driver.f90 neko.o 
diff --git a/src/.depends_device b/src/.depends_device
index ae467eb0f44..537362f2c0c 100644
--- a/src/.depends_device
+++ b/src/.depends_device
@@ -13,6 +13,7 @@ math/bcknd/device/hip/tensor.o : math/bcknd/device/hip/tensor.hip math/bcknd/dev
 math/bcknd/device/hip/fdm.o : math/bcknd/device/hip/fdm.hip math/bcknd/device/hip/fdm_kernel.h
 krylov/bcknd/device/hip/pc_jacobi.o : krylov/bcknd/device/hip/pc_jacobi.hip
 krylov/bcknd/device/hip/pipecg_aux.o : krylov/bcknd/device/hip/pipecg_aux.hip krylov/bcknd/device/hip/pipecg_kernel.h
+krylov/bcknd/device/hip/fusedcg_aux.o : krylov/bcknd/device/hip/fusedcg_aux.hip krylov/bcknd/device/hip/fusedcg_kernel.h
 krylov/bcknd/device/hip/gmres_aux.o : krylov/bcknd/device/hip/gmres_aux.hip krylov/bcknd/device/hip/gmres_kernel.h
 gs/bcknd/device/hip/gs.o : gs/bcknd/device/hip/gs.hip gs/bcknd/device/hip/gs_kernels.h
 bc/bcknd/device/hip/dirichlet.o : bc/bcknd/device/hip/dirichlet.hip bc/bcknd/device/hip/dirichlet_kernel.h
@@ -42,6 +43,7 @@ math/bcknd/device/cuda/tensor.o : math/bcknd/device/cuda/tensor.cu math/bcknd/de
 math/bcknd/device/cuda/fdm.o : math/bcknd/device/cuda/fdm.cu math/bcknd/device/cuda/fdm_kernel.h
 krylov/bcknd/device/cuda/pc_jacobi.o : krylov/bcknd/device/cuda/pc_jacobi.cu
 krylov/bcknd/device/cuda/pipecg_aux.o : krylov/bcknd/device/cuda/pipecg_aux.cu krylov/bcknd/device/cuda/pipecg_kernel.h
+krylov/bcknd/device/cuda/fusedcg_aux.o : krylov/bcknd/device/cuda/fusedcg_aux.cu krylov/bcknd/device/cuda/fusedcg_kernel.h
 krylov/bcknd/device/cuda/gmres_aux.o : krylov/bcknd/device/cuda/gmres_aux.cu krylov/bcknd/device/cuda/gmres_kernel.h
 gs/bcknd/device/cuda/gs.o : gs/bcknd/device/cuda/gs.cu gs/bcknd/device/cuda/gs_kernels.h 
 bc/bcknd/device/cuda/dirichlet.o : bc/bcknd/device/cuda/dirichlet.cu bc/bcknd/device/cuda/dirichlet_kernel.h
diff --git a/src/Makefile.am b/src/Makefile.am
index 4d0019389ae..1176764478e 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -18,11 +18,13 @@ neko_fortran_SOURCES = \
 	common/system.f90\
 	math/mxm_wrapper.F90\
 	sem/speclib.f90 \
+	qoi/drag_torque.f90\
 	sem/local_interpolation.f90 \
 	math/math.f90\
 	math/mathops.f90\
 	math/fast3d.f90\
 	sem/space.f90\
+	sem/map_1d.f90\
 	sem/dofmap.f90\
 	sem/coef.f90\
 	sem/cpr.f90\
@@ -56,10 +58,13 @@ neko_fortran_SOURCES = \
 	mesh/point_zone.f90\
 	mesh/point_zones/sphere_point_zone.f90\
 	mesh/point_zones/box_point_zone.f90\
-	mesh/point_zone_factory.f90\
+	mesh/point_zones/cylinder_point_zone.f90\
+	mesh/point_zone_fctry.f90\
 	mesh/point_zone_registry.f90\
 	mesh/mesh.f90\
 	mesh/octree.f90\
+	mesh/search_tree/aabb.f90\
+	mesh/aabb_tree.f90\
 	mesh/tet_mesh.f90\
 	mesh/tri_mesh.f90\
 	field/field_registry.f90\
@@ -80,6 +85,7 @@ neko_fortran_SOURCES = \
 	math/bcknd/sx/sx_dudxyz.f90\
 	math/bcknd/sx/sx_opgrad.f90\
 	math/bcknd/sx/sx_cfl.f90\
+	math/bcknd/sx/sx_lambda2.f90\
 	math/operators.f90\
 	math/bcknd/cpu/opr_cpu.f90\
 	math/bcknd/sx/opr_sx.f90\
@@ -96,6 +102,7 @@ neko_fortran_SOURCES = \
 	math/schwarz.f90\
 	math/vector.f90\
 	math/matrix.f90\
+	math/signed_distance.f90\
 	common/checkpoint.f90\
 	io/generic_file.f90\
 	io/map_file.f90\
@@ -111,6 +118,7 @@ neko_fortran_SOURCES = \
 	io/file.f90\
 	io/output.f90\
 	io/fluid_output.f90\
+	io/fld_file_output.f90\
 	io/chkp_output.f90\
 	io/mean_flow_output.f90\
 	io/fluid_stats_output.f90\
@@ -122,9 +130,12 @@ neko_fortran_SOURCES = \
 	common/craypat.F90\
 	bc/bc.f90\
 	bc/dirichlet.f90\
+	bc/neumann.f90\
 	bc/dong_outflow.f90\
 	bc/wall.f90\
 	bc/inflow.f90\
+	bc/field_dirichlet.f90\
+	bc/field_dirichlet_vector.f90\
 	bc/usr_inflow.f90\
 	bc/usr_scalar.f90\
 	bc/facet_normal.f90\
@@ -148,6 +159,7 @@ neko_fortran_SOURCES = \
 	krylov/bcknd/sx/pc_jacobi_sx.f90\
 	krylov/bcknd/device/cg_device.f90\
 	krylov/bcknd/device/pipecg_device.F90\
+	krylov/bcknd/device/fusedcg_device.F90\
 	krylov/bcknd/device/gmres_device.F90\
 	krylov/bcknd/device/pc_jacobi_device.F90\
 	krylov/bcknd/device/pc_identity_device.f90\
@@ -162,6 +174,7 @@ neko_fortran_SOURCES = \
 	common/rhs_maker.f90\
 	common/rhs_maker_fctry.f90\
 	simulation_components/probes.F90\
+	simulation_components/field_writer.f90\
 	common/bcknd/cpu/rhs_maker_cpu.f90\
 	common/bcknd/sx/rhs_maker_sx.f90\
 	common/bcknd/device/rhs_maker_device.F90\
@@ -182,11 +195,13 @@ neko_fortran_SOURCES = \
 	fluid/flow_profile.f90\
 	fluid/flow_ic.f90\
 	fluid/advection.f90\
+	fluid/advection_fctry.f90\
 	fluid/bcknd/cpu/pnpn_res_cpu.f90\
 	fluid/bcknd/sx/pnpn_res_sx.f90\
 	fluid/bcknd/device/pnpn_res_device.F90\
 	fluid/fluid_user_source_term.f90\
 	fluid/fluid_source_term.f90\
+	common/time_step_controller.f90\
 	simulation.f90\
 	math/ax_helm_fctry.f90\
 	math/bcknd/cpu/ax_helm.f90\
@@ -226,19 +241,35 @@ neko_fortran_SOURCES = \
 	scalar/scalar_aux.f90\
 	scalar/scalar_residual.f90\
 	scalar/scalar_residual_fctry.f90\
+	scalar/scalar_ic.f90\
+	scalar/scalar_source_term.f90\
+	scalar/scalar_user_source_term.f90\
 	scalar/bcknd/cpu/scalar_residual_cpu.f90\
 	scalar/bcknd/sx/scalar_residual_sx.f90\
 	scalar/source_scalar.f90\
 	simulation_components/simulation_component.f90\
-	simulation_components/simulation_component_global.f90\
+	simulation_components/simcomp_executor.f90\
 	simulation_components/vorticity.f90\
 	simulation_components/lambda2.f90\
-	simulation_components/simulation_component_factory.f90\
+	simulation_components/les_simcomp.f90\
+	simulation_components/simulation_component_fctry.f90\
 	source_terms/source_term.f90\
 	source_terms/const_source_term.f90\
 	source_terms/bcknd/cpu/const_source_term_cpu.f90\
 	source_terms/bcknd/device/const_source_term_device.f90\
-	source_terms/source_term_factory.f90\
+	source_terms/boussinesq_source_term.f90\
+	source_terms/bcknd/cpu/boussinesq_source_term_cpu.f90\
+	source_terms/bcknd/device/boussinesq_source_term_device.f90\
+	source_terms/source_term_fctry.f90\
+	source_terms/brinkman_source_term.f90\
+	source_terms/bcknd/cpu/brinkman_source_term_cpu.f90\
+	source_terms/bcknd/device/brinkman_source_term_device.f90\
+	source_terms/brinkman/filters.f90\
+	source_terms/bcknd/cpu/filters_cpu.f90\
+	les/les_model.f90\
+	les/les_model_fctry.f90\
+	les/vreman.f90\
+	les/bcknd/cpu/vreman_cpu.f90\
 	neko.f90
 
 neko_c_SOURCES = comm/comm_wrapper.c\
@@ -271,6 +302,7 @@ libneko_a_SOURCES += \
 	math/bcknd/device/hip/ax_helm.hip\
 	krylov/bcknd/device/hip/pc_jacobi.hip\
 	krylov/bcknd/device/hip/pipecg_aux.hip\
+	krylov/bcknd/device/hip/fusedcg_aux.hip\
 	krylov/bcknd/device/hip/gmres_aux.hip\
 	gs/bcknd/device/hip/gs.hip\
 	bc/bcknd/device/hip/dirichlet.hip\
@@ -304,6 +336,7 @@ libneko_a_SOURCES += \
 	math/bcknd/device/cuda/ax_helm.cu\
 	krylov/bcknd/device/cuda/pc_jacobi.cu\
 	krylov/bcknd/device/cuda/pipecg_aux.cu\
+	krylov/bcknd/device/cuda/fusedcg_aux.cu\
 	krylov/bcknd/device/cuda/gmres_aux.cu\
 	gs/bcknd/device/cuda/gs.cu\
 	bc/bcknd/device/cuda/dirichlet.cu\
@@ -481,6 +514,8 @@ EXTRA_DIST = \
 	scalar/bcknd/device/hip/scalar_residual_update_kernel.h\
 	krylov/bcknd/device/cuda/pipecg_kernel.h\
 	krylov/bcknd/device/hip/pipecg_kernel.h\
+	krylov/bcknd/device/cuda/fusedcg_kernel.h\
+	krylov/bcknd/device/hip/fusedcg_kernel.h\
 	krylov/bcknd/device/cuda/gmres_kernel.h\
 	krylov/bcknd/device/hip/gmres_kernel.h\
 	sem/bcknd/device/hip/coef_kernel.h\
diff --git a/src/adt/htable.f90 b/src/adt/htable.f90
index 48b025a99dc..f59ba812faf 100644
--- a/src/adt/htable.f90
+++ b/src/adt/htable.f90
@@ -47,7 +47,7 @@ module htable
   type :: h_tuple_t
      logical :: valid = .false.
      logical :: skip = .false.
-     class(*), allocatable :: key      
+     class(*), allocatable :: key
      class(*), allocatable :: data
   end type h_tuple_t
 
@@ -135,7 +135,7 @@ end function htable_hash
      procedure, pass(this) :: set => htable_i4t4_set
      procedure, pass(this) :: get => htable_i4t4_get
      procedure, pass(this) :: hash => htable_i4t4_hash
-     procedure, pass(this) :: remove => htable_i4t4_remove          
+     procedure, pass(this) :: remove => htable_i4t4_remove
   end type htable_i4t4_t
 
   !> C pointer based hash table
@@ -145,7 +145,7 @@ end function htable_hash
      procedure, pass(this) :: set => htable_cptr_set
      procedure, pass(this) :: get => htable_cptr_get
      procedure, pass(this) :: hash => htable_cptr_hash
-     procedure, pass(this) :: remove => htable_cptr_remove          
+     procedure, pass(this) :: remove => htable_cptr_remove
   end type htable_cptr_t
 
   !
@@ -168,7 +168,7 @@ end function htable_hash
      procedure, pass(this) :: init => htable_iter_i4_init
      procedure, pass(this) :: value => htable_iter_i4_value
      procedure, pass(this) :: key => htable_iter_i4_key
-     final :: htable_iter_i4_free          
+     final :: htable_iter_i4_free
   end type htable_iter_i4_t
 
   !> Iterator for an integer*8 based hash table
@@ -242,10 +242,10 @@ subroutine htable_init(this, size, key, data)
     class(*), target, intent(in), optional :: data !< Type of data
     class(*), pointer :: dp
     integer :: i
-    
+
 
     call htable_free(this)
-    
+
     if (size .lt. 4) then
        size = 4
     end if
@@ -283,7 +283,7 @@ subroutine htable_free(this)
 
     this%size = 0
     this%entries = 0
-    
+
   end subroutine htable_free
 
   !> Clear all entries in a hash table
@@ -294,9 +294,9 @@ subroutine htable_clear(this)
        this%t(:)%valid = .false.
        this%entries = 0
     else
-       call neko_error("Hash table not allocated")       
+       call neko_error("Hash table not allocated")
     end if
-    
+
   end subroutine htable_clear
 
   !> Return number of entries in the table
@@ -315,7 +315,7 @@ end function htable_size
 
 
   !> Insert tuple @a (key, value) into the hash table
-  recursive subroutine htable_set(this, key, data) 
+  recursive subroutine htable_set(this, key, data)
     class(htable_t), intent(inout) :: this
     class(*), intent(inout) :: key   !< Table key
     class(*), intent(inout) ::  data !< Data associated with @a key
@@ -326,7 +326,7 @@ recursive subroutine htable_set(this, key, data)
     i = log(1.0/this%size)/log(0.6)
     !i = (this%size-1)/10
     index = 0
-    
+
     do while (i .ge. 0)
        index = this%hash(key, c**2)
        if (index .lt. 0) then
@@ -391,7 +391,7 @@ function htable_get(this, key, data) result(rcode)
 
     c = 0
     i = this%size - 1
-    
+
     do while (i .ge. 0)
        index = this%hash(key, c**2)
        if (index .lt. 0) then
@@ -401,7 +401,7 @@ function htable_get(this, key, data) result(rcode)
        if (.not. this%t(index)%valid .and. &
             .not. this%t(index)%skip) then
           rcode = 1
-          return          
+          return
        else if ((this%t(index)%valid) .and. &
             htable_eq_key(this, index, key)) then
           call htable_get_data(this, index, data)
@@ -422,7 +422,7 @@ subroutine htable_remove(this, key)
 
     c = 0
     i = this%size - 1
-    
+
     do while (i .ge. 0)
        index = this%hash(key, c**2)
        if (index .lt. 0) then
@@ -576,12 +576,12 @@ pure function htable_eq_key(this, idx, key) result(res)
   end function htable_eq_key
 
   !> Set key at @a idx to @a key
-  subroutine htable_set_key(this, idx, key) 
+  subroutine htable_set_key(this, idx, key)
     class(htable_t), target, intent(inout) :: this
     integer, intent(in) :: idx  !< Table index
     class(*), intent(in) :: key !< Key to set at @a idx
     class(*), pointer :: kp
-    
+
     kp => this%t(idx)%key
     select type(key)
     type is (integer)
@@ -633,7 +633,7 @@ function htable_iter_next(this) result(valid)
 
     valid = (this%n .lt. this%t%size)
     if (.not. valid) this%n = -1
-    
+
   end function htable_iter_next
 
   !> Reset an iterator
@@ -688,9 +688,9 @@ subroutine htable_iter_data(this, data)
     class default
        call neko_error('Invalid htable data (iter)')
     end select
-    
+
   end subroutine htable_iter_data
-  
+
   !
   ! Integer based implementation
   !
@@ -706,11 +706,11 @@ subroutine htable_i4_init(this, size, data)
     else
        call htable_init(this, size, key)
     end if
-    
+
   end subroutine htable_i4_init
 
   !> Insert an integer into the hash table
-  subroutine htable_i4_set(this, key, data) 
+  subroutine htable_i4_set(this, key, data)
     class(htable_i4_t), intent(inout) :: this
     integer, intent(inout) :: key   !< Table key
     class(*), intent(inout) :: data !< Data associated with @a key
@@ -761,7 +761,7 @@ pure function htable_i4_hash(this, k, c) result(hash)
   end function htable_i4_hash
 
   !> Remove an integer with key @a key from the hash table
-  subroutine htable_i4_remove(this, key) 
+  subroutine htable_i4_remove(this, key)
     class(htable_i4_t), intent(inout) :: this
     integer, intent(inout) :: key   !< Table key
 
@@ -775,7 +775,7 @@ subroutine htable_iter_i4_init(this, t)
     type(htable_i4_t), target, intent(inout) :: t
 
     this%t => t
-    this%n = -1    
+    this%n = -1
 
   end subroutine htable_iter_i4_init
 
@@ -796,7 +796,7 @@ function htable_iter_i4_value(this) result(value)
     class default
        call neko_error('Key and data of different kind (i4)')
     end select
-    
+
   end function htable_iter_i4_value
 
   !> Return the current key of the integer based hash table iterator
@@ -810,7 +810,7 @@ function htable_iter_i4_key(this) result(key)
     class default
        call neko_error('Invalid key (i4)')
     end select
-    
+
   end function htable_iter_i4_key
 
   !
@@ -828,11 +828,11 @@ subroutine htable_i8_init(this, size, data)
     else
        call htable_init(this, size, key)
     end if
-    
+
   end subroutine htable_i8_init
 
   !> Insert an integer*8 into the hash table
-  subroutine htable_i8_set(this, key, data) 
+  subroutine htable_i8_set(this, key, data)
     class(htable_i8_t), intent(inout) :: this
     integer(kind=i8), intent(inout) :: key   !< Table key
     class(*), intent(inout) :: data !< Data associated with @a key
@@ -884,7 +884,7 @@ pure function htable_i8_hash(this, k, c) result(hash)
   end function htable_i8_hash
 
   !> Remove an integer*8 with key @a key from the hash table
-  subroutine htable_i8_remove(this, key) 
+  subroutine htable_i8_remove(this, key)
     class(htable_i8_t), intent(inout) :: this
     integer(kind=i8), intent(inout) :: key   !< Table key
 
@@ -898,7 +898,7 @@ subroutine htable_iter_i8_init(this, t)
     type(htable_i8_t), target, intent(inout) :: t
 
     this%t => t
-    this%n = -1    
+    this%n = -1
 
   end subroutine htable_iter_i8_init
 
@@ -920,7 +920,7 @@ function htable_iter_i8_value(this) result(value)
     class default
        call neko_error('Key and data of different kind (i8)')
     end select
-    
+
   end function htable_iter_i8_value
 
   !> Return the current key of the integer*8 based hash table iterator
@@ -942,9 +942,9 @@ function htable_iter_i8_key(this) result(key)
     class default
        call neko_error('Corrupt htable iter. (i8)')
     end select
-    
+
   end function htable_iter_i8_key
-  
+
 
   !
   ! Double precision based implementation
@@ -961,11 +961,11 @@ subroutine htable_r8_init(this, size, data)
     else
        call htable_init(this, size, key)
     end if
-    
+
   end subroutine htable_r8_init
 
   !> Insert a double precision @a key (with @a data) into the hash table
-  subroutine htable_r8_set(this, key, data) 
+  subroutine htable_r8_set(this, key, data)
     class(htable_r8_t), intent(inout) :: this
     real(kind=dp), intent(inout) :: key !< Table key
     class(*), intent(inout) :: data     !< Data associated with @a key
@@ -1000,14 +1000,14 @@ pure function htable_r8_hash(this, k, c) result(hash)
   end function htable_r8_hash
 
   !> Remove a double precision key @a key from the hash table
-  subroutine htable_r8_remove(this, key) 
+  subroutine htable_r8_remove(this, key)
     class(htable_r8_t), intent(inout) :: this
     real(kind=dp), intent(inout) :: key   !< Table key
 
     call htable_remove(this, key)
 
   end subroutine htable_r8_remove
-  
+
 
   !> Initialize a double precision based hash table iterator
   subroutine htable_iter_r8_init(this, t)
@@ -1015,7 +1015,7 @@ subroutine htable_iter_r8_init(this, t)
     type(htable_r8_t), target, intent(inout) :: t
 
     this%t => t
-    this%n = -1    
+    this%n = -1
 
   end subroutine htable_iter_r8_init
 
@@ -1036,7 +1036,7 @@ function htable_iter_r8_value(this) result(value)
     class default
        call neko_error('Key and data of different kind (r8)')
     end select
-    
+
   end function htable_iter_r8_value
 
   !> Return the current key of the double precision based hash table iterator
@@ -1050,9 +1050,9 @@ function htable_iter_r8_key(this) result(key)
     class default
        call neko_error('Invalid key (r8)')
     end select
-    
+
   end function htable_iter_r8_key
-  
+
   !
   ! Point based implementation
   !
@@ -1068,11 +1068,11 @@ subroutine htable_pt_init(this, size, data)
     else
        call htable_init(this, size, key)
     end if
-    
+
   end subroutine htable_pt_init
 
   !> Insert a point @a key (with @a data) into the hash table
-  subroutine htable_pt_set(this, key, data) 
+  subroutine htable_pt_set(this, key, data)
     class(htable_pt_t), intent(inout) :: this
     type(point_t), intent(inout) :: key !< Table key
     class(*), intent(inout) :: data     !< Data associated with @a key
@@ -1097,7 +1097,7 @@ pure function htable_pt_hash(this, k, c) result(hash)
     class(htable_pt_t), intent(in) :: this
     class(*), intent(in) :: k
     integer, value :: c
-    integer :: hash, i 
+    integer :: hash, i
     integer(kind=i8) :: hash2, tmp, mult
     integer(kind=i8), parameter :: M1 = int(Z'7ed55d15', i8)
     integer(kind=i8), parameter :: M2 = int(Z'c761c23c', i8)
@@ -1131,7 +1131,7 @@ pure function htable_pt_hash(this, k, c) result(hash)
   end function htable_pt_hash
 
   !> Remove a point with key @a key from the hash table
-  subroutine htable_pt_remove(this, key) 
+  subroutine htable_pt_remove(this, key)
     class(htable_pt_t), intent(inout) :: this
     type(point_t), intent(inout) :: key   !< Table key
 
@@ -1146,7 +1146,7 @@ subroutine htable_iter_pt_init(this, t)
     type(htable_pt_t), target, intent(inout) :: t
 
     this%t => t
-    this%n = -1    
+    this%n = -1
 
   end subroutine htable_iter_pt_init
 
@@ -1167,7 +1167,7 @@ function htable_iter_pt_value(this) result(value)
     class default
        call neko_error('Key and data of different kind (pt)')
     end select
-    
+
   end function htable_iter_pt_value
 
   !> Return the current key of the point based hash table iterator
@@ -1181,7 +1181,7 @@ function htable_iter_pt_key(this) result(key)
     class default
        call neko_error('Invalid key (pt)')
     end select
-    
+
   end function htable_iter_pt_key
 
   !
@@ -1199,11 +1199,11 @@ subroutine htable_i4t2_init(this, size, data)
     else
        call htable_init(this, size, key)
     end if
-    
+
   end subroutine htable_i4t2_init
 
   !> Insert an integer 2-tuple into the hash table
-  subroutine htable_i4t2_set(this, key, data) 
+  subroutine htable_i4t2_set(this, key, data)
     class(htable_i4t2_t), intent(inout) :: this
     type(tuple_i4_t), intent(inout) :: key   !< Table key
     class(*), intent(inout) :: data !< Data associated with @a key
@@ -1261,24 +1261,24 @@ pure function htable_i4t2_hash(this, k, c) result(hash)
   end function htable_i4t2_hash
 
   !> Remove an integer 2-tuple with key @a key from the hash table
-  subroutine htable_i4t2_remove(this, key) 
+  subroutine htable_i4t2_remove(this, key)
     class(htable_i4t2_t), intent(inout) :: this
     type(tuple_i4_t), intent(inout) :: key   !< Table key
 
     call htable_remove(this, key)
 
   end subroutine htable_i4t2_remove
-  
+
   !> Initialize an integer 2-tuple based hash table iterator
   subroutine htable_iter_i4t2_init(this, t)
     class(htable_iter_i4t2_t), intent(inout) :: this
     type(htable_i4t2_t), target, intent(inout) :: t
 
     this%t => t
-    this%n = -1    
+    this%n = -1
 
   end subroutine htable_iter_i4t2_init
-  
+
   !> Destroy an integer 2-tuple based  hash table iterator
   subroutine htable_iter_i4t2_free(this)
     type(htable_iter_i4t2_t), intent(inout) :: this
@@ -1296,7 +1296,7 @@ function htable_iter_i4t2_value(this) result(value)
     class default
        call neko_error('Key and data of different kind (i4t2)')
     end select
-    
+
   end function htable_iter_i4t2_value
 
   !> Return the current key of integer based 2-tuple hash table iterator
@@ -1310,7 +1310,7 @@ function htable_iter_i4t2_key(this) result(key)
     class default
        call neko_error('Invalid key (i4t2)')
     end select
-    
+
   end function htable_iter_i4t2_key
 
   !
@@ -1328,11 +1328,11 @@ subroutine htable_i4t4_init(this, size, data)
     else
        call htable_init(this, size, key)
     end if
-    
+
   end subroutine htable_i4t4_init
 
   !> Insert an integer 4-tuple into the hash table
-  subroutine htable_i4t4_set(this, key, data) 
+  subroutine htable_i4t4_set(this, key, data)
     class(htable_i4t4_t), intent(inout) :: this
     type(tuple4_i4_t), intent(inout) :: key   !< Table key
     class(*), intent(inout) :: data !< Data associated with @a key
@@ -1365,7 +1365,7 @@ pure function htable_i4t4_hash(this, k, c) result(hash)
     integer(kind=i8), parameter :: M4 = int(Z'd3a2646c', i8)
     integer(kind=i8), parameter :: M5 = int(Z'fd7046c5', i8)
     integer(kind=i8), parameter :: M6 = int(Z'b55a4f09', i8)
-    
+
     select type(k)
     type is (tuple4_i4_t)
        mult = int(1000003, i8)
@@ -1390,7 +1390,7 @@ pure function htable_i4t4_hash(this, k, c) result(hash)
   end function htable_i4t4_hash
 
   !> Remove an integer 4-tuple with key @a key from the hash table
-  subroutine htable_i4t4_remove(this, key) 
+  subroutine htable_i4t4_remove(this, key)
     class(htable_i4t4_t), intent(inout) :: this
     type(tuple4_i4_t), intent(inout) :: key   !< Table key
 
@@ -1404,7 +1404,7 @@ subroutine htable_iter_i4t4_init(this, t)
     type(htable_i4t4_t), target, intent(inout) :: t
 
     this%t => t
-    this%n = -1    
+    this%n = -1
 
   end subroutine htable_iter_i4t4_init
 
@@ -1425,7 +1425,7 @@ function htable_iter_i4t4_value(this) result(value)
     class default
        call neko_error('Key and data of different kind (i4t4)')
     end select
-    
+
   end function htable_iter_i4t4_value
 
   !> Return the current key of integer based 4-tuple hash table iterator
@@ -1447,7 +1447,7 @@ function htable_iter_i4t4_key(this) result(key)
     class default
        call neko_error('Corrupt htable iter. (i4t4)')
     end select
-    
+
   end function htable_iter_i4t4_key
 
   !
@@ -1459,17 +1459,17 @@ subroutine htable_cptr_init(this, size, data)
     integer, value :: size                    !< Initial size of the table
     class(*), intent(inout), optional :: data !< Data to associate with @a key
     type(h_cptr_t) :: key
-    
+
     if (present(data)) then
        call htable_init(this, size, key, data)
     else
        call htable_init(this, size, key)
     end if
-    
+
   end subroutine htable_cptr_init
 
   !> Insert a C pointer into the hash table
-  subroutine htable_cptr_set(this, key, data) 
+  subroutine htable_cptr_set(this, key, data)
     class(htable_cptr_t), target, intent(inout) :: this
     type(h_cptr_t), intent(inout) :: key   !< Table key
     class(*), intent(inout) :: data !< Data associated with @a key
@@ -1508,7 +1508,7 @@ pure function htable_cptr_hash(this, k, c) result(hash)
   end function htable_cptr_hash
 
   !> Remove a C pointer with key @a key from the hash table
-  subroutine htable_cptr_remove(this, key) 
+  subroutine htable_cptr_remove(this, key)
     class(htable_cptr_t), target, intent(inout) :: this
     type(h_cptr_t), intent(inout) :: key   !< Table key
 
@@ -1522,7 +1522,7 @@ subroutine htable_iter_cptr_init(this, t)
     type(htable_cptr_t), target, intent(inout) :: t
 
     this%t => t
-    this%n = -1    
+    this%n = -1
 
   end subroutine htable_iter_cptr_init
 
@@ -1545,7 +1545,7 @@ function htable_iter_cptr_value(this) result(value)
     class default
        call neko_error('Key and data of different kind (cptr)')
     end select
-    
+
   end function htable_iter_cptr_value
 
   !> Return the current key of a C pointer based hash table iterator
@@ -1561,7 +1561,7 @@ function htable_iter_cptr_key(this) result(key)
     class default
        call neko_error('Invalid key (cptr)')
     end select
-    
+
   end function htable_iter_cptr_key
 
 end module htable
diff --git a/src/adt/stack.f90 b/src/adt/stack.f90
index 975bbaa1332..24922ee0640 100644
--- a/src/adt/stack.f90
+++ b/src/adt/stack.f90
@@ -39,10 +39,10 @@ module stack
   use point, only : point_t
   use structs, only : struct_curve_t
   use math, only : NEKO_M_LN2
-  use tuple, only : tuple_t, tuple_i4_t, tuple4_i4_t, tuple_i4r8_t, tuple_2i4r8_t
+  use tuple, only : tuple_i4_t, tuple4_i4_t, tuple_i4r8_t, tuple_2i4r8_t
   implicit none
   private
-  
+
   integer, parameter :: NEKO_STACK_SIZE_T = 32
 
   !> Base type for a stack
@@ -55,6 +55,7 @@ module stack
      procedure, non_overridable, pass(this) :: free => stack_free
      procedure, non_overridable, pass(this) :: clear => stack_clear
      procedure, non_overridable, pass(this) :: size => stack_size
+     procedure, non_overridable, pass(this) :: is_empty => stack_is_empty
      procedure, non_overridable, pass(this) :: push => stack_push
   end type stack_t
 
@@ -106,7 +107,7 @@ module stack
      procedure, public, pass(this) :: pop => stack_2i4r8t3_pop
      procedure, public, pass(this) :: array => stack_2i4r8t3_data
   end type stack_2i4r8t3_t
-  
+
   !> Curved element stack
   type, public, extends(stack_t) :: stack_curve_t
    contains
@@ -151,9 +152,9 @@ module stack
 
 contains
 
-  !> Initialize a stack of arbitrary type 
+  !> Initialize a stack of arbitrary type
   subroutine stack_init(this, size)
-    class(stack_t), intent(inout) :: this 
+    class(stack_t), intent(inout) :: this
     integer, optional :: size !< Initial size of the stack
     integer :: size_t
 
@@ -202,16 +203,16 @@ subroutine stack_init(this, size)
     end select
 
   end subroutine stack_init
-  
+
   !> Destroy a stack
   subroutine stack_free(this)
     class(stack_t), intent(inout) :: this
-    
+
     if (allocated(this%data)) then
        deallocate(this%data)
-       this%size_ = 0 
+       this%size_ = 0
        this%top_ = 0
-    end if    
+    end if
 
   end subroutine stack_free
 
@@ -228,6 +229,13 @@ pure function stack_size(this) result(size)
     size = this%top_
   end function stack_size
 
+  !> Return true if the stack is empty
+   pure function stack_is_empty(this) result(is_empty)
+      class(stack_t), intent(in) :: this
+      logical :: is_empty
+      is_empty = this%top_ .eq. 0
+   end function stack_is_empty
+
   !> Push data onto the stack
   subroutine stack_push(this, data)
     class(stack_t), target, intent(inout) :: this
@@ -242,7 +250,7 @@ subroutine stack_push(this, data)
           allocate(integer::tmp(this%size_))
        type is(integer(i8))
           allocate(integer(i8)::tmp(this%size_))
-       type is(double precision)          
+       type is(double precision)
           allocate(double precision::tmp(this%size_))
        type is(tuple_i4_t)
           allocate(tuple_i4_t::tmp(this%size_))
@@ -267,7 +275,7 @@ subroutine stack_push(this, data)
        class default
           call neko_error('Invalid data type (stack_push)')
        end select
-       
+
        select type(tmp)
        type is (integer)
           select type(sdp=>this%data)
@@ -347,7 +355,7 @@ subroutine stack_push(this, data)
        end select
        call move_alloc(tmp, this%data)
     end if
-    
+
     this%top_ = this%top_ + 1
 
     select type(sdp=>this%data)
@@ -427,7 +435,7 @@ function stack_i4_pop(this) result(data)
     integer :: data
 
     select type (sdp=>this%data)
-    type is (integer)       
+    type is (integer)
        data = sdp(this%top_)
     class default
        call neko_error('Invalid data type (i4 pop)')
@@ -438,10 +446,10 @@ end function stack_i4_pop
   !> Return a pointer to the internal integer array
   function stack_i4_data(this) result(data)
     class(stack_i4_t), target, intent(inout) :: this
-    integer, pointer :: data(:)
+    integer, contiguous, pointer :: data(:)
 
     select type (sdp=>this%data)
-    type is (integer)       
+    type is (integer)
        data => sdp
     class default
        call neko_error('Invalid data type (i4 array)')
@@ -454,7 +462,7 @@ function stack_i8_pop(this) result(data)
     integer(kind=i8) :: data
 
     select type (sdp=>this%data)
-    type is (integer(i8))       
+    type is (integer(i8))
        data = sdp(this%top_)
     class default
        call neko_error('Invalid data type (i8 pop)')
@@ -465,10 +473,10 @@ end function stack_i8_pop
   !> Return a pointer to the internal integer*8 array
   function stack_i8_data(this) result(data)
     class(stack_i8_t), target, intent(inout) :: this
-    integer(kind=i8), pointer :: data(:)
+    integer(kind=i8), contiguous, pointer :: data(:)
 
     select type (sdp=>this%data)
-    type is (integer(i8))       
+    type is (integer(i8))
        data => sdp
     class default
        call neko_error('Invalid data type (i8 array)')
@@ -479,9 +487,9 @@ end function stack_i8_data
   function stack_r8_pop(this) result(data)
     class(stack_r8_t), target, intent(inout) :: this
     real(kind=dp) :: data
-    
+
     select type (sdp=>this%data)
-    type is (double precision)       
+    type is (double precision)
        data = sdp(this%top_)
     class default
        call neko_error('Invalid data type (r8 pop)')
@@ -489,13 +497,13 @@ function stack_r8_pop(this) result(data)
     this%top_ = this%top_ -1
   end function stack_r8_pop
 
-  !> Return a pointer to the internal double precision array 
+  !> Return a pointer to the internal double precision array
   function stack_r8_data(this) result(data)
     class(stack_r8_t), target, intent(inout) :: this
-    real(kind=dp), pointer :: data(:)
+    real(kind=dp), contiguous, pointer :: data(:)
 
     select type (sdp=>this%data)
-    type is (double precision)       
+    type is (double precision)
        data => sdp
     class default
        call neko_error('Invalid data type (r8 array)')
@@ -506,9 +514,9 @@ end function stack_r8_data
   function stack_i4t2_pop(this) result(data)
     class(stack_i4t2_t), target, intent(inout) :: this
     type(tuple_i4_t) :: data
-    
+
     select type (sdp=>this%data)
-    type is (tuple_i4_t)       
+    type is (tuple_i4_t)
        data = sdp(this%top_)
     class default
        call neko_error('Invalid data type (i4t2 pop)')
@@ -519,10 +527,10 @@ end function stack_i4t2_pop
   !> Return a pointer to the interal 2-tuple array
   function stack_i4t2_data(this) result(data)
     class(stack_i4t2_t), target, intent(inout) :: this
-    type(tuple_i4_t), pointer :: data(:)
+    type(tuple_i4_t), contiguous, pointer :: data(:)
 
     select type (sdp=>this%data)
-    type is (tuple_i4_t)       
+    type is (tuple_i4_t)
        data => sdp
     class default
        call neko_error('Invalid data type (i4t2 array)')
@@ -533,9 +541,9 @@ end function stack_i4t2_data
   function stack_i4t4_pop(this) result(data)
     class(stack_i4t4_t), target, intent(inout) :: this
     type(tuple4_i4_t) :: data
-    
+
     select type (sdp=>this%data)
-    type is (tuple4_i4_t)       
+    type is (tuple4_i4_t)
        data = sdp(this%top_)
     class default
        call neko_error('Invalid data type (i4t4 pop)')
@@ -546,10 +554,10 @@ end function stack_i4t4_pop
   !> Return a pointer to the internal 4-tuple array
   function stack_i4t4_data(this) result(data)
     class(stack_i4t4_t), target, intent(inout) :: this
-    type(tuple4_i4_t), pointer :: data(:)
+    type(tuple4_i4_t), contiguous, pointer :: data(:)
 
     select type (sdp=>this%data)
-    type is (tuple4_i4_t)       
+    type is (tuple4_i4_t)
        data => sdp
     class default
        call neko_error('Invalid data type (i4t4 array)')
@@ -560,9 +568,9 @@ end function stack_i4t4_data
   function stack_i4r8t2_pop(this) result(data)
     class(stack_i4r8t2_t), target, intent(inout) :: this
     type(tuple_i4r8_t) :: data
-    
+
     select type (sdp=>this%data)
-    type is (tuple_i4r8_t)       
+    type is (tuple_i4r8_t)
        data = sdp(this%top_)
     class default
        call neko_error('Invalid data type (i4r8t2 pop)')
@@ -573,10 +581,10 @@ end function stack_i4r8t2_pop
   !> Return a pointer to the internal 2-tuple array
   function stack_i4r8t2_data(this) result(data)
     class(stack_i4r8t2_t), target, intent(inout) :: this
-    type(tuple_i4r8_t), pointer :: data(:)
+    type(tuple_i4r8_t), contiguous, pointer :: data(:)
 
     select type (sdp=>this%data)
-    type is (tuple_i4r8_t)       
+    type is (tuple_i4r8_t)
        data => sdp
     class default
        call neko_error('Invalid data type (i4r8t2 array)')
@@ -587,9 +595,9 @@ end function stack_i4r8t2_data
   function stack_2i4r8t3_pop(this) result(data)
     class(stack_2i4r8t3_t), target, intent(inout) :: this
     type(tuple_2i4r8_t) :: data
-    
+
     select type (sdp=>this%data)
-    type is (tuple_2i4r8_t)       
+    type is (tuple_2i4r8_t)
        data = sdp(this%top_)
     class default
        call neko_error('Invalid data type (i4r8t2 pop)')
@@ -600,23 +608,23 @@ end function stack_2i4r8t3_pop
   !> Return a pointer to the internal 2-tuple array
   function stack_2i4r8t3_data(this) result(data)
     class(stack_2i4r8t3_t), target, intent(inout) :: this
-    type(tuple_2i4r8_t), pointer :: data(:)
+    type(tuple_2i4r8_t), contiguous, pointer :: data(:)
 
     select type (sdp=>this%data)
-    type is (tuple_2i4r8_t)       
+    type is (tuple_2i4r8_t)
        data => sdp
     class default
        call neko_error('Invalid data type (i4r8t2 array)')
     end select
   end function stack_2i4r8t3_data
- 
+
   !> Pop a curve element of the stack
   function stack_curve_element_pop(this) result(data)
     class(stack_curve_t), target, intent(inout) :: this
     type(struct_curve_t) :: data
-    
+
     select type (sdp=>this%data)
-    type is (struct_curve_t)       
+    type is (struct_curve_t)
        data = sdp(this%top_)
     class default
        call neko_error('Invalid data type (curve pop)')
@@ -627,10 +635,10 @@ end function stack_curve_element_pop
   !> Return a pointer to the internal curve element array
   function stack_curve_element_data(this) result(data)
     class(stack_curve_t), target, intent(inout) :: this
-    type(struct_curve_t), pointer :: data(:)
+    type(struct_curve_t), contiguous, pointer :: data(:)
 
     select type (sdp=>this%data)
-    type is (struct_curve_t)       
+    type is (struct_curve_t)
        data => sdp
     class default
        call neko_error('Invalid data type (curve array)')
@@ -643,7 +651,7 @@ function stack_nq_pop(this) result(data)
     type(nmsh_quad_t) :: data
 
     select type (sdp=>this%data)
-    type is (nmsh_quad_t)       
+    type is (nmsh_quad_t)
        data = sdp(this%top_)
     class default
        call neko_error('Invalid data type (nq pop)')
@@ -654,10 +662,10 @@ end function stack_nq_pop
   !> Return a pointer to the internal Neko quad array
   function stack_nq_data(this) result(data)
     class(stack_nq_t), target, intent(inout) :: this
-    type(nmsh_quad_t), pointer :: data(:)
+    type(nmsh_quad_t), contiguous, pointer :: data(:)
 
     select type (sdp=>this%data)
-    type is (nmsh_quad_t)       
+    type is (nmsh_quad_t)
        data => sdp
     class default
        call neko_error('Invalid data type (nq array)')
@@ -670,7 +678,7 @@ function stack_nh_pop(this) result(data)
     type(nmsh_hex_t) :: data
 
     select type (sdp=>this%data)
-    type is (nmsh_hex_t)       
+    type is (nmsh_hex_t)
        data = sdp(this%top_)
     class default
        call neko_error('Invalid data type (nh pop)')
@@ -681,10 +689,10 @@ end function stack_nh_pop
   !> Return a pointer to the internal Neko quad array
   function stack_nh_data(this) result(data)
     class(stack_nh_t), target, intent(inout) :: this
-    type(nmsh_hex_t), pointer :: data(:)
+    type(nmsh_hex_t), contiguous, pointer :: data(:)
 
     select type (sdp => this%data)
-    type is (nmsh_hex_t)       
+    type is (nmsh_hex_t)
        data => sdp
     class default
        call neko_error('Invalid data type (nh array)')
@@ -697,7 +705,7 @@ function stack_nz_pop(this) result(data)
     type(nmsh_zone_t) :: data
 
     select type (sdp=>this%data)
-    type is (nmsh_zone_t)       
+    type is (nmsh_zone_t)
        data = sdp(this%top_)
     class default
        call neko_error('Invalid data type (nz pop)')
@@ -708,10 +716,10 @@ end function stack_nz_pop
   !> Return a pointer to the internal Neko zone array
   function stack_nz_data(this) result(data)
     class(stack_nz_t), target, intent(inout) :: this
-    type(nmsh_zone_t), pointer :: data(:)
+    type(nmsh_zone_t), contiguous, pointer :: data(:)
 
     select type (sdp=>this%data)
-    type is (nmsh_zone_t)       
+    type is (nmsh_zone_t)
        data => sdp
     class default
        call neko_error('Invalid data type (nz array)')
@@ -724,7 +732,7 @@ function stack_nc_pop(this) result(data)
     type(nmsh_curve_el_t) :: data
 
     select type (sdp=>this%data)
-    type is (nmsh_curve_el_t)       
+    type is (nmsh_curve_el_t)
        data = sdp(this%top_)
     class default
        call neko_error('Invalid data type (nc pop)')
@@ -738,7 +746,7 @@ function stack_nc_data(this) result(data)
     type(nmsh_curve_el_t), pointer :: data(:)
 
     select type (sdp=>this%data)
-    type is (nmsh_curve_el_t)       
+    type is (nmsh_curve_el_t)
        data => sdp
     class default
        call neko_error('Invalid data type (nc array)')
@@ -751,7 +759,7 @@ function stack_pt_pop(this) result(data)
     type(point_t) :: data
 
     select type (sdp=>this%data)
-    type is (point_t)       
+    type is (point_t)
        data = sdp(this%top_)
     class default
        call neko_error('Invalid data type (point pop)')
@@ -762,14 +770,14 @@ end function stack_pt_pop
   !> Return a pointer to the internal point array
   function stack_pt_data(this) result(data)
     class(stack_pt_t), target, intent(inout) :: this
-    type(point_t), pointer :: data(:)
+    type(point_t), contiguous, pointer :: data(:)
 
     select type (sdp=>this%data)
-    type is (point_t)       
+    type is (point_t)
        data => sdp
     class default
        call neko_error('Invalid data type (point array)')
     end select
   end function stack_pt_data
-  
+
 end module stack
diff --git a/src/adt/tuple.f90 b/src/adt/tuple.f90
index 8c43dcf9ad9..5c9771a5ec4 100644
--- a/src/adt/tuple.f90
+++ b/src/adt/tuple.f90
@@ -32,12 +32,12 @@
 !
 !> Implements a n-tuple
 module tuple
-  use math, only : abscmp 
+  use math, only : abscmp
   use num_types, only : dp
   implicit none
   private
 
-  !> Base type for an n-tuple 
+  !> Base type for an n-tuple
   type, public, abstract :: tuple_t
    contains
      procedure(tuple_assign_tuple), pass(this), deferred :: assign_tuple
@@ -47,7 +47,7 @@ module tuple
      generic :: assignment(=) => assign_tuple, assign_vector
   end type tuple_t
 
-  !> Integer based 2-tuple 
+  !> Integer based 2-tuple
   type, extends(tuple_t), public :: tuple_i4_t
      integer :: x(2) = (/0, 0/)
    contains
@@ -56,7 +56,7 @@ module tuple
      procedure, pass(this) :: equal => tuple_i4_equal
   end type tuple_i4_t
 
-  !> Integer based 3-tuple 
+  !> Integer based 3-tuple
   type, extends(tuple_t), public :: tuple3_i4_t
      integer :: x(3) = (/0, 0, 0/)
    contains
@@ -65,7 +65,7 @@ module tuple
      procedure, pass(this) :: equal => tuple3_i4_equal
   end type tuple3_i4_t
 
-  !> Integer based 4-tuple 
+  !> Integer based 4-tuple
   type, extends(tuple_t), public :: tuple4_i4_t
      integer :: x(4) = (/0, 0, 0, 0/)
    contains
@@ -74,7 +74,7 @@ module tuple
      procedure, pass(this) :: equal => tuple4_i4_equal
   end type tuple4_i4_t
 
-  !> Double precision based 2-tuple 
+  !> Double precision based 2-tuple
   type, extends(tuple_t), public :: tuple_r8_t
      real(kind=dp) :: x(2) = (/0d0, 0d0/)
    contains
@@ -93,9 +93,9 @@ module tuple
      procedure, pass(this) :: equal => tuple_i4r8_equal
   end type tuple_i4r8_t
 
-  !> Mixed integer (\f$ x, y \f$) double precision (\f$ z \f$) 3-tuple 
+  !> Mixed integer (\f$ x, y \f$) double precision (\f$ z \f$) 3-tuple
   type, extends(tuple_t), public :: tuple_2i4r8_t
-     integer :: x, y     
+     integer :: x, y
      real(kind=dp) :: z
    contains
      procedure, pass(this) :: assign_tuple => tuple_2i4r8_assign_tuple
@@ -120,7 +120,7 @@ subroutine tuple_assign_vector(this, x)
        class(*), dimension(:), intent(in) :: x
      end subroutine tuple_assign_vector
   end interface
-  
+
   !> Abstract intf. for tuple comparison
   abstract interface
      pure function tuple_equal(this, other) result(res)
@@ -132,7 +132,7 @@ end function tuple_equal
   end interface
 
 contains
-  
+
   !> Assign an integer 2-tuple to a tuple
   subroutine tuple_i4_assign_tuple(this, other)
     class(tuple_i4_t), intent(inout) :: this
@@ -152,7 +152,7 @@ subroutine tuple_i4_assign_vector(this, x)
     select type(x)
     type is (integer)
        this%x = x
-    end select    
+    end select
   end subroutine tuple_i4_assign_vector
 
   !> Check if two integer based tuples are equal
@@ -187,14 +187,14 @@ subroutine tuple3_i4_assign_vector(this, x)
     select type(x)
     type is (integer)
        this%x = x
-    end select    
+    end select
   end subroutine tuple3_i4_assign_vector
 
   !> Check if two integer based tuples are equal
   pure function tuple3_i4_equal(this, other) result(res)
     class(tuple3_i4_t), intent(in) :: this
     class(tuple_t), intent(in) :: other
-    logical :: res    
+    logical :: res
 
     res = .false.
     select type(other)
@@ -202,7 +202,7 @@ pure function tuple3_i4_equal(this, other) result(res)
        res = all(this%x .eq. other%x)
     end select
   end function tuple3_i4_equal
-  
+
   !> Assign an integer 4-tuple to a tuple
   subroutine tuple4_i4_assign_tuple(this, other)
     class(tuple4_i4_t), intent(inout) :: this
@@ -222,14 +222,14 @@ subroutine tuple4_i4_assign_vector(this, x)
     select type(x)
     type is (integer)
        this%x = x
-    end select    
+    end select
   end subroutine tuple4_i4_assign_vector
 
   !> Check if two integer based tuples are equal
   pure function tuple4_i4_equal(this, other) result(res)
     class(tuple4_i4_t), intent(in) :: this
     class(tuple_t), intent(in) :: other
-    logical :: res    
+    logical :: res
 
     res = .false.
     select type(other)
@@ -244,7 +244,7 @@ subroutine tuple_r8_assign_tuple(this, other)
     class(tuple_t), intent(in) :: other
 
     select type(other)
-    type is(tuple_r8_t)       
+    type is(tuple_r8_t)
        this%x = other%x
     end select
   end subroutine tuple_r8_assign_tuple
@@ -257,7 +257,7 @@ subroutine tuple_r8_assign_vector(this, x)
     select type(x)
     type is (double precision)
        this%x = x
-    end select    
+    end select
   end subroutine tuple_r8_assign_vector
 
   !> Check if two double precision tuples are equal
@@ -271,7 +271,7 @@ pure function tuple_r8_equal(this, other) result(res)
     type is(tuple_r8_t)
        if (abscmp(this%x(1), other%x(1)) .and. &
             abscmp(this%x(2), other%x(2))) then
-          res = .true.          
+          res = .true.
        end if
     end select
   end function tuple_r8_equal
@@ -282,7 +282,7 @@ subroutine tuple_i4r8_assign_tuple(this, other)
     class(tuple_t), intent(in) :: other
 
     select type(other)
-    type is(tuple_i4r8_t)       
+    type is(tuple_i4r8_t)
        this%x = other%x
        this%y = other%y
     end select
@@ -315,7 +315,7 @@ pure function tuple_i4r8_equal(this, other) result(res)
     type is(tuple_i4r8_t)
        if ((this%x .eq. other%x) .and. &
             abscmp(this%y, other%y)) then
-          res = .true.          
+          res = .true.
        end if
     end select
   end function tuple_i4r8_equal
@@ -326,7 +326,7 @@ subroutine tuple_2i4r8_assign_tuple(this, other)
     class(tuple_t), intent(in) :: other
 
     select type(other)
-    type is(tuple_2i4r8_t)       
+    type is(tuple_2i4r8_t)
        this%x = other%x
        this%y = other%y
        this%z = other%z
@@ -363,9 +363,9 @@ pure function tuple_2i4r8_equal(this, other) result(res)
        if ((this%x .eq. other%x) .and. &
             (this%y .eq. other%y) .and. &
             abscmp(this%z, other%z)) then
-          res = .true.          
+          res = .true.
        end if
     end select
   end function tuple_2i4r8_equal
-   
+
 end module tuple
diff --git a/src/adt/uset.f90 b/src/adt/uset.f90
index 5da3fb60883..f3e2c3237f6 100644
--- a/src/adt/uset.f90
+++ b/src/adt/uset.f90
@@ -31,7 +31,7 @@
 ! POSSIBILITY OF SUCH DAMAGE.
 !
 !> Implements an unordered set ADT
-!! @details A unordered set storing a fixed data-type @a data 
+!! @details A unordered set storing a fixed data-type @a data
 module uset
   use utils, only : neko_error
   use num_types, only : i8, dp
@@ -65,7 +65,7 @@ module uset
      procedure, pass(this) :: element => uset_i4_element
      procedure, pass(this) :: add => uset_i4_add
      procedure, pass(this) :: remove => uset_i4_remove
-     procedure, pass(this) :: iter_init => uset_i4_iter_init     
+     procedure, pass(this) :: iter_init => uset_i4_iter_init
      procedure, pass(this) :: iter_next => uset_i4_iter_next
      procedure, pass(this) :: iter_value => uset_i4_iter_value
   end type uset_i4_t
@@ -82,7 +82,7 @@ module uset
      procedure, pass(this) :: element => uset_i8_element
      procedure, pass(this) :: add => uset_i8_add
      procedure, pass(this) :: remove => uset_i8_remove
-     procedure, pass(this) :: iter_init => uset_i8_iter_init     
+     procedure, pass(this) :: iter_init => uset_i8_iter_init
      procedure, pass(this) :: iter_next => uset_i8_iter_next
      procedure, pass(this) :: iter_value => uset_i8_iter_value
   end type uset_i8_t
@@ -99,14 +99,14 @@ module uset
      procedure, pass(this) :: element => uset_r8_element
      procedure, pass(this) :: add => uset_r8_add
      procedure, pass(this) :: remove => uset_r8_remove
-     procedure, pass(this) :: iter_init => uset_r8_iter_init     
+     procedure, pass(this) :: iter_init => uset_r8_iter_init
      procedure, pass(this) :: iter_next => uset_r8_iter_next
      procedure, pass(this) :: iter_value => uset_r8_iter_value
   end type uset_r8_t
 
   !> Interface for initializing an unordered set
   abstract interface
-     subroutine uset_init(this, n) 
+     subroutine uset_init(this, n)
        import uset_t
        class(uset_t), intent(inout) :: this
        integer, optional :: n
@@ -115,7 +115,7 @@ end subroutine uset_init
 
   !> Interface for destroying an unordered set
   abstract interface
-     subroutine uset_free(this) 
+     subroutine uset_free(this)
        import uset_t
        class(uset_t), intent(inout) :: this
      end subroutine uset_free
@@ -132,7 +132,7 @@ end function uset_size
 
   !> Interface for clearing an unordered set
   abstract interface
-     subroutine uset_clear(this) 
+     subroutine uset_clear(this)
        import uset_t
        class(uset_t), intent(inout) :: this
      end subroutine uset_clear
@@ -149,7 +149,7 @@ end function uset_element
   end interface
 
   !> Inteface for adding @a key to an unorderd set
-  abstract interface     
+  abstract interface
      subroutine uset_add(this, key)
        import uset_t
        class(uset_t), intent(inout) :: this
@@ -158,7 +158,7 @@ end subroutine uset_add
   end interface
 
   !> Inteface for removing @a key in an unorderd set
-  abstract interface     
+  abstract interface
      subroutine uset_remove(this, key)
        import uset_t
        class(uset_t), intent(inout) :: this
@@ -177,16 +177,16 @@ subroutine uset_i4_init(this, n)
        call this%t%init(n)
     else
        call this%t%init(64)
-    end if    
+    end if
   end subroutine uset_i4_init
-  
+
   !> Destroy an integer based unordered set
   subroutine uset_i4_free(this)
     class(uset_i4_t), intent(inout) :: this
 
     nullify(this%it%t)
     call this%t%free()
-    
+
   end subroutine uset_i4_free
 
   !> Return the cardinality of an integer based unordered set
@@ -195,7 +195,7 @@ pure function uset_i4_size(this) result(entries)
     integer :: entries
 
     entries = this%t%num_entries()
-    
+
   end function uset_i4_size
 
   !> Clear an integer based unordered set
@@ -217,9 +217,9 @@ function uset_i4_element(this, key) result(res)
        res = (this%t%get(key, data) .eq. 0)
     class default
        res = .false.
-    end select    
+    end select
   end function uset_i4_element
-  
+
   !> Add an integer @a key to the set
   subroutine uset_i4_add(this, key)
     class(uset_i4_t), intent(inout) :: this
@@ -228,7 +228,7 @@ subroutine uset_i4_add(this, key)
 
     select type(key)
     type is (integer)
-       data = key           
+       data = key
        call this%t%set(key, data)
     class default
        call neko_error("Invalid key")
@@ -251,7 +251,7 @@ end subroutine uset_i4_remove
   !> Initialise an integer based set iterator
   subroutine uset_i4_iter_init(this)
     class(uset_i4_t), target, intent(inout) :: this
-    call this%it%init(this%t)    
+    call this%it%init(this%t)
   end subroutine uset_i4_iter_init
 
   !> Advance an integer based set iterator
@@ -265,7 +265,7 @@ end function uset_i4_iter_next
   function uset_i4_iter_value(this) result(value)
     class(uset_i4_t), target, intent(inout) :: this
     integer, pointer :: value
-    value => this%it%value()    
+    value => this%it%value()
   end function uset_i4_iter_value
 
   !> Initialize an empty integer*8 based unordered set
@@ -277,16 +277,16 @@ subroutine uset_i8_init(this, n)
        call this%t%init(n)
     else
        call this%t%init(64)
-    end if    
+    end if
   end subroutine uset_i8_init
-  
+
   !> Destroy an integer*8 based unordered set
   subroutine uset_i8_free(this)
     class(uset_i8_t), intent(inout) :: this
 
     nullify(this%it%t)
     call this%t%free()
-    
+
   end subroutine uset_i8_free
 
   !> Return the cardinality of an integer*8 based unordered set
@@ -295,7 +295,7 @@ pure function uset_i8_size(this) result(entries)
     integer :: entries
 
     entries = this%t%num_entries()
-    
+
   end function uset_i8_size
 
   !> Clear an integer*8 based unordered set
@@ -317,9 +317,9 @@ function uset_i8_element(this, key) result(res)
        res = (this%t%get(key, data) .eq. 0)
     class default
        res = .false.
-    end select    
+    end select
   end function uset_i8_element
-  
+
   !> Add an integer*8 @a key to the set
   subroutine uset_i8_add(this, key)
     class(uset_i8_t), intent(inout) :: this
@@ -328,7 +328,7 @@ subroutine uset_i8_add(this, key)
 
     select type(key)
     type is (integer(i8))
-       data = key 
+       data = key
        call this%t%set(key, data)
     class default
        call neko_error("Invalid key")
@@ -351,7 +351,7 @@ end subroutine uset_i8_remove
   !> Initialise an integer based set iterator*8
   subroutine uset_i8_iter_init(this)
     class(uset_i8_t), target, intent(inout) :: this
-    call this%it%init(this%t)    
+    call this%it%init(this%t)
   end subroutine uset_i8_iter_init
 
   !> Advance an integer*8 based set iterator
@@ -373,10 +373,10 @@ function uset_i8_iter_value(this) result(value)
     type is (uset_i8_t)
        value => hp%it%value()
     class default
-        call neko_error('Invalid uset htable iter (i8)')
+       call neko_error('Invalid uset htable iter (i8)')
     end select
   end function uset_i8_iter_value
-  
+
   !> Initialize an empty double precision based unordered set
   subroutine uset_r8_init(this, n)
     class(uset_r8_t), intent(inout) :: this
@@ -388,13 +388,13 @@ subroutine uset_r8_init(this, n)
        call this%t%init(64)
     end if
   end subroutine uset_r8_init
-  
+
   !> Destroy a double precision based unordered set
   subroutine uset_r8_free(this)
     class(uset_r8_t), intent(inout) :: this
-    
+
     call this%t%free()
-    
+
   end subroutine uset_r8_free
 
   !> Return the cardinality of a double precision based unordered set
@@ -412,7 +412,7 @@ subroutine uset_r8_clear(this)
 
     call this%t%clear()
   end subroutine uset_r8_clear
-  
+
   !> Check if a double precision @a key is an element of the set
   function uset_r8_element(this, key) result(res)
     class(uset_r8_t), intent(inout) :: this
@@ -426,7 +426,7 @@ function uset_r8_element(this, key) result(res)
     class default
        res = .false.
     end select
-    
+
   end function uset_r8_element
 
   !> Add a double precision @a key to the set
@@ -460,7 +460,7 @@ end subroutine uset_r8_remove
   !> Initialise a double precision based set iterator
   subroutine uset_r8_iter_init(this)
     class(uset_r8_t), target, intent(inout) :: this
-    call this%it%init(this%t)    
+    call this%it%init(this%t)
   end subroutine uset_r8_iter_init
 
   !> Advance a double precision based set iterator
@@ -474,7 +474,7 @@ end function uset_r8_iter_next
   function uset_r8_iter_value(this) result(value)
     class(uset_r8_t), target, intent(inout) :: this
     real(kind=dp), pointer :: value
-    value => this%it%value()    
+    value => this%it%value()
   end function uset_r8_iter_value
 
 
diff --git a/src/bc/bc.f90 b/src/bc/bc.f90
index 6413cf1b249..a58209d1a7b 100644
--- a/src/bc/bc.f90
+++ b/src/bc/bc.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2020-2021, The Neko Authors
+! Copyright (c) 2020-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -36,16 +36,17 @@ module bc
   use num_types
   use device
   use dofmap, only : dofmap_t
+  use coefs, only : coef_t
   use space, only : space_t
-  use mesh, only : mesh_t, NEKO_MSH_MAX_ZLBLS
+  use mesh, only : mesh_t, NEKO_MSH_MAX_ZLBLS, NEKO_MSH_MAX_ZLBL_LEN
   use facet_zone, only : facet_zone_t
   use stack, only : stack_i4t2_t
   use tuple, only : tuple_i4_t
-  use utils, only : neko_error, linear_index
+  use utils, only : neko_error, linear_index, split_string
   use, intrinsic :: iso_c_binding, only : c_ptr, C_NULL_PTR
   implicit none
   private
-  
+
   !> Base type for a boundary condition
   type, public, abstract :: bc_t
      !> The linear index of each node in each boundary facet
@@ -54,6 +55,8 @@ module bc
      integer, allocatable :: facet(:)
      !> Map of degrees of freedom
      type(dofmap_t), pointer :: dof
+     !> SEM coefficients
+     type(coef_t), pointer :: coef
      !> The mesh
      type(mesh_t), pointer :: msh
      !> The function space
@@ -64,7 +67,7 @@ module bc
      type(c_ptr) :: msk_d = C_NULL_PTR
      !> Device pointer for facet
      type(c_ptr) :: facet_d = C_NULL_PTR
-   contains     
+   contains
      !> Constructor
      procedure, pass(this) :: init => bc_init
      !> Destructor
@@ -94,14 +97,16 @@ module bc
   type, private :: bcp_t
      class(bc_t), pointer :: bcp
   end type bcp_t
-  
+
   !> A list of boundary conditions
   type, public :: bc_list_t
      type(bcp_t), allocatable :: bc(:)
+     !> Number of items.
      integer :: n
+     !> Capacity.
      integer :: size
   end type bc_list_t
-    
+
   abstract interface
      !> Apply the boundary condition to a scalar field
      !! @param x The field for which to apply the boundary condition.
@@ -139,12 +144,12 @@ subroutine bc_apply_vector(this, x, y, z, n, t, tstep)
        integer, intent(in), optional :: tstep
      end subroutine bc_apply_vector
   end interface
-  
+
   abstract interface
      !> Apply the boundary condition to a scalar field on the device
      !! @param x_d Device pointer to the field.
      subroutine bc_apply_scalar_dev(this, x_d, t, tstep)
-       import :: c_ptr       
+       import :: c_ptr
        import :: bc_t
        import :: rp
        class(bc_t), intent(inout), target :: this
@@ -175,23 +180,24 @@ end subroutine bc_apply_vector_dev
   interface bc_list_apply
      module procedure bc_list_apply_scalar, bc_list_apply_vector
   end interface bc_list_apply
-  
+
   public :: bc_list_init, bc_list_free, bc_list_add, &
   bc_list_apply_scalar, bc_list_apply_vector, bc_list_apply
-  
+
 contains
 
   !> Constructor
   !! @param dof Map of degrees of freedom.
-  subroutine bc_init(this, dof)
+  subroutine bc_init(this, coef)
     class(bc_t), intent(inout) :: this
-    type(dofmap_t), target, intent(in) :: dof
+    type(coef_t), target, intent(in) :: coef
 
     call bc_free(this)
 
-    this%dof => dof
-    this%Xh => dof%Xh
-    this%msh => dof%msh
+    this%dof => coef%dof
+    this%coef => coef
+    this%Xh => this%dof%Xh
+    this%msh => this%dof%msh
 
     call this%marked_facet%init()
 
@@ -202,9 +208,9 @@ subroutine bc_free(this)
     class(bc_t), intent(inout) :: this
 
     call this%marked_facet%free()
-    
+
     nullify(this%Xh)
-    nullify(this%msh)    
+    nullify(this%msh)
     nullify(this%dof)
 
     if (allocated(this%msk)) then
@@ -224,7 +230,7 @@ subroutine bc_free(this)
        call device_free(this%facet_d)
        this%facet_d = C_NULL_PTR
     end if
-    
+
   end subroutine bc_free
 
   !> Mark @a facet on element @a el as part of the boundary condition
@@ -238,7 +244,7 @@ subroutine bc_mark_facet(this, facet, el)
 
     t%x = (/facet, el/)
     call this%marked_facet%push(t)
-    
+
   end subroutine bc_mark_facet
 
   !> Mark all facets from a (facet, el) tuple list
@@ -253,7 +259,7 @@ subroutine bc_mark_facets(this, facet_list)
     do i = 1, facet_list%size()
        call this%marked_facet%push(fp(i))
     end do
-       
+
   end subroutine bc_mark_facets
 
   !> Mark all facets from a zone
@@ -277,31 +283,59 @@ subroutine bc_mark_zones_from_list(this, bc_zones, bc_key, bc_labels)
     class(bc_t), intent(inout) :: this
     class(facet_zone_t), intent(inout) :: bc_zones(:)
     character(len=*) :: bc_key
-    character(len=20) :: bc_labels(NEKO_MSH_MAX_ZLBLS)
-    integer :: i, j, k, msh_bc_type 
-    
+    character(len=100), allocatable :: split_key(:)
+    character(len=NEKO_MSH_MAX_ZLBL_LEN) :: bc_labels(NEKO_MSH_MAX_ZLBLS)
+    integer :: i, j, k, l, msh_bc_type
+
     msh_bc_type = 0
     if(trim(bc_key) .eq. 'o' .or. trim(bc_key) .eq. 'on' &
        .or. trim(bc_key) .eq. 'o+dong' .or. trim(bc_key) .eq. 'on+dong') then
        msh_bc_type = 1
+    else if(trim(bc_key) .eq. 'd_pres') then
+       msh_bc_type = 1
     else if(trim(bc_key) .eq. 'w') then
        msh_bc_type = 2
     else if(trim(bc_key) .eq. 'v') then
        msh_bc_type = 2
+    else if(trim(bc_key) .eq. 'd_vel_u') then
+       msh_bc_type = 2
+    else if(trim(bc_key) .eq. 'd_vel_v') then
+       msh_bc_type = 2
+    else if(trim(bc_key) .eq. 'd_vel_w') then
+       msh_bc_type = 2
     else if(trim(bc_key) .eq. 'sym') then
        msh_bc_type = 2
     end if
 
     do i = 1, NEKO_MSH_MAX_ZLBLS
-       if (trim(bc_key) .eq. trim(bc_labels(i))) then
-          call bc_mark_zone(this, bc_zones(i))
-          ! Loop across all faces in the mesh
-          do j = 1,this%msh%nelv
-             do k = 1, 2 * this%msh%gdim
-                if (this%msh%facet_type(k,j) .eq. -i) then
-                   this%msh%facet_type(k,j) = msh_bc_type
-                end if
+       !Check if several bcs are defined for this zone
+       !bcs are seperated by /, but we could use something else
+       if (index(trim(bc_labels(i)), '/') .eq. 0) then
+          if (trim(bc_key) .eq. trim(bc_labels(i))) then
+             call bc_mark_zone(this, bc_zones(i))
+             ! Loop across all faces in the mesh
+             do j = 1,this%msh%nelv
+                do k = 1, 2 * this%msh%gdim
+                   if (this%msh%facet_type(k,j) .eq. -i) then
+                      this%msh%facet_type(k,j) = msh_bc_type
+                   end if
+                end do
              end do
+          end if
+       else
+          split_key = split_string(trim(bc_labels(i)),'/')
+          do l = 1, size(split_key)
+             if (trim(split_key(l)) .eq. trim(bc_key)) then
+                call bc_mark_zone(this, bc_zones(i))
+                ! Loop across all faces in the mesh
+                do j = 1,this%msh%nelv
+                   do k = 1, 2 * this%msh%gdim
+                      if (this%msh%facet_type(k,j) .eq. -i) then
+                         this%msh%facet_type(k,j) = msh_bc_type
+                      end if
+                   end do
+                end do
+             end if
           end do
        end if
     end do
@@ -324,7 +358,7 @@ subroutine bc_finalize(this)
     lz = this%Xh%lz
 
     !>@todo add 2D case
-    
+
     ! Note we assume that lx = ly = lz
     facet_size = lx**2
     allocate(this%msk(0:facet_size * this%marked_facet%size()))
@@ -332,10 +366,10 @@ subroutine bc_finalize(this)
 
     msk_c = 0
     bfp => this%marked_facet%array()
-    
+
     ! Loop through each (facet, element) id tuple
     ! Then loop over all the nodes of the face and compute their linear index
-    ! This index goes into This%msk, whereas the corresponding face id goes into
+    ! This index goes into this%msk, whereas the corresponding face id goes into
     ! this%facet
     do i = 1, this%marked_facet%size()
        bc_facet = bfp(i)
@@ -395,14 +429,16 @@ subroutine bc_finalize(this)
 
     this%msk(0) = msk_c
     this%facet(0) = msk_c
-    
+
     if (NEKO_BCKND_DEVICE .eq. 1) then
        n = facet_size * this%marked_facet%size() + 1
        call device_map(this%msk, this%msk_d, n)
        call device_map(this%facet, this%facet_d, n)
 
-       call device_memcpy(this%msk, this%msk_d, n, HOST_TO_DEVICE)
-       call device_memcpy(this%facet, this%facet_d, n, HOST_TO_DEVICE)
+       call device_memcpy(this%msk, this%msk_d, n, &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(this%facet, this%facet_d, n, &
+                          HOST_TO_DEVICE, sync=.false.)
     end if
 
   end subroutine bc_finalize
@@ -430,7 +466,7 @@ subroutine bc_list_init(bclst, size)
 
     bclst%n = 0
     bclst%size = n
-        
+
   end subroutine bc_list_init
 
   !> Destructor for a list of boundary conditions
@@ -445,7 +481,7 @@ subroutine bc_list_free(bclst)
 
     bclst%n = 0
     bclst%size = 0
-    
+
   end subroutine bc_list_free
 
   !> Add a condition to a list of boundary conditions
@@ -467,7 +503,7 @@ subroutine bc_list_add(bclst, bc)
 
     bclst%n = bclst%n + 1
     bclst%bc(bclst%n)%bcp => bc
-    
+
   end subroutine bc_list_add
 
   !> Apply a list of boundary conditions to a scalar field
@@ -587,6 +623,6 @@ subroutine bc_list_apply_vector(bclst, x, y, z, n, t, tstep)
     end if
 
   end subroutine bc_list_apply_vector
-  
-  
+
+
 end module bc
diff --git a/src/bc/bcknd/device/cuda/dirichlet_kernel.h b/src/bc/bcknd/device/cuda/dirichlet_kernel.h
index 0daf3e0fd28..d11a9198b2b 100644
--- a/src/bc/bcknd/device/cuda/dirichlet_kernel.h
+++ b/src/bc/bcknd/device/cuda/dirichlet_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_DIRICHLET_KERNEL__
+#define __BC_DIRICHLET_KERNEL__
+
 /**
  * Device kernel for scalar apply for a Dirichlet condition
  */
@@ -72,3 +75,4 @@ __global__ void dirichlet_apply_vector_kernel(const int * __restrict__ msk,
   }
 }
 
+#endif // __BC_DIRICHLET_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/cuda/dong_outflow_kernel.h b/src/bc/bcknd/device/cuda/dong_outflow_kernel.h
index a293fddffe5..99dd02bd90d 100644
--- a/src/bc/bcknd/device/cuda/dong_outflow_kernel.h
+++ b/src/bc/bcknd/device/cuda/dong_outflow_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_DONG_OUTFLOW_KERNEL__
+#define __BC_DONG_OUTFLOW_KERNEL__
+
 /**
  * Device kernel for vector apply for a dong outflow
  */
@@ -62,3 +65,5 @@ void dong_outflow_apply_scalar_kernel(const int * __restrict__ msk,
     x[k] = -0.5*(uk*uk+vk*vk+wk*wk)*S0;
   }
 }
+
+#endif // __BC_DONG_OUTFLOW_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/cuda/facet_normal_kernel.h b/src/bc/bcknd/device/cuda/facet_normal_kernel.h
index 03b72762bd3..1f8d8200cf4 100644
--- a/src/bc/bcknd/device/cuda/facet_normal_kernel.h
+++ b/src/bc/bcknd/device/cuda/facet_normal_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_FACET_NORMAL_KERNEL__
+#define __BC_FACET_NORMAL_KERNEL__
+
 /**
  * Computes the linear index for area and normal arrays
  * @note Fortran indexing input, C indexing output
@@ -122,3 +125,4 @@ void facet_normal_apply_surfvec_kernel(const int * __restrict__ msk,
   }
 }
 
+#endif // __BC_FACET_NORMAL_KERNEL__
diff --git a/src/bc/bcknd/device/cuda/inflow_kernel.h b/src/bc/bcknd/device/cuda/inflow_kernel.h
index 5da1babbe1c..8b707344b93 100644
--- a/src/bc/bcknd/device/cuda/inflow_kernel.h
+++ b/src/bc/bcknd/device/cuda/inflow_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_INFLOW_KERNEL__
+#define __BC_INFLOW_KERNEL__
+
 /**
  * Device kernel for vector apply for a Dirichlet condition
  */
@@ -56,3 +59,4 @@ __global__ void inflow_apply_vector_kernel(const int * __restrict__ msk,
   }
 }
 
+#endif // __BC_INFLOW_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/cuda/inhom_dirichlet_kernel.h b/src/bc/bcknd/device/cuda/inhom_dirichlet_kernel.h
index 4d0dd05074f..afc8ec38cbb 100644
--- a/src/bc/bcknd/device/cuda/inhom_dirichlet_kernel.h
+++ b/src/bc/bcknd/device/cuda/inhom_dirichlet_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_INHOM_DIRICHLET_KERNEL__
+#define __BC_INHOM_DIRICHLET_KERNEL__
+
 /**
  * Device kernel for vector apply for an inhomogeneous Dirichlet condition
  */
@@ -73,3 +76,5 @@ __global__ void inhom_dirichlet_apply_scalar_kernel(const int * __restrict__ msk
     x[k] = bla_x[i];
   }
 }
+
+#endif // __BC_INHOM_DIRICHLET_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/cuda/no_slip_wall_kernel.h b/src/bc/bcknd/device/cuda/no_slip_wall_kernel.h
index d54f92b7149..25f56e7280d 100644
--- a/src/bc/bcknd/device/cuda/no_slip_wall_kernel.h
+++ b/src/bc/bcknd/device/cuda/no_slip_wall_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_NO_SLIP_WALL_KERNEL__
+#define __BC_NO_SLIP_WALL_KERNEL__
+
 /**
  * Device kernel for scalar apply for a no-slip wall conditon
  */
@@ -70,3 +73,4 @@ __global__ void no_slip_wall_apply_vector_kernel(const int * __restrict__ msk,
   }
 }
 
+#endif // __BC_NO_SLIP_WALL_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/cuda/symmetry_kernel.h b/src/bc/bcknd/device/cuda/symmetry_kernel.h
index ed9878db2b5..6ba8ad1ff56 100644
--- a/src/bc/bcknd/device/cuda/symmetry_kernel.h
+++ b/src/bc/bcknd/device/cuda/symmetry_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_SYMMETRY_KERNEL__
+#define __BC_SYMMETRY_KERNEL__
+
 /**
  * Device kernel for vector apply for a symmetry condition
  */
@@ -66,4 +69,4 @@ __global__ void symmetry_apply_vector_kernel(const int * __restrict__ xmsk,
   
 }
 
-
+#endif // __BC_SYMMETRY_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/device_dirichlet.F90 b/src/bc/bcknd/device/device_dirichlet.F90
index 76da8ddf964..e1da42d3935 100644
--- a/src/bc/bcknd/device/device_dirichlet.F90
+++ b/src/bc/bcknd/device/device_dirichlet.F90
@@ -49,7 +49,7 @@ subroutine hip_dirichlet_apply_scalar(msk, x, g, m) &
        type(c_ptr), value :: msk, x
      end subroutine hip_dirichlet_apply_scalar
   end interface
-  
+
   interface
      subroutine hip_dirichlet_apply_vector(msk, x, y, z, g, m) &
           bind(c, name='hip_dirichlet_apply_vector')
@@ -73,7 +73,7 @@ subroutine cuda_dirichlet_apply_scalar(msk, x, g, m) &
        type(c_ptr), value :: msk, x
      end subroutine cuda_dirichlet_apply_scalar
   end interface
-  
+
   interface
      subroutine cuda_dirichlet_apply_vector(msk, x, y, z, g, m) &
           bind(c, name='cuda_dirichlet_apply_vector')
@@ -97,7 +97,7 @@ subroutine opencl_dirichlet_apply_scalar(msk, x, g, m) &
        type(c_ptr), value :: msk, x
      end subroutine opencl_dirichlet_apply_scalar
   end interface
-  
+
   interface
      subroutine opencl_dirichlet_apply_vector(msk, x, y, z, g, m) &
           bind(c, name='opencl_dirichlet_apply_vector')
@@ -112,7 +112,7 @@ end subroutine opencl_dirichlet_apply_vector
 #endif
 
   public :: device_dirichlet_apply_scalar, device_dirichlet_apply_vector
-  
+
 contains
 
   subroutine device_dirichlet_apply_scalar(msk, x, g, m)
@@ -129,7 +129,7 @@ subroutine device_dirichlet_apply_scalar(msk, x, g, m)
 #else
     call neko_error('No device backend configured')
 #endif
-    
+
   end subroutine device_dirichlet_apply_scalar
 
   subroutine device_dirichlet_apply_vector(msk, x, y, z, g, m)
@@ -146,7 +146,7 @@ subroutine device_dirichlet_apply_vector(msk, x, y, z, g, m)
 #else
     call neko_error('No device backend configured')
 #endif
-    
+
   end subroutine device_dirichlet_apply_vector
-  
+
 end module device_dirichlet
diff --git a/src/bc/bcknd/device/device_dong_outflow.F90 b/src/bc/bcknd/device/device_dong_outflow.F90
index bf7e19c6eac..5393080f48a 100644
--- a/src/bc/bcknd/device/device_dong_outflow.F90
+++ b/src/bc/bcknd/device/device_dong_outflow.F90
@@ -46,7 +46,7 @@ subroutine hip_dong_outflow_apply_scalar(msk, x, normal_x, normal_y,&
        import c_rp
        implicit none
        integer(c_int) :: m
-       real(kind=c_rp) :: uinf, delta 
+       real(kind=c_rp) :: uinf, delta
        type(c_ptr), value :: msk, x, u, v, w, normal_x, normal_y, normal_z
      end subroutine hip_dong_outflow_apply_scalar
   end interface
@@ -59,7 +59,7 @@ subroutine cuda_dong_outflow_apply_scalar(msk, x, normal_x, normal_y,&
        import c_rp
        implicit none
        integer(c_int) :: m
-       real(kind=c_rp) :: uinf, delta 
+       real(kind=c_rp) :: uinf, delta
        type(c_ptr), value :: msk, x, u, v, w, normal_x, normal_y, normal_z
      end subroutine cuda_dong_outflow_apply_scalar
   end interface
@@ -72,7 +72,7 @@ subroutine opencl_dong_outflow_apply_scalar(msk, x, normal_x, normal_y,&
        import c_rp
        implicit none
        integer(c_int) :: m
-       real(kind=c_rp) :: uinf, delta 
+       real(kind=c_rp) :: uinf, delta
        type(c_ptr), value :: msk, x, u, v, w, normal_x, normal_y, normal_z
      end subroutine opencl_dong_outflow_apply_scalar
   end interface
@@ -85,7 +85,7 @@ end subroutine opencl_dong_outflow_apply_scalar
   subroutine device_dong_outflow_apply_scalar(msk, x, normal_x, normal_y,&
                                               normal_z, u, v, w, uinf, delta, m)
     integer(c_int) :: m
-    real(kind=c_rp) :: uinf, delta 
+    real(kind=c_rp) :: uinf, delta
     type(c_ptr) :: msk, x, u, v, w, normal_x, normal_y, normal_z
 
 #ifdef HAVE_HIP
@@ -101,7 +101,7 @@ subroutine device_dong_outflow_apply_scalar(msk, x, normal_x, normal_y,&
 #else
     call neko_error('No device backend configured')
 #endif
-    
+
   end subroutine device_dong_outflow_apply_scalar
-  
+
 end module device_dong_outflow
diff --git a/src/bc/bcknd/device/device_facet_normal.F90 b/src/bc/bcknd/device/device_facet_normal.F90
index 75f0f2083c6..c50c49ffe2d 100644
--- a/src/bc/bcknd/device/device_facet_normal.F90
+++ b/src/bc/bcknd/device/device_facet_normal.F90
@@ -72,7 +72,7 @@ end subroutine opencl_facet_normal_apply_surfvec
 #endif
 
   public :: device_facet_normal_apply_surfvec
-  
+
 contains
 
   subroutine device_facet_normal_apply_surfvec(msk, facet, x, y, z, u, v, w, &
@@ -92,7 +92,7 @@ subroutine device_facet_normal_apply_surfvec(msk, facet, x, y, z, u, v, w, &
 #else
     call neko_error('No device backend configured')
 #endif
-    
+
   end subroutine device_facet_normal_apply_surfvec
-  
+
 end module device_facet_normal
diff --git a/src/bc/bcknd/device/device_inflow.F90 b/src/bc/bcknd/device/device_inflow.F90
index b8bdc776a12..14721ae8fe8 100644
--- a/src/bc/bcknd/device/device_inflow.F90
+++ b/src/bc/bcknd/device/device_inflow.F90
@@ -35,7 +35,7 @@ module device_inflow
   use utils
   use, intrinsic :: iso_c_binding, only : c_ptr
   private
-  
+
 #ifdef HAVE_HIP
 
   interface
@@ -78,7 +78,7 @@ end subroutine opencl_inflow_apply_vector
 
 contains
 
-    subroutine device_inflow_apply_vector(msk, x, y, z, g, m)
+  subroutine device_inflow_apply_vector(msk, x, y, z, g, m)
     integer, intent(in) :: m
     type(c_ptr) :: msk, x, y, z, g
 
@@ -91,8 +91,8 @@ subroutine device_inflow_apply_vector(msk, x, y, z, g, m)
 #else
     call neko_error('No device backend configured')
 #endif
-    
+
   end subroutine device_inflow_apply_vector
-  
+
 end module device_inflow
 
diff --git a/src/bc/bcknd/device/device_inhom_dirichlet.F90 b/src/bc/bcknd/device/device_inhom_dirichlet.F90
index e68f21abade..efdf81af1ed 100644
--- a/src/bc/bcknd/device/device_inhom_dirichlet.F90
+++ b/src/bc/bcknd/device/device_inhom_dirichlet.F90
@@ -105,7 +105,7 @@ subroutine opencl_inhom_dirichlet_apply_scalar(msk, x, bla_x, m) &
        type(c_ptr), value :: msk, x, bla_x
      end subroutine opencl_inhom_dirichlet_apply_scalar
   end interface
-#endif 
+#endif
 
 contains
 
@@ -122,9 +122,9 @@ subroutine device_inhom_dirichlet_apply_vector(msk, x, y, z, bla_x, bla_y, bla_z
 #else
     call neko_error('No device backend configured')
 #endif
-    
+
   end subroutine device_inhom_dirichlet_apply_vector
- 
+
   subroutine device_inhom_dirichlet_apply_scalar(msk, x, bla_x, m)
     integer, intent(in) :: m
     type(c_ptr) :: msk, x, bla_x
@@ -138,7 +138,7 @@ subroutine device_inhom_dirichlet_apply_scalar(msk, x, bla_x, m)
 #else
     call neko_error('No device backend configured')
 #endif
-    
+
   end subroutine device_inhom_dirichlet_apply_scalar
-  
+
 end module device_inhom_dirichlet
diff --git a/src/bc/bcknd/device/device_symmetry.F90 b/src/bc/bcknd/device/device_symmetry.F90
index a414965ab80..d0fc058c75e 100644
--- a/src/bc/bcknd/device/device_symmetry.F90
+++ b/src/bc/bcknd/device/device_symmetry.F90
@@ -67,7 +67,7 @@ subroutine opencl_symmetry_apply_vector(xmsk, ymsk, zmsk, x, y, z, m, n, l) &
      end subroutine opencl_symmetry_apply_vector
   end interface
 #endif
-  
+
 contains
 
   subroutine device_symmetry_apply_vector(xmsk, ymsk, zmsk, x, y, z, m, n, l)
@@ -83,7 +83,7 @@ subroutine device_symmetry_apply_vector(xmsk, ymsk, zmsk, x, y, z, m, n, l)
 #else
     call neko_error('No device backend configured')
 #endif
-    
+
   end subroutine device_symmetry_apply_vector
-  
+
 end module device_symmetry
diff --git a/src/bc/bcknd/device/device_wall.F90 b/src/bc/bcknd/device/device_wall.F90
index dbe2e283c5b..d5d7ed78a67 100644
--- a/src/bc/bcknd/device/device_wall.F90
+++ b/src/bc/bcknd/device/device_wall.F90
@@ -36,7 +36,7 @@ module device_wall
   private
 
 #ifdef HAVE_HIP
-    interface
+  interface
      subroutine hip_no_slip_wall_apply_scalar(msk, x, m) &
           bind(c, name='hip_no_slip_wall_apply_scalar')
        use, intrinsic :: iso_c_binding
@@ -45,7 +45,7 @@ subroutine hip_no_slip_wall_apply_scalar(msk, x, m) &
        type(c_ptr), value :: msk, x
      end subroutine hip_no_slip_wall_apply_scalar
   end interface
-  
+
   interface
      subroutine hip_no_slip_wall_apply_vector(msk, x, y, z, m) &
           bind(c, name='hip_no_slip_wall_apply_vector')
@@ -56,7 +56,7 @@ subroutine hip_no_slip_wall_apply_vector(msk, x, y, z, m) &
      end subroutine hip_no_slip_wall_apply_vector
   end interface
 #elif HAVE_CUDA
-    interface
+  interface
      subroutine cuda_no_slip_wall_apply_scalar(msk, x, m) &
           bind(c, name='cuda_no_slip_wall_apply_scalar')
        use, intrinsic :: iso_c_binding
@@ -65,7 +65,7 @@ subroutine cuda_no_slip_wall_apply_scalar(msk, x, m) &
        type(c_ptr), value :: msk, x
      end subroutine cuda_no_slip_wall_apply_scalar
   end interface
-  
+
   interface
      subroutine cuda_no_slip_wall_apply_vector(msk, x, y, z, m) &
           bind(c, name='cuda_no_slip_wall_apply_vector')
@@ -76,7 +76,7 @@ subroutine cuda_no_slip_wall_apply_vector(msk, x, y, z, m) &
      end subroutine cuda_no_slip_wall_apply_vector
   end interface
 #elif HAVE_OPENCL
-    interface
+  interface
      subroutine opencl_no_slip_wall_apply_scalar(msk, x, m) &
           bind(c, name='opencl_no_slip_wall_apply_scalar')
        use, intrinsic :: iso_c_binding
@@ -85,7 +85,7 @@ subroutine opencl_no_slip_wall_apply_scalar(msk, x, m) &
        type(c_ptr), value :: msk, x
      end subroutine opencl_no_slip_wall_apply_scalar
   end interface
-  
+
   interface
      subroutine opencl_no_slip_wall_apply_vector(msk, x, y, z, m) &
           bind(c, name='opencl_no_slip_wall_apply_vector')
@@ -98,7 +98,7 @@ end subroutine opencl_no_slip_wall_apply_vector
 #endif
 
   public :: device_no_slip_wall_apply_scalar, device_no_slip_wall_apply_vector
-  
+
 contains
 
   subroutine device_no_slip_wall_apply_scalar(msk, x, m)
@@ -114,7 +114,7 @@ subroutine device_no_slip_wall_apply_scalar(msk, x, m)
 #else
     call neko_error('No device backend configured')
 #endif
-    
+
   end subroutine device_no_slip_wall_apply_scalar
 
   subroutine device_no_slip_wall_apply_vector(msk, x, y, z, m)
@@ -130,7 +130,7 @@ subroutine device_no_slip_wall_apply_vector(msk, x, y, z, m)
 #else
     call neko_error('No device backend configured')
 #endif
-    
+
   end subroutine device_no_slip_wall_apply_vector
-  
+
 end module device_wall
diff --git a/src/bc/bcknd/device/hip/dirichlet_kernel.h b/src/bc/bcknd/device/hip/dirichlet_kernel.h
index 13e66153373..8172e6c782e 100644
--- a/src/bc/bcknd/device/hip/dirichlet_kernel.h
+++ b/src/bc/bcknd/device/hip/dirichlet_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_DIRICHLET_KERNEL__
+#define __BC_DIRICHLET_KERNEL__
+
 /**
  * Device kernel for scalar apply for a Dirichlet condition
  */
@@ -72,3 +75,4 @@ __global__ void dirichlet_apply_vector_kernel(const int * __restrict__ msk,
   }
 }
 
+#endif // __BC_HIP_DIRICHLET_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/hip/dong_outflow_kernel.h b/src/bc/bcknd/device/hip/dong_outflow_kernel.h
index b6dd44917ca..7f8c59e5a6b 100644
--- a/src/bc/bcknd/device/hip/dong_outflow_kernel.h
+++ b/src/bc/bcknd/device/hip/dong_outflow_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_DONG_OUTFLOW_KERNEL__
+#define __BC_DONG_OUTFLOW_KERNEL__
+
 /**
  * Device kernel for vector apply for a dong outflow condition
  */
@@ -48,7 +51,7 @@ void dong_outflow_apply_scalar_kernel(const int * __restrict__ msk,
 				      const T uinf,
 				      const T delta,
 				      const int m) {
-  
+
   const int idx = blockIdx.x * blockDim.x + threadIdx.x;
   const int str = blockDim.x * gridDim.x;
 
@@ -62,3 +65,5 @@ void dong_outflow_apply_scalar_kernel(const int * __restrict__ msk,
     x[k] = -0.5*(uk*uk+vk*vk+wk*wk)*S0;
   }
 }
+
+#endif // __BC_DONG_OUTFLOW_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/hip/facet_normal_kernel.h b/src/bc/bcknd/device/hip/facet_normal_kernel.h
index b787bd628b9..c02ddddb02d 100644
--- a/src/bc/bcknd/device/hip/facet_normal_kernel.h
+++ b/src/bc/bcknd/device/hip/facet_normal_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_FACET_NORMAL_KERNEL__
+#define __BC_FACET_NORMAL_KERNEL__
+
 /**
  * Computes the linear index for area and normal arrays
  * @note Fortran indexing input, C indexing output
@@ -122,3 +125,4 @@ void facet_normal_apply_surfvec_kernel(const int * __restrict__ msk,
   }
 }
 
+#endif // __BC_FACET_NORMAL_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/hip/inflow_kernel.h b/src/bc/bcknd/device/hip/inflow_kernel.h
index b4945a9388d..4c35e055471 100644
--- a/src/bc/bcknd/device/hip/inflow_kernel.h
+++ b/src/bc/bcknd/device/hip/inflow_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_INFLOW_KERNEL__
+#define __BC_INFLOW_KERNEL__
+
 /**
  * Device kernel for vector apply for a Dirichlet condition
  */
@@ -56,3 +59,4 @@ __global__ void inflow_apply_vector_kernel(const int * __restrict__ msk,
   }
 }
 
+#endif // __BC_INFLOW_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/hip/inhom_dirichlet_kernel.h b/src/bc/bcknd/device/hip/inhom_dirichlet_kernel.h
index 3a84872abee..a71ef472cb4 100644
--- a/src/bc/bcknd/device/hip/inhom_dirichlet_kernel.h
+++ b/src/bc/bcknd/device/hip/inhom_dirichlet_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_INHOM_DIRICHLET_KERNEL__
+#define __BC_INHOM_DIRICHLET_KERNEL__
+
 /**
  * Device kernel for vector apply for an inhomogeneous Dirichlet condition
  */
@@ -73,3 +76,5 @@ __global__ void inhom_dirichlet_apply_scalar_kernel(const int * __restrict__ msk
     x[k] = bla_x[i];
   }
 }
+
+#endif // __BC_INHOM_DIRICHLET_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/hip/no_slip_wall_kernel.h b/src/bc/bcknd/device/hip/no_slip_wall_kernel.h
index 4e18acef4e7..b83e811ce45 100644
--- a/src/bc/bcknd/device/hip/no_slip_wall_kernel.h
+++ b/src/bc/bcknd/device/hip/no_slip_wall_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_NO_SLIP_WALL_KERNEL__
+#define __BC_NO_SLIP_WALL_KERNEL__
+
 #include <hip/hip_runtime.h>
 
 /**
@@ -72,3 +75,4 @@ __global__ void no_slip_wall_apply_vector_kernel(const int * __restrict__ msk,
   }
 }
 
+#endif // __BC_NO_SLIP_WALL_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/hip/symmetry_kernel.h b/src/bc/bcknd/device/hip/symmetry_kernel.h
index f96c33d5e3f..1aea0584621 100644
--- a/src/bc/bcknd/device/hip/symmetry_kernel.h
+++ b/src/bc/bcknd/device/hip/symmetry_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_SYMETRY_KERNEL__
+#define __BC_SYMETRY_KERNEL__
+
 /**
  * Device kernel for vector apply for a symmetry condition
  */
@@ -65,3 +68,4 @@ __global__ void symmetry_apply_vector_kernel(const int * __restrict__ xmsk,
   }
 }
 
+#endif // __BC_SYMETRY_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/opencl/dirichlet_kernel.cl b/src/bc/bcknd/device/opencl/dirichlet_kernel.cl
index 6e4a79169b8..cea772ade20 100644
--- a/src/bc/bcknd/device/opencl/dirichlet_kernel.cl
+++ b/src/bc/bcknd/device/opencl/dirichlet_kernel.cl
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_DIRICHLET_KERNEL__
+#define __BC_DIRICHLET_KERNEL__
+
 /**
  * Device kernel for scalar apply for a Dirichlet condition
  */
@@ -69,3 +72,5 @@ __kernel void dirichlet_apply_vector_kernel(__global const int *msk,
     z[k] = g;
   }
 }
+
+#endif // __BC_DIRICHLET_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/opencl/dong_outflow_kernel.cl b/src/bc/bcknd/device/opencl/dong_outflow_kernel.cl
index ec35dd991d0..6a7f9fed77e 100644
--- a/src/bc/bcknd/device/opencl/dong_outflow_kernel.cl
+++ b/src/bc/bcknd/device/opencl/dong_outflow_kernel.cl
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_DONG_OUTFLOW_KERNEL__
+#define __BC_DONG_OUTFLOW_KERNEL__
+
 /**
  * Device kernel for scalar apply for a dong outflow condition
  */
@@ -61,3 +64,5 @@ void dong_outflow_apply_scalar_kernel(__global const int * __restrict__ msk,
     x[k] = -0.5*(uk*uk+vk*vk+wk*wk)*S0;
   }
 }
+
+#endif // __BC_DONG_OUTFLOW_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/opencl/facet_normal_kernel.cl b/src/bc/bcknd/device/opencl/facet_normal_kernel.cl
index cff360147e2..53f7e73de5d 100644
--- a/src/bc/bcknd/device/opencl/facet_normal_kernel.cl
+++ b/src/bc/bcknd/device/opencl/facet_normal_kernel.cl
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_FACET_NORMAL_KERNEL__
+#define __BC_FACET_NORMAL_KERNEL__
+
 /**
  * Computes the linear index for area and normal arrays
  * @note Fortran indexing input, C indexing output
@@ -120,3 +123,4 @@ void facet_normal_apply_surfvec_kernel(__global const int *msk,
   }
 }
 
+#endif // __BC_FACET_NORMAL_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/opencl/inflow_kernel.cl b/src/bc/bcknd/device/opencl/inflow_kernel.cl
index e99b3be5e6f..1d7a6420de1 100644
--- a/src/bc/bcknd/device/opencl/inflow_kernel.cl
+++ b/src/bc/bcknd/device/opencl/inflow_kernel.cl
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_INFLOW_KERNEL__
+#define __BC_INFLOW_KERNEL__
+
 /**
  * Device kernel for vector apply for a Dirichlet condition
  */
@@ -54,3 +57,5 @@ __kernel void inflow_apply_vector_kernel(__global const int *msk,
     z[k] = gz;
   }
 }
+
+#endif // __BC_INFLOW_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/opencl/inhom_dirichlet_kernel.cl b/src/bc/bcknd/device/opencl/inhom_dirichlet_kernel.cl
index c6e7c1cf2c4..30ce1cb492f 100644
--- a/src/bc/bcknd/device/opencl/inhom_dirichlet_kernel.cl
+++ b/src/bc/bcknd/device/opencl/inhom_dirichlet_kernel.cl
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_INFLOW_DIRICHLET_KERNEL__
+#define __BC_INFLOW_DIRICHLET_KERNEL__
+
 /**
  * Device kernel for vector apply for a inhomogeneous Dirichlet condition
  */
@@ -71,3 +74,5 @@ __kernel void inhom_dirichlet_apply_scalar_kernel(__global const int *msk,
     x[k] = bla_x[i];
   }
 }
+
+#endif // __BC_INFLOW_DIRICHLET_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/opencl/no_slip_wall_kernel.cl b/src/bc/bcknd/device/opencl/no_slip_wall_kernel.cl
index 16e8e8232eb..9241165b9cf 100644
--- a/src/bc/bcknd/device/opencl/no_slip_wall_kernel.cl
+++ b/src/bc/bcknd/device/opencl/no_slip_wall_kernel.cl
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_NO_SLIP_WALL_KERNEL__
+#define __BC_NO_SLIP_WALL_KERNEL__
+
 /**
  * Device kernel for scalar apply for a no-slip wall conditon
  */
@@ -68,3 +71,4 @@ __kernel void no_slip_wall_apply_vector_kernel(__global const int *msk,
   }
 }
 
+#endif // __BC_NO_SLIP_WALL_KERNEL__
\ No newline at end of file
diff --git a/src/bc/bcknd/device/opencl/symmetry_kernel.cl b/src/bc/bcknd/device/opencl/symmetry_kernel.cl
index 2379085e20e..678319acae6 100644
--- a/src/bc/bcknd/device/opencl/symmetry_kernel.cl
+++ b/src/bc/bcknd/device/opencl/symmetry_kernel.cl
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __BC_SYMETRY_KERNEL__
+#define __BC_SYMETRY_KERNEL__
+
 /**
  * Device kernel for vector apply for a symmetry condition
  */
@@ -64,3 +67,5 @@ __kernel void symmetry_apply_vector_kernel(__global const int *xmsk,
   }
   
 }
+
+#endif // ___BC_SYMETRY_KERNEL___
\ No newline at end of file
diff --git a/src/bc/blasius.f90 b/src/bc/blasius.f90
index 66c55f0f4f1..7a67f0e920b 100644
--- a/src/bc/blasius.f90
+++ b/src/bc/blasius.f90
@@ -79,9 +79,9 @@ subroutine blasius_free(this)
     if (c_associated(this%blaz_d)) then
        call device_free(this%blaz_d)
     end if
-    
+
   end subroutine blasius_free
-  
+
   !> No-op scalar apply
   subroutine blasius_apply_scalar(this, x, n, t, tstep)
     class(blasius_t), intent(inout) :: this
@@ -98,7 +98,7 @@ subroutine blasius_apply_scalar_dev(this, x_d, t, tstep)
     real(kind=rp), intent(in), optional :: t
     integer, intent(in), optional :: tstep
   end subroutine blasius_apply_scalar_dev
-  
+
   !> Apply blasius conditions (vector valued)
   subroutine blasius_apply_vector(this, x, y, z, n, t, tstep)
     class(blasius_t), intent(inout) :: this
@@ -122,10 +122,10 @@ subroutine blasius_apply_vector(this, x, y, z, n, t, tstep)
          case(1,2)
             x(k) = this%bla(zc(idx(1), idx(2), idx(3), idx(4)), &
                  this%delta, this%x(1))
-            y(k) = 0.0_rp 
+            y(k) = 0.0_rp
             z(k) = 0.0_rp
          case(3,4)
-            x(k) = 0.0_rp 
+            x(k) = 0.0_rp
             y(k) = this%bla(xc(idx(1), idx(2), idx(3), idx(4)), &
                  this%delta, this%x(2))
             z(k) = 0.0_rp
@@ -134,7 +134,7 @@ subroutine blasius_apply_vector(this, x, y, z, n, t, tstep)
             y(k) = 0.0_rp
             z(k) = this%bla(yc(idx(1), idx(2), idx(3), idx(4)), &
                  this%delta, this%x(3))
-         end select            
+         end select
       end do
     end associate
   end subroutine blasius_apply_vector
@@ -172,7 +172,7 @@ subroutine blasius_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
          call device_alloc(blax_d, s)
          call device_alloc(blay_d, s)
          call device_alloc(blaz_d, s)
-         
+
          do i = 1, m
             k = this%msk(i)
             facet = this%facet(i)
@@ -181,10 +181,10 @@ subroutine blasius_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
             case(1,2)
                bla_x(i) = this%bla(zc(idx(1), idx(2), idx(3), idx(4)), &
                     this%delta, this%x(1))
-               bla_y(i) = 0.0_rp 
+               bla_y(i) = 0.0_rp
                bla_z(i) = 0.0_rp
             case(3,4)
-               bla_x(i) = 0.0_rp 
+               bla_x(i) = 0.0_rp
                bla_y(i) = this%bla(xc(idx(1), idx(2), idx(3), idx(4)), &
                     this%delta, this%x(2))
                bla_z(i) = 0.0_rp
@@ -196,8 +196,8 @@ subroutine blasius_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
             end select
          end do
 
-         call device_memcpy(bla_x, blax_d, m, HOST_TO_DEVICE)
-         call device_memcpy(bla_y, blay_d, m, HOST_TO_DEVICE)
+         call device_memcpy(bla_x, blax_d, m, HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(bla_y, blay_d, m, HOST_TO_DEVICE, sync=.false.)
          call device_memcpy(bla_z, blaz_d, m, HOST_TO_DEVICE, sync=.true.)
 
          deallocate(bla_x, bla_y, bla_z)
@@ -205,7 +205,7 @@ subroutine blasius_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
 
       call device_inhom_dirichlet_apply_vector(this%msk_d, x_d, y_d, z_d, &
            blax_d, blay_d, blaz_d, m)
-      
+
     end associate
 
   end subroutine blasius_apply_vector_dev
@@ -216,7 +216,7 @@ subroutine blasius_set_params(this, delta, type)
     real(kind=rp) :: delta
     character(len=*) :: type
     this%delta = delta
-    
+
     select case(trim(type))
     case('linear')
        this%bla => blasius_linear
@@ -239,5 +239,5 @@ subroutine blasius_set_coef(this, c)
     type(coef_t), target, intent(inout) :: c
     this%c => c
   end subroutine blasius_set_coef
-     
+
 end module blasius
diff --git a/src/bc/dirichlet.f90 b/src/bc/dirichlet.f90
index b1dc3462491..0c16064c278 100644
--- a/src/bc/dirichlet.f90
+++ b/src/bc/dirichlet.f90
@@ -89,7 +89,7 @@ subroutine dirichlet_apply_vector(this, x, y, z, n, t, tstep)
        y(k) = this%g
        z(k) = this%g
     end do
-    
+
   end subroutine dirichlet_apply_vector
 
   !> Boundary condition apply for a generic Dirichlet condition
@@ -102,10 +102,10 @@ subroutine dirichlet_apply_scalar_dev(this, x_d, t, tstep)
 
     call device_dirichlet_apply_scalar(this%msk_d, x_d, &
                                        this%g, size(this%msk))
-    
+
   end subroutine dirichlet_apply_scalar_dev
-  
-  !> Boundary condition apply for a generic Dirichlet condition 
+
+  !> Boundary condition apply for a generic Dirichlet condition
   !! to vectors @a x, @a y and @a z (device version)
   subroutine dirichlet_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
     class(dirichlet_t), intent(inout), target :: this
@@ -117,7 +117,7 @@ subroutine dirichlet_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
 
     call device_dirichlet_apply_vector(this%msk_d, x_d, y_d, z_d, &
                                        this%g, size(this%msk))
-    
+
   end subroutine dirichlet_apply_vector_dev
 
   !> Set value of \f$ g \f$
@@ -126,7 +126,7 @@ subroutine dirichlet_set_g(this, g)
     real(kind=rp), intent(in) :: g
 
     this%g = g
-    
+
   end subroutine dirichlet_set_g
-  
+
 end module dirichlet
diff --git a/src/bc/dong_outflow.f90 b/src/bc/dong_outflow.f90
index c5fb35b3251..d07d2ce9817 100644
--- a/src/bc/dong_outflow.f90
+++ b/src/bc/dong_outflow.f90
@@ -42,12 +42,13 @@ module dong_outflow
   use coefs
   use utils
   use device_dong_outflow
+  use field_registry, only : neko_field_registry
   use, intrinsic :: iso_c_binding, only : c_ptr, c_sizeof
   implicit none
   private
 
   !> Dong outflow condition
-  !! Follows 
+  !! Follows
   !! "A Convective-like Energy-Stable Open Boundary Condition for
   !! Simulations of Incompressible Flows"
   !! by S. Dong
@@ -55,9 +56,8 @@ module dong_outflow
      type(field_t), pointer :: u
      type(field_t), pointer :: v
      type(field_t), pointer :: w
-     type(coef_t), pointer :: c_Xh
-     real(kind=rp) :: delta 
-     real(kind=rp) :: uinf  
+     real(kind=rp) :: delta
+     real(kind=rp) :: uinf
      type(c_ptr) :: normal_x_d
      type(c_ptr) :: normal_y_d
      type(c_ptr) :: normal_z_d
@@ -70,55 +70,53 @@ module dong_outflow
   end type dong_outflow_t
 
 contains
-    subroutine dong_outflow_set_vars(this, c_Xh, u, v, w, uinf, delta)
-      class(dong_outflow_t), intent(inout) :: this
-      type(coef_t), target, intent(in) :: c_Xh
-      type(field_t), target, intent(in) :: u, v, w
-      real(kind=rp), intent(in) :: uinf
-      real(kind=rp), optional, intent(in) :: delta
-      real(kind=rp), allocatable :: temp_x(:)
-      real(kind=rp), allocatable :: temp_y(:)
-      real(kind=rp), allocatable :: temp_z(:)
-      real(c_rp) :: dummy
-      integer :: i, m, k, facet, idx(4)
-      real(kind=rp) :: normal_xyz(3)
-      
-
-      if (present(delta)) then
-         this%delta = delta
-      else 
-         this%delta = 0.01_rp
-      end if
-      this%uinf = uinf
-      this%u => u
-      this%v => v
-      this%c_Xh=> c_Xh
-      this%w => w
-      if ((NEKO_BCKND_DEVICE .eq. 1) .and. (this%msk(0) .gt. 0)) then
-         call device_alloc(this%normal_x_d,c_sizeof(dummy)*this%msk(0))
-         call device_alloc(this%normal_y_d,c_sizeof(dummy)*this%msk(0))
-         call device_alloc(this%normal_z_d,c_sizeof(dummy)*this%msk(0))
-         m = this%msk(0)
-         allocate(temp_x(m))
-         allocate(temp_y(m))
-         allocate(temp_z(m))
-         do i = 1, m
-            k = this%msk(i)
-            facet = this%facet(i)
-            idx = nonlinear_index(k,this%Xh%lx, this%Xh%lx,this%Xh%lx)
-            normal_xyz = &
-                 this%c_Xh%get_normal(idx(1), idx(2), idx(3), idx(4),facet)
+  subroutine dong_outflow_set_vars(this, uinf, delta)
+    class(dong_outflow_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: uinf
+    real(kind=rp), optional, intent(in) :: delta
+    real(kind=rp), allocatable :: temp_x(:)
+    real(kind=rp), allocatable :: temp_y(:)
+    real(kind=rp), allocatable :: temp_z(:)
+    real(c_rp) :: dummy
+    integer :: i, m, k, facet, idx(4)
+    real(kind=rp) :: normal_xyz(3)
+
+
+    if (present(delta)) then
+       this%delta = delta
+    else
+       this%delta = 0.01_rp
+    end if
+    this%uinf = uinf
+    this%u => neko_field_registry%get_field("u")
+    this%v => neko_field_registry%get_field("v")
+    this%w => neko_field_registry%get_field("w")
+    if ((NEKO_BCKND_DEVICE .eq. 1) .and. (this%msk(0) .gt. 0)) then
+       call device_alloc(this%normal_x_d,c_sizeof(dummy)*this%msk(0))
+       call device_alloc(this%normal_y_d,c_sizeof(dummy)*this%msk(0))
+       call device_alloc(this%normal_z_d,c_sizeof(dummy)*this%msk(0))
+       m = this%msk(0)
+       allocate(temp_x(m))
+       allocate(temp_y(m))
+       allocate(temp_z(m))
+       do i = 1, m
+          k = this%msk(i)
+          facet = this%facet(i)
+          idx = nonlinear_index(k,this%Xh%lx, this%Xh%lx,this%Xh%lx)
+          normal_xyz = &
+                 this%coef%get_normal(idx(1), idx(2), idx(3), idx(4),facet)
             temp_x(i) = normal_xyz(1)
             temp_y(i) = normal_xyz(2)
             temp_z(i) = normal_xyz(3)
          end do
-         call device_memcpy(temp_x, this%normal_x_d, m, HOST_TO_DEVICE)
-         call device_memcpy(temp_y, this%normal_y_d, m, HOST_TO_DEVICE)
+         call device_memcpy(temp_x, this%normal_x_d, m, &
+                            HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(temp_y, this%normal_y_d, m, &
+                            HOST_TO_DEVICE, sync=.false.)
          call device_memcpy(temp_z, this%normal_z_d, m, &
                             HOST_TO_DEVICE, sync=.true.)
          deallocate( temp_x, temp_y, temp_z)
       end if
-
   end subroutine dong_outflow_set_vars
 
   !> Boundary condition apply for a generic Dirichlet condition
@@ -140,13 +138,13 @@ subroutine dong_outflow_apply_scalar(this, x, n, t, tstep)
        uy = this%v%x(k,1,1,1)
        uz = this%w%x(k,1,1,1)
        idx = nonlinear_index(k,this%Xh%lx, this%Xh%lx,this%Xh%lx)
-       normal_xyz = this%c_Xh%get_normal(idx(1), idx(2), idx(3), idx(4),facet)
-       vn = ux*normal_xyz(1) + uy*normal_xyz(2) + uz*normal_xyz(3) 
+       normal_xyz = this%coef%get_normal(idx(1), idx(2), idx(3), idx(4),facet)
+       vn = ux*normal_xyz(1) + uy*normal_xyz(2) + uz*normal_xyz(3)
        S0 = 0.5_rp*(1.0_rp - tanh(vn / (this%uinf * this%delta)))
-                                     
+
        x(k)=-0.5*(ux*ux+uy*uy+uz*uz)*S0
     end do
-end subroutine dong_outflow_apply_scalar
+  end subroutine dong_outflow_apply_scalar
 
   !> Boundary condition apply for a generic Dirichlet condition
   !! to vectors @a x, @a y and @a z
@@ -158,7 +156,7 @@ subroutine dong_outflow_apply_vector(this, x, y, z, n, t, tstep)
     real(kind=rp), intent(inout),  dimension(n) :: z
     real(kind=rp), intent(in), optional :: t
     integer, intent(in), optional :: tstep
-    
+
   end subroutine dong_outflow_apply_vector
 
   !> Boundary condition apply for a generic Dirichlet condition
@@ -174,10 +172,10 @@ subroutine dong_outflow_apply_scalar_dev(this, x_d, t, tstep)
                                           this%u%x_d, this%v%x_d, this%w%x_d,&
                                           this%uinf, this%delta,&
                                           this%msk(0))
-    
+
   end subroutine dong_outflow_apply_scalar_dev
-  
-  !> Boundary condition apply for a generic Dirichlet condition 
+
+  !> Boundary condition apply for a generic Dirichlet condition
   !! to vectors @a x, @a y and @a z (device version)
   subroutine dong_outflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
     class(dong_outflow_t), intent(inout), target :: this
@@ -189,7 +187,7 @@ subroutine dong_outflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
 
     !call device_dong_outflow_apply_vector(this%msk_d, x_d, y_d, z_d, &
     !                                   this%g, size(this%msk))
-    
+
   end subroutine dong_outflow_apply_vector_dev
 
 end module dong_outflow
diff --git a/src/bc/facet_normal.f90 b/src/bc/facet_normal.f90
index 15856db900e..47fc23f7938 100644
--- a/src/bc/facet_normal.f90
+++ b/src/bc/facet_normal.f90
@@ -35,8 +35,8 @@ module facet_normal
   use device_facet_normal
   use num_types
   use math
-  use coefs, only : coef_t    
-  use dirichlet, only : dirichlet_t    
+  use coefs, only : coef_t
+  use dirichlet, only : dirichlet_t
   use utils
   use, intrinsic :: iso_c_binding, only : c_ptr
   implicit none
@@ -50,7 +50,6 @@ module facet_normal
      procedure, pass(this) :: apply_vector => facet_normal_apply_vector
      procedure, pass(this) :: apply_surfvec => facet_normal_apply_surfvec
      procedure, pass(this) :: apply_surfvec_dev => facet_normal_apply_surfvec_dev
-     procedure, pass(this) :: set_coef => facet_normal_set_coef
   end type facet_normal_t
 
 contains
@@ -88,18 +87,15 @@ subroutine facet_normal_apply_surfvec(this, x, y, z, u, v, w, n, t, tstep)
     real(kind=rp), intent(in), optional :: t
     integer, intent(in), optional :: tstep
     integer :: i, m, k, idx(4), facet
-    
-    if (.not. associated(this%c)) then
-       call neko_error('No coefficients assigned')
-    end if
-    associate(c => this%c)
+
+    associate(c => this%coef)
       m = this%msk(0)
       do i = 1, m
          k = this%msk(i)
          facet = this%facet(i)
          idx = nonlinear_index(k, c%Xh%lx, c%Xh%lx, c%Xh%lx)
          select case(facet)
-         case(1,2)          
+         case(1,2)
             x(k) = u(k) * c%nx(idx(2), idx(3), facet, idx(4)) &
                  * c%area(idx(2), idx(3), facet, idx(4))
             y(k) = v(k) * c%ny(idx(2), idx(3), facet, idx(4)) &
@@ -112,7 +108,7 @@ subroutine facet_normal_apply_surfvec(this, x, y, z, u, v, w, n, t, tstep)
             y(k) = v(k) * c%ny(idx(1), idx(3), facet, idx(4)) &
                  * c%area(idx(1), idx(3), facet, idx(4))
             z(k) = w(k) * c%nz(idx(1), idx(3), facet, idx(4)) &
-                 * c%area(idx(1), idx(3), facet, idx(4))          
+                 * c%area(idx(1), idx(3), facet, idx(4))
          case(5,6)
             x(k) = u(k) * c%nx(idx(1), idx(2), facet, idx(4)) &
                  * c%area(idx(1), idx(2), facet, idx(4))
@@ -123,15 +119,8 @@ subroutine facet_normal_apply_surfvec(this, x, y, z, u, v, w, n, t, tstep)
          end select
       end do
     end associate
-    
+
   end subroutine facet_normal_apply_surfvec
-  
-  !> Assign coefficients (facet normals etc)
-  subroutine facet_normal_set_coef(this, c)
-    class(facet_normal_t), intent(inout) :: this
-    type(coef_t), target, intent(inout) :: c
-    this%c => c
-  end subroutine facet_normal_set_coef
 
   !> Apply in facet normal direction (vector valued, device version)
   subroutine facet_normal_apply_surfvec_dev(this, x_d, y_d, z_d, &
@@ -141,16 +130,13 @@ subroutine facet_normal_apply_surfvec_dev(this, x_d, y_d, z_d, &
     real(kind=rp), intent(in), optional :: t
     integer, intent(in), optional :: tstep
 
-    if (.not. associated(this%c)) then
-       call neko_error('No coefficients assigned')
-    end if
-    associate(c => this%c)
+    associate(c => this%coef)
       call device_facet_normal_apply_surfvec(this%msk_d, this%facet_d, &
                                              x_d, y_d, z_d, u_d, v_d, w_d, &
                                              c%nx_d, c%ny_d, c%nz_d, c%area_d, &
                                              c%Xh%lx, size(this%msk))
     end associate
-         
+
   end subroutine facet_normal_apply_surfvec_dev
-  
+
 end module facet_normal
diff --git a/src/bc/field_dirichlet.f90 b/src/bc/field_dirichlet.f90
new file mode 100644
index 00000000000..84b80c8d846
--- /dev/null
+++ b/src/bc/field_dirichlet.f90
@@ -0,0 +1,189 @@
+! Copyright (c) 2020-2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Defines inflow dirichlet conditions
+module field_dirichlet
+  use num_types, only: rp
+  use coefs, only: coef_t
+  use dirichlet, only: dirichlet_t
+  use bc, only: bc_list_t, bc_t
+  use device, only: c_ptr, c_size_t
+  use utils, only: split_string
+  use field, only : field_t
+  use field_list, only : field_list_t
+  use math, only: masked_copy
+  use device_math, only: device_masked_copy
+  use dofmap, only : dofmap_t
+  use utils, only: neko_error
+  implicit none
+  private
+  
+  !> User defined dirichlet condition, for which the user can work
+  !! with an entire field.
+  !! Would be neat to add another class that contains all three 
+  !! dirichlet bcs for the velocity, this bc would then implement
+  !! apply_vector.
+  type, public, extends(bc_t) :: field_dirichlet_t
+     type(field_t) :: field_bc
+   contains
+     !> Constructor.
+     procedure, pass(this) :: init_field => field_dirichlet_init
+     !> Apply scalar by performing a masked copy.
+     procedure, pass(this) :: apply_scalar => field_dirichlet_apply_scalar
+     !> (No-op) Apply vector.
+     procedure, pass(this) :: apply_vector => field_dirichlet_apply_vector
+     !> (No-op) Apply vector (device).
+     procedure, pass(this) :: apply_vector_dev => field_dirichlet_apply_vector_dev
+     !> Apply scalar (device).
+     procedure, pass(this) :: apply_scalar_dev => field_dirichlet_apply_scalar_dev
+  end type field_dirichlet_t
+
+  !> Abstract interface defining a dirichlet condition on a list of fields.
+  !! @param field_bc_list List of fields that are used to extract values for field_dirichlet.
+  !! @param dirichlet_bc_list List of BCs containing field_dirichlet_t BCs only.
+  !! @param coef Coef object.
+  !! @param t Current time.
+  !! @param tstep Current time step.
+  !! @param which_solver Indicates wether the fields provided come from "fluid" or "scalar".
+  abstract interface
+     subroutine field_dirichlet_update(dirichlet_field_list, dirichlet_bc_list, coef, t, tstep, which_solver)
+       import rp
+       import field_list_t
+       import bc_list_t
+       import coef_t
+       type(field_list_t), intent(inout) :: dirichlet_field_list
+       type(bc_list_t), intent(inout) :: dirichlet_bc_list
+       type(coef_t), intent(inout) :: coef
+       real(kind=rp), intent(in) :: t
+       integer, intent(in) :: tstep
+       character(len=*), intent(in) :: which_solver
+     end subroutine field_dirichlet_update
+  end interface
+
+  public :: field_dirichlet_update
+
+contains
+     
+  !> Initializes this%field_bc.
+  !! @param bc_name Name of this%field_bc
+  subroutine field_dirichlet_init(this, bc_name)
+    class(field_dirichlet_t), intent(inout) :: this
+    character(len=*), intent(in) :: bc_name
+
+    call this%field_bc%init(this%dof, bc_name)
+
+  end subroutine field_dirichlet_init
+
+  !> Destructor. Currently this%field_bc is being freed in `fluid_scheme::free`
+  subroutine field_dirichlet_free(this)
+    type(field_dirichlet_t), intent(inout) :: this
+
+    call this%field_bc%free()
+
+  end subroutine field_dirichlet_free
+  
+  !> Apply scalar by performing a masked copy.
+  !! @param x Field onto which to copy the values (e.g. u,v,w,p or s).
+  !! @param n Size of the array `x`.
+  !! @param t Time.
+  !! @param tstep Time step.
+  subroutine field_dirichlet_apply_scalar(this, x, n, t, tstep)
+    class(field_dirichlet_t), intent(inout) :: this
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout),  dimension(n) :: x
+    real(kind=rp), intent(in), optional :: t
+    integer, intent(in), optional :: tstep
+
+    if (this%msk(0) .gt. 0) then
+       call masked_copy(x, this%field_bc%x, this%msk, n, this%msk(0))
+    end if
+
+  end subroutine field_dirichlet_apply_scalar
+  
+  !> Apply scalar (device).
+  !! @param x_d Device pointer to the field onto which to copy the values.
+  !! @param t Time.
+  !! @param tstep Time step.
+  subroutine field_dirichlet_apply_scalar_dev(this, x_d, t, tstep)
+    class(field_dirichlet_t), intent(inout), target :: this
+    type(c_ptr) :: x_d
+    real(kind=rp), intent(in), optional :: t
+    integer, intent(in), optional :: tstep
+
+    if (this%msk(0) .gt. 0) then
+       call device_masked_copy(x_d, this%field_bc%x_d, this%msk_d, &
+            this%field_bc%dof%size(), this%msk(0))
+    end if
+  
+  end subroutine field_dirichlet_apply_scalar_dev
+
+  !> (No-op) Apply vector.
+  !! @param x x-component of the field onto which to apply the values.
+  !! @param y y-component of the field onto which to apply the values.
+  !! @param z z-component of the field onto which to apply the values.
+  !! @param n Size of the `x`, `y` and `z` arrays.
+  !! @param t Time.
+  !! @param tstep Time step.
+  subroutine field_dirichlet_apply_vector(this, x, y, z, n, t, tstep)
+    class(field_dirichlet_t), intent(inout) :: this
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout),  dimension(n) :: x
+    real(kind=rp), intent(inout),  dimension(n) :: y
+    real(kind=rp), intent(inout),  dimension(n) :: z
+    real(kind=rp), intent(in), optional :: t
+    integer, intent(in), optional :: tstep
+
+    call neko_error("field_dirichlet cannot apply vector BCs.&
+&Use field_dirichlet_vector instead!")
+
+  end subroutine field_dirichlet_apply_vector
+
+  !> (No-op) Apply vector (device).
+  !! @param x x-component of the field onto which to apply the values.
+  !! @param y y-component of the field onto which to apply the values.
+  !! @param z z-component of the field onto which to apply the values.
+  !! @param t Time.
+  !! @param tstep Time step.
+  subroutine field_dirichlet_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
+    class(field_dirichlet_t), intent(inout), target :: this
+    type(c_ptr) :: x_d
+    type(c_ptr) :: y_d
+    type(c_ptr) :: z_d
+    real(kind=rp), intent(in), optional :: t
+    integer, intent(in), optional :: tstep
+
+    call neko_error("field_dirichlet cannot apply vector BCs.&
+&Use field_dirichlet_vector instead!")
+
+  end subroutine field_dirichlet_apply_vector_dev
+
+end module field_dirichlet
diff --git a/src/bc/field_dirichlet_vector.f90 b/src/bc/field_dirichlet_vector.f90
new file mode 100644
index 00000000000..6c8ebd79f95
--- /dev/null
+++ b/src/bc/field_dirichlet_vector.f90
@@ -0,0 +1,188 @@
+! Copyright (c) 2020-2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Defines inflow dirichlet conditions
+module field_dirichlet_vector
+  use num_types, only: rp
+  use coefs, only: coef_t
+  use dirichlet, only: dirichlet_t
+  use bc, only: bc_list_t, bc_t
+  use device, only: c_ptr, c_size_t
+  use utils, only: split_string
+  use field, only : field_t
+  use field_list, only : field_list_t
+  use math, only: masked_copy
+  use device_math, only: device_masked_copy
+  use dofmap, only : dofmap_t
+  use field_dirichlet, only: field_dirichlet_t
+  use utils, only: neko_error
+  implicit none
+  private
+  
+  !> Extension of the user defined dirichlet condition `field_dirichlet`
+  ! for the application on a vector field.
+  type, public, extends(bc_t) :: field_dirichlet_vector_t
+     type(field_dirichlet_t) :: field_dirichlet_u
+     type(field_dirichlet_t) :: field_dirichlet_v
+     type(field_dirichlet_t) :: field_dirichlet_w
+   contains
+     !> Initializes this%field_bc.
+     procedure, pass(this) :: init_field => field_dirichlet_vector_init
+     !> Apply scalar by performing a masked copy.
+     procedure, pass(this) :: apply_scalar => field_dirichlet_vector_apply_scalar
+     !> (No-op) Apply vector.
+     procedure, pass(this) :: apply_vector => field_dirichlet_vector_apply_vector
+     !> (No-op) Apply vector (device).
+     procedure, pass(this) :: apply_vector_dev => &
+          field_dirichlet_vector_apply_vector_dev
+     !> Apply scalar (device).
+     procedure, pass(this) :: apply_scalar_dev => &
+          field_dirichlet_vector_apply_scalar_dev
+  end type field_dirichlet_vector_t
+
+contains
+     
+  !> Initializes this%field_bc.
+  subroutine field_dirichlet_vector_init(this, bc_name)
+    class(field_dirichlet_vector_t), intent(inout) :: this
+    character(len=*), intent(in) :: bc_name
+
+    call neko_error("Fields must be initialized individually!")
+
+  end subroutine field_dirichlet_vector_init
+
+  !> Destructor. Currently unused as is, all field_dirichlet attributes
+  !! are freed in `fluid_scheme::free`.
+  subroutine field_dirichlet_vector_free(this)
+    type(field_dirichlet_vector_t), intent(inout) :: this
+
+    call this%field_dirichlet_u%free()
+    call this%field_dirichlet_v%free()
+    call this%field_dirichlet_w%free()
+    
+  end subroutine field_dirichlet_vector_free
+  
+  !> Apply scalar by performing a masked copy.
+  !! @param x Field onto which to copy the values (e.g. u,v,w,p or s).
+  !! @param n Size of the array `x`.
+  !! @param t Time.
+  !! @param tstep Time step.
+  subroutine field_dirichlet_vector_apply_scalar(this, x, n, t, tstep)
+    class(field_dirichlet_vector_t), intent(inout) :: this
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout),  dimension(n) :: x
+    real(kind=rp), intent(in), optional :: t
+    integer, intent(in), optional :: tstep
+
+    call neko_error("field_dirichlet_vector cannot apply scalar BCs.&
+&Use field_dirichlet instead!")
+
+  end subroutine field_dirichlet_vector_apply_scalar
+  
+  !> Apply scalar (device).
+  !! @param x_d Device pointer to the field onto which to copy the values.
+  !! @param t Time.
+  !! @param tstep Time step.
+  subroutine field_dirichlet_vector_apply_scalar_dev(this, x_d, t, tstep)
+    class(field_dirichlet_vector_t), intent(inout), target :: this
+    type(c_ptr) :: x_d
+    real(kind=rp), intent(in), optional :: t
+    integer, intent(in), optional :: tstep
+
+    call neko_error("field_dirichlet_vector cannot apply scalar BCs.&
+&Use field_dirichlet instead!")
+
+  end subroutine field_dirichlet_vector_apply_scalar_dev
+
+  !> (No-op) Apply vector.
+  !! @param x x-component of the field onto which to apply the values.
+  !! @param y y-component of the field onto which to apply the values.
+  !! @param z z-component of the field onto which to apply the values.
+  !! @param n Size of the `x`, `y` and `z` arrays.
+  !! @param t Time.
+  !! @param tstep Time step.
+  subroutine field_dirichlet_vector_apply_vector(this, x, y, z, n, t, tstep)
+    class(field_dirichlet_vector_t), intent(inout) :: this
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout),  dimension(n) :: x
+    real(kind=rp), intent(inout),  dimension(n) :: y
+    real(kind=rp), intent(inout),  dimension(n) :: z
+    real(kind=rp), intent(in), optional :: t
+    integer, intent(in), optional :: tstep
+
+    if (present(t) .and. present(tstep)) then
+       call this%field_dirichlet_u%apply_scalar(x, n, t, tstep)
+       call this%field_dirichlet_v%apply_scalar(y, n, t, tstep)
+       call this%field_dirichlet_w%apply_scalar(z, n, t, tstep)
+    else if (present(t)) then
+       call this%field_dirichlet_u%apply_scalar(x, n, t=t)
+       call this%field_dirichlet_v%apply_scalar(y, n, t=t)
+       call this%field_dirichlet_w%apply_scalar(z, n, t=t)
+    else if (present(tstep)) then
+       call this%field_dirichlet_u%apply_scalar(x, n, tstep=tstep)
+       call this%field_dirichlet_v%apply_scalar(y, n, tstep=tstep)
+       call this%field_dirichlet_w%apply_scalar(z, n, tstep=tstep)
+    end if
+
+  end subroutine field_dirichlet_vector_apply_vector
+
+  !> (No-op) Apply vector (device).
+  !! @param x x-component of the field onto which to apply the values.
+  !! @param y y-component of the field onto which to apply the values.
+  !! @param z z-component of the field onto which to apply the values.
+  !! @param t Time.
+  !! @param tstep Time step.
+  subroutine field_dirichlet_vector_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
+    class(field_dirichlet_vector_t), intent(inout), target :: this
+    type(c_ptr) :: x_d
+    type(c_ptr) :: y_d
+    type(c_ptr) :: z_d
+    real(kind=rp), intent(in), optional :: t
+    integer, intent(in), optional :: tstep
+
+    if (present(t) .and. present(tstep)) then
+       call this%field_dirichlet_u%apply_scalar_dev(x_d, t, tstep)
+       call this%field_dirichlet_v%apply_scalar_dev(y_d, t, tstep)
+       call this%field_dirichlet_w%apply_scalar_dev(z_d, t, tstep)
+    else if (present(t)) then
+       call this%field_dirichlet_u%apply_scalar_dev(x_d, t=t)
+       call this%field_dirichlet_v%apply_scalar_dev(y_d, t=t)
+       call this%field_dirichlet_w%apply_scalar_dev(z_d, t=t)
+    else if (present(tstep)) then
+       call this%field_dirichlet_u%apply_scalar_dev(x_d, tstep=tstep)
+       call this%field_dirichlet_v%apply_scalar_dev(y_d, tstep=tstep)
+       call this%field_dirichlet_w%apply_scalar_dev(z_d, tstep=tstep)
+    end if
+
+   end subroutine field_dirichlet_vector_apply_vector_dev
+
+end module field_dirichlet_vector
diff --git a/src/bc/inflow.f90 b/src/bc/inflow.f90
index 15e65b75371..8b991db8492 100644
--- a/src/bc/inflow.f90
+++ b/src/bc/inflow.f90
@@ -38,7 +38,7 @@ module inflow
   use, intrinsic :: iso_c_binding, only : c_ptr, c_loc
   implicit none
   private
-  
+
   !> Dirichlet condition for inlet (vector valued)
   type, public, extends(dirichlet_t) :: inflow_t
      real(kind=rp), dimension(3) :: x = (/0d0, 0d0, 0d0 /)
@@ -49,7 +49,7 @@ module inflow
      procedure, pass(this) :: apply_vector_dev => inflow_apply_vector_dev
      procedure, pass(this) :: set_inflow => inflow_set_vector
   end type inflow_t
-  
+
 contains
 
   !> No-op scalar apply
@@ -68,7 +68,7 @@ subroutine inflow_apply_scalar_dev(this, x_d, t, tstep)
     real(kind=rp), intent(in), optional :: t
     integer, intent(in), optional :: tstep
   end subroutine inflow_apply_scalar_dev
-  
+
   !> Apply inflow conditions (vector valued)
   subroutine inflow_apply_vector(this, x, y, z, n, t, tstep)
     class(inflow_t), intent(inout) :: this
@@ -97,10 +97,10 @@ subroutine inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
     type(c_ptr) :: z_d
     real(kind=rp), intent(in), optional :: t
     integer, intent(in), optional :: tstep
-    
+
     call device_inflow_apply_vector(this%msk_d, x_d, y_d, z_d, &
                                     c_loc(this%x), this%msk(0))
-    
+
   end subroutine inflow_apply_vector_dev
 
   !> Set inflow vector
@@ -109,6 +109,6 @@ subroutine inflow_set_vector(this, x)
     real(kind=rp), dimension(3), intent(inout) :: x
     this%x = x
   end subroutine inflow_set_vector
-    
-  
+
+
 end module inflow
diff --git a/src/bc/neumann.f90 b/src/bc/neumann.f90
new file mode 100644
index 00000000000..1e4ab0aee63
--- /dev/null
+++ b/src/bc/neumann.f90
@@ -0,0 +1,149 @@
+! Copyright (c) 2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Defines a Neumann boundary condition.
+module neumann
+  use num_types
+  use bc, only : bc_t
+  use, intrinsic :: iso_c_binding, only : c_ptr
+  use utils, only : neko_error, nonlinear_index
+  use coefs, only : coef_t
+  implicit none
+  private
+
+  !> Generic Neumann boundary condition.
+  !! This sets the flux of the field to the chosen value.
+  !! @note The condition is imposed weekly by adding an appropriate source term
+  !! to the right-hand-side.
+  type, public, extends(bc_t) :: neumann_t
+     real(kind=rp), private :: flux_
+   contains
+     procedure, pass(this) :: apply_scalar => neumann_apply_scalar
+     procedure, pass(this) :: apply_vector => neumann_apply_vector
+     procedure, pass(this) :: apply_scalar_dev => neumann_apply_scalar_dev
+     procedure, pass(this) :: apply_vector_dev => neumann_apply_vector_dev
+     procedure, pass(this) :: init_neumann => neumann_init_neumann
+     procedure, pass(this) :: flux => neumann_flux
+  end type neumann_t
+
+contains
+
+  !> Boundary condition apply for a generic Neumann condition
+  !! to a vector @a x
+  subroutine neumann_apply_scalar(this, x, n, t, tstep)
+    class(neumann_t), intent(inout) :: this
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout),  dimension(n) :: x
+    real(kind=rp), intent(in), optional :: t
+    integer, intent(in), optional :: tstep
+    integer :: i, m, k, facet
+    ! Store non-linear index
+    integer :: idx(4)
+
+    m = this%msk(0)
+    do i = 1, m
+       k = this%msk(i)
+       facet = this%facet(i)
+       idx = nonlinear_index(k, this%coef%Xh%lx, this%coef%Xh%lx,&
+                             this%coef%Xh%lx)
+       select case(facet)
+       case(1,2)
+          x(k) = x(k) + this%flux_*this%coef%area(idx(2), idx(3), facet, idx(4))
+       case(3,4)
+          x(k) = x(k) + this%flux_*this%coef%area(idx(1), idx(3), facet, idx(4))
+       case(5,6)
+          x(k) = x(k) + this%flux_*this%coef%area(idx(1), idx(2), facet, idx(4))
+       end select
+    end do
+  end subroutine neumann_apply_scalar
+
+  !> Boundary condition apply for a generic Neumann condition
+  !! to vectors @a x, @a y and @a z
+  subroutine neumann_apply_vector(this, x, y, z, n, t, tstep)
+    class(neumann_t), intent(inout) :: this
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout),  dimension(n) :: x
+    real(kind=rp), intent(inout),  dimension(n) :: y
+    real(kind=rp), intent(inout),  dimension(n) :: z
+    real(kind=rp), intent(in), optional :: t
+    integer, intent(in), optional :: tstep
+    integer :: i, m, k
+
+    call neko_error("Neumann bc not implemented for vectors")
+
+  end subroutine neumann_apply_vector
+
+  !> Boundary condition apply for a generic Neumann condition
+  !! to a vector @a x (device version)
+  subroutine neumann_apply_scalar_dev(this, x_d, t, tstep)
+    class(neumann_t), intent(inout), target :: this
+    type(c_ptr) :: x_d
+    real(kind=rp), intent(in), optional :: t
+    integer, intent(in), optional :: tstep
+
+    call neko_error("Neumann bc not implemented on the device")
+
+  end subroutine neumann_apply_scalar_dev
+
+  !> Boundary condition apply for a generic Neumann condition
+  !! to vectors @a x, @a y and @a z (device version)
+  subroutine neumann_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
+    class(neumann_t), intent(inout), target :: this
+    type(c_ptr) :: x_d
+    type(c_ptr) :: y_d
+    type(c_ptr) :: z_d
+    real(kind=rp), intent(in), optional :: t
+    integer, intent(in), optional :: tstep
+
+    call neko_error("Neumann bc not implemented on the device")
+
+  end subroutine neumann_apply_vector_dev
+
+  !> Constructor
+  !> @param flux The desired flux.
+  !> @param coef The SEM coefficients.
+  subroutine neumann_init_neumann(this, flux)
+    class(neumann_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: flux
+
+    this%flux_ = flux
+  end subroutine neumann_init_neumann
+
+  !> Get the set flux.
+  pure function neumann_flux(this) result(flux)
+    class(neumann_t), intent(in) :: this
+    real(kind=rp) :: flux
+
+    flux = this%flux_
+  end function neumann_flux
+
+end module neumann
diff --git a/src/bc/non_normal.f90 b/src/bc/non_normal.f90
index 74b0b84337f..17a3a1ddfe7 100644
--- a/src/bc/non_normal.f90
+++ b/src/bc/non_normal.f90
@@ -55,23 +55,23 @@ module non_normal
 contains
 
   !> Initialize symmetry mask for each axis
-  subroutine non_normal_init_msk(this, c)
+  subroutine non_normal_init_msk(this)
     class(non_normal_t), intent(inout) :: this
-    type(coef_t), intent(in) :: c
-    integer :: i, j, k, l 
+    integer :: i, j, k, l
     type(tuple_i4_t), pointer :: bfp(:)
     real(kind=rp) :: sx,sy,sz
     real(kind=rp), parameter :: TOL = 1d-3
     type(tuple_i4_t) :: bc_facet
     integer :: facet, el
-    
+
     call non_normal_free(this)
 
-    call this%bc_x%init(c%dof)
-    call this%bc_y%init(c%dof)
-    call this%bc_z%init(c%dof)
-    
-    associate(nx => c%nx, ny => c%ny, nz => c%nz)
+    call this%bc_x%init(this%coef)
+    call this%bc_y%init(this%coef)
+    call this%bc_z%init(this%coef)
+
+    associate(c=>this%coef, nx => this%coef%nx, ny => this%coef%ny, &
+              nz => this%coef%nz)
       bfp => this%marked_facet%array()
       do i = 1, this%marked_facet%size()
          bc_facet = bfp(i)
@@ -80,7 +80,7 @@ subroutine non_normal_init_msk(this, c)
          sx = 0d0
          sy = 0d0
          sz = 0d0
-         select case (facet)               
+         select case (facet)
          case(1,2)
             do l = 2, c%Xh%lx - 1
                do j = 2, c%Xh%lx -1
@@ -104,7 +104,7 @@ subroutine non_normal_init_msk(this, c)
                   sy = sy + abs(abs(ny(l, j, facet, el)) - 1d0)
                   sz = sz + abs(abs(nz(l, j, facet, el)) - 1d0)
                end do
-            end do               
+            end do
          end select
          sx = sx / (c%Xh%lx - 2)**2
          sy = sy / (c%Xh%lx - 2)**2
@@ -134,7 +134,7 @@ subroutine non_normal_init_msk(this, c)
     call this%bc_z%set_g(0.0_rp)
   end subroutine non_normal_init_msk
 
- 
+
   subroutine non_normal_free(this)
     type(non_normal_t), intent(inout) :: this
 
@@ -143,4 +143,4 @@ subroutine non_normal_free(this)
     call this%bc_z%free()
 
   end subroutine non_normal_free
- end module non_normal
+end module non_normal
diff --git a/src/bc/symmetry.f90 b/src/bc/symmetry.f90
index 50d93d1d0d5..e2c259c02ef 100644
--- a/src/bc/symmetry.f90
+++ b/src/bc/symmetry.f90
@@ -37,11 +37,11 @@ module symmetry
   use num_types
   use dirichlet
   use bc
-  use coefs
   use math
   use utils
   use stack
   use tuple
+  use coefs, only : coef_t
   use, intrinsic :: iso_c_binding, only : c_ptr
   implicit none
   private
@@ -62,23 +62,23 @@ module symmetry
 contains
 
   !> Initialize symmetry mask for each axis
-  subroutine symmetry_init_msk(this, c)
+  subroutine symmetry_init_msk(this)
     class(symmetry_t), intent(inout) :: this
-    type(coef_t), intent(in) :: c
     integer :: i, m, j, l
     type(tuple_i4_t), pointer :: bfp(:)
     real(kind=rp) :: sx,sy,sz
     real(kind=rp), parameter :: TOL = 1d-3
     type(tuple_i4_t) :: bc_facet
     integer :: facet, el
-    
+
     call symmetry_free(this)
 
-    call this%bc_x%init(c%dof)
-    call this%bc_y%init(c%dof)
-    call this%bc_z%init(c%dof)
-    
-    associate(nx => c%nx, ny => c%ny, nz => c%nz)
+    call this%bc_x%init(this%coef)
+    call this%bc_y%init(this%coef)
+    call this%bc_z%init(this%coef)
+
+    associate(c=>this%coef, nx => this%coef%nx, ny => this%coef%ny, &
+              nz => this%coef%nz)
       bfp => this%marked_facet%array()
       do i = 1, this%marked_facet%size()
          bc_facet = bfp(i)
@@ -87,7 +87,7 @@ subroutine symmetry_init_msk(this, c)
          sx = 0d0
          sy = 0d0
          sz = 0d0
-         select case (facet)               
+         select case (facet)
          case(1,2)
             do l = 2, c%Xh%lx - 1
                do j = 2, c%Xh%lx -1
@@ -111,7 +111,7 @@ subroutine symmetry_init_msk(this, c)
                   sy = sy + abs(abs(ny(l, j, facet, el)) - 1d0)
                   sz = sz + abs(abs(nz(l, j, facet, el)) - 1d0)
                end do
-            end do               
+            end do
          end select
          sx = sx / (c%Xh%lx - 2)**2
          sy = sy / (c%Xh%lx - 2)**2
@@ -138,16 +138,16 @@ subroutine symmetry_init_msk(this, c)
     call this%bc_z%set_g(0.0_rp)
 
   end subroutine symmetry_init_msk
-  
+
   subroutine symmetry_free(this)
     type(symmetry_t), intent(inout) :: this
-    
+
     call this%bc_x%free()
     call this%bc_y%free()
     call this%bc_z%free()
 
   end subroutine symmetry_free
-  
+
   !> No-op scalar apply
   subroutine symmetry_apply_scalar(this, x, n, t, tstep)
     class(symmetry_t), intent(inout) :: this
@@ -171,7 +171,7 @@ subroutine symmetry_apply_vector(this, x, y, z, n, t, tstep)
     call this%bc_x%apply_scalar(x,n)
     call this%bc_y%apply_scalar(y,n)
     call this%bc_z%apply_scalar(z,n)
-    
+
   end subroutine symmetry_apply_vector
 
   !> No-op scalar apply (device version)
@@ -198,5 +198,5 @@ subroutine symmetry_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
                                       this%bc_z%msk(0))
 
   end subroutine symmetry_apply_vector_dev
-      
+
 end module symmetry
diff --git a/src/bc/usr_inflow.f90 b/src/bc/usr_inflow.f90
index 51c12a8fd7e..cf2b4be6edb 100644
--- a/src/bc/usr_inflow.f90
+++ b/src/bc/usr_inflow.f90
@@ -40,7 +40,7 @@ module usr_inflow
   use utils
   implicit none
   private
-  
+
   !> User defined dirichlet condition for inlet (vector valued)
   type, public, extends(inflow_t) :: usr_inflow_t
      type(coef_t), pointer :: c => null()
@@ -59,7 +59,7 @@ module usr_inflow
   end type usr_inflow_t
 
   abstract interface
-   
+
      !> Abstract interface defining a user defined inflow condition (pointwise)
      !! @param u The x componenet of the velocity in this point
      !! @param v The y componenet of the velocity in this point
@@ -100,14 +100,14 @@ end subroutine usr_inflow_eval
   public :: usr_inflow_eval
 
 contains
-     
+
   subroutine usr_inflow_free(this)
     type(usr_inflow_t), intent(inout) :: this
 
     if (c_associated(this%usr_x_d)) then
        call device_free(this%usr_x_d)
     end if
-    
+
     if (c_associated(this%usr_y_d)) then
        call device_free(this%usr_y_d)
     end if
@@ -115,10 +115,10 @@ subroutine usr_inflow_free(this)
     if (c_associated(this%usr_z_d)) then
        call device_free(this%usr_z_d)
     end if
-    
+
   end subroutine usr_inflow_free
-  
-  !> No-op scalar apply 
+
+  !> No-op scalar apply
   subroutine usr_inflow_apply_scalar(this, x, n, t, tstep)
     class(usr_inflow_t), intent(inout) :: this
     integer, intent(in) :: n
@@ -126,7 +126,7 @@ subroutine usr_inflow_apply_scalar(this, x, n, t, tstep)
     real(kind=rp), intent(in), optional :: t
     integer, intent(in), optional :: tstep
   end subroutine usr_inflow_apply_scalar
-  
+
   !> No-op scalar apply (device version)
   subroutine usr_inflow_apply_scalar_dev(this, x_d, t, tstep)
     class(usr_inflow_t), intent(inout), target :: this
@@ -168,7 +168,7 @@ subroutine usr_inflow_apply_vector(this, x, y, z, n, t, tstep)
          facet = this%facet(i)
          idx = nonlinear_index(k, lx, lx, lx)
          select case(facet)
-         case(1,2)          
+         case(1,2)
             call this%eval(x(k), y(k), z(k), &
                  xc(idx(1), idx(2), idx(3), idx(4)), &
                  yc(idx(1), idx(2), idx(3), idx(4)), &
@@ -182,7 +182,7 @@ subroutine usr_inflow_apply_vector(this, x, y, z, n, t, tstep)
             call this%eval(x(k), y(k), z(k), &
                  xc(idx(1), idx(2), idx(3), idx(4)), &
                  yc(idx(1), idx(2), idx(3), idx(4)), &
-                 zc(idx(1), idx(2), idx(3), idx(4)), &       
+                 zc(idx(1), idx(2), idx(3), idx(4)), &
                  nx(idx(1), idx(3), facet, idx(4)), &
                  ny(idx(1), idx(3), facet, idx(4)), &
                  nz(idx(1), idx(3), facet, idx(4)), &
@@ -192,7 +192,7 @@ subroutine usr_inflow_apply_vector(this, x, y, z, n, t, tstep)
             call this%eval(x(k), y(k), z(k), &
                  xc(idx(1), idx(2), idx(3), idx(4)), &
                  yc(idx(1), idx(2), idx(3), idx(4)), &
-                 zc(idx(1), idx(2), idx(3), idx(4)), &                     
+                 zc(idx(1), idx(2), idx(3), idx(4)), &
                  nx(idx(1), idx(2), facet, idx(4)), &
                  ny(idx(1), idx(2), facet, idx(4)), &
                  nz(idx(1), idx(2), facet, idx(4)), &
@@ -201,7 +201,7 @@ subroutine usr_inflow_apply_vector(this, x, y, z, n, t, tstep)
          end select
       end do
     end associate
-    
+
   end subroutine usr_inflow_apply_vector
 
   subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
@@ -241,7 +241,7 @@ subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
       ! Pretabulate values during first call to apply
       if (.not. c_associated(usr_x_d)) then
          allocate(x(m), y(m), z(m)) ! Temp arrays
-         
+
          s = m*rp
 
          call device_alloc(usr_x_d, s)
@@ -256,7 +256,7 @@ subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
               facet = this%facet(i)
               idx = nonlinear_index(k, lx, lx, lx)
               select case(facet)
-              case(1,2)          
+              case(1,2)
                  call this%eval(x(i), y(i), z(i), &
                       xc(idx(1), idx(2), idx(3), idx(4)), &
                       yc(idx(1), idx(2), idx(3), idx(4)), &
@@ -270,7 +270,7 @@ subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
                  call this%eval(x(i), y(i), z(i), &
                       xc(idx(1), idx(2), idx(3), idx(4)), &
                       yc(idx(1), idx(2), idx(3), idx(4)), &
-                      zc(idx(1), idx(2), idx(3), idx(4)), &       
+                      zc(idx(1), idx(2), idx(3), idx(4)), &
                       nx(idx(1), idx(3), facet, idx(4)), &
                       ny(idx(1), idx(3), facet, idx(4)), &
                       nz(idx(1), idx(3), facet, idx(4)), &
@@ -280,7 +280,7 @@ subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
                  call this%eval(x(i), y(i), z(i), &
                       xc(idx(1), idx(2), idx(3), idx(4)), &
                       yc(idx(1), idx(2), idx(3), idx(4)), &
-                      zc(idx(1), idx(2), idx(3), idx(4)), &                     
+                      zc(idx(1), idx(2), idx(3), idx(4)), &
                       nx(idx(1), idx(2), facet, idx(4)), &
                       ny(idx(1), idx(2), facet, idx(4)), &
                       nz(idx(1), idx(2), facet, idx(4)), &
@@ -289,10 +289,9 @@ subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
               end select
            end do
          end associate
- 
         
-         call device_memcpy(x, usr_x_d, m, HOST_TO_DEVICE)
-         call device_memcpy(y, usr_y_d, m, HOST_TO_DEVICE)
+         call device_memcpy(x, usr_x_d, m, HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(y, usr_y_d, m, HOST_TO_DEVICE, sync=.false.)
          call device_memcpy(z, usr_z_d, m, HOST_TO_DEVICE, sync=.true.)
 
          deallocate(x, y, z)
@@ -300,11 +299,11 @@ subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
 
       call device_inhom_dirichlet_apply_vector(this%msk_d, x_d, y_d, z_d, &
            usr_x_d, usr_y_d, usr_z_d, m)
-      
+
     end associate
 
   end subroutine usr_inflow_apply_vector_dev
-  
+
   !> Assign coefficients (facet normals etc)
   subroutine usr_inflow_set_coef(this, c)
     class(usr_inflow_t), intent(inout) :: this
@@ -325,10 +324,10 @@ subroutine usr_inflow_validate(this)
     class(usr_inflow_t), intent(inout) :: this
     logical :: valid
 
-    valid = .true. ! Assert it's going to be ok...    
+    valid = .true. ! Assert it's going to be ok...
     if (.not. associated(this%c)) then
        call neko_warning('Missing coefficients')
-       valid = .false.       
+       valid = .false.
     end if
 
     if (.not. associated(this%eval)) then
@@ -339,7 +338,7 @@ subroutine usr_inflow_validate(this)
     if (.not. valid) then
        call neko_error('Invalid user defined inflow condition')
     end if
-    
+
   end subroutine usr_inflow_validate
-  
+
 end module usr_inflow
diff --git a/src/bc/usr_scalar.f90 b/src/bc/usr_scalar.f90
index 4b4aa4e45ca..efb81be6727 100644
--- a/src/bc/usr_scalar.f90
+++ b/src/bc/usr_scalar.f90
@@ -40,7 +40,7 @@ module usr_scalar
   use utils
   implicit none
   private
-  
+
   !> User defined dirichlet condition for scalars
   type, public, extends(dirichlet_t) :: usr_scalar_t
      type(coef_t), pointer :: c => null()
@@ -97,16 +97,16 @@ end subroutine usr_scalar_bc_eval
   public :: usr_scalar_bc_eval
 
 contains
-     
+
   subroutine usr_inflow_free(this)
     type(usr_scalar_t), intent(inout) :: this
 
     if (c_associated(this%usr_x_d)) then
        call device_free(this%usr_x_d)
     end if
-        
+
   end subroutine usr_inflow_free
-  
+
   !> Scalar apply
   !! Just imitating inflow for now, but we should look this over
   !! Applies boundary conditions in eval on x
@@ -142,7 +142,7 @@ subroutine usr_scalar_apply_scalar(this, x, n, t, tstep)
          facet = this%facet(i)
          idx = nonlinear_index(k, lx, lx, lx)
          select case(facet)
-         case(1,2)          
+         case(1,2)
             call this%eval(x(k), &
                  xc(idx(1), idx(2), idx(3), idx(4)), &
                  yc(idx(1), idx(2), idx(3), idx(4)), &
@@ -156,7 +156,7 @@ subroutine usr_scalar_apply_scalar(this, x, n, t, tstep)
             call this%eval(x(k), &
                  xc(idx(1), idx(2), idx(3), idx(4)), &
                  yc(idx(1), idx(2), idx(3), idx(4)), &
-                 zc(idx(1), idx(2), idx(3), idx(4)), &       
+                 zc(idx(1), idx(2), idx(3), idx(4)), &
                  nx(idx(1), idx(3), facet, idx(4)), &
                  ny(idx(1), idx(3), facet, idx(4)), &
                  nz(idx(1), idx(3), facet, idx(4)), &
@@ -166,7 +166,7 @@ subroutine usr_scalar_apply_scalar(this, x, n, t, tstep)
             call this%eval(x(k), &
                  xc(idx(1), idx(2), idx(3), idx(4)), &
                  yc(idx(1), idx(2), idx(3), idx(4)), &
-                 zc(idx(1), idx(2), idx(3), idx(4)), &                     
+                 zc(idx(1), idx(2), idx(3), idx(4)), &
                  nx(idx(1), idx(2), facet, idx(4)), &
                  ny(idx(1), idx(2), facet, idx(4)), &
                  nz(idx(1), idx(2), facet, idx(4)), &
@@ -222,7 +222,7 @@ subroutine usr_scalar_apply_scalar_dev(this, x_d, t, tstep)
             facet = this%facet(i)
             idx = nonlinear_index(k, lx, lx, lx)
             select case(facet)
-            case(1,2)          
+            case(1,2)
                call this%eval(x(i), &
                     xc(idx(1), idx(2), idx(3), idx(4)), &
                     yc(idx(1), idx(2), idx(3), idx(4)), &
@@ -236,7 +236,7 @@ subroutine usr_scalar_apply_scalar_dev(this, x_d, t, tstep)
                call this%eval(x(i), &
                     xc(idx(1), idx(2), idx(3), idx(4)), &
                     yc(idx(1), idx(2), idx(3), idx(4)), &
-                    zc(idx(1), idx(2), idx(3), idx(4)), &       
+                    zc(idx(1), idx(2), idx(3), idx(4)), &
                     nx(idx(1), idx(3), facet, idx(4)), &
                     ny(idx(1), idx(3), facet, idx(4)), &
                     nz(idx(1), idx(3), facet, idx(4)), &
@@ -246,7 +246,7 @@ subroutine usr_scalar_apply_scalar_dev(this, x_d, t, tstep)
                call this%eval(x(i), &
                     xc(idx(1), idx(2), idx(3), idx(4)), &
                     yc(idx(1), idx(2), idx(3), idx(4)), &
-                    zc(idx(1), idx(2), idx(3), idx(4)), &                     
+                    zc(idx(1), idx(2), idx(3), idx(4)), &
                     nx(idx(1), idx(2), facet, idx(4)), &
                     ny(idx(1), idx(2), facet, idx(4)), &
                     nz(idx(1), idx(2), facet, idx(4)), &
@@ -254,7 +254,6 @@ subroutine usr_scalar_apply_scalar_dev(this, x_d, t, tstep)
                     t_, tstep_)
             end select
          end do
- 
         
          call device_memcpy(x, this%usr_x_d, m, HOST_TO_DEVICE, sync=.true.)
 
@@ -267,7 +266,7 @@ subroutine usr_scalar_apply_scalar_dev(this, x_d, t, tstep)
 
 
   end subroutine usr_scalar_apply_scalar_dev
-  
+
   !> No-op vector apply
   subroutine usr_scalar_apply_vector(this, x, y, z, n, t, tstep)
     class(usr_scalar_t), intent(inout) :: this
@@ -277,7 +276,7 @@ subroutine usr_scalar_apply_vector(this, x, y, z, n, t, tstep)
     real(kind=rp), intent(inout),  dimension(n) :: z
     real(kind=rp), intent(in), optional :: t
     integer, intent(in), optional :: tstep
-    integer :: i, m, k, idx(4), facet    
+    integer :: i, m, k, idx(4), facet
   end subroutine usr_scalar_apply_vector
 
   !> No-op vector apply (device version)
@@ -315,10 +314,10 @@ subroutine usr_scalar_validate(this)
     class(usr_scalar_t), intent(inout) :: this
     logical :: valid
 
-    valid = .true. ! Assert it's going to be ok...    
+    valid = .true. ! Assert it's going to be ok...
     if (.not. associated(this%c)) then
        call neko_warning('Missing coefficients')
-       valid = .false.       
+       valid = .false.
     end if
 
     if (.not. associated(this%eval)) then
@@ -329,7 +328,7 @@ subroutine usr_scalar_validate(this)
     if (.not. valid) then
        call neko_error('Invalid user defined scalar condition')
     end if
-    
+
   end subroutine usr_scalar_validate
-  
+
 end module usr_scalar
diff --git a/src/bc/wall.f90 b/src/bc/wall.f90
index 469350010e1..910dd3102c3 100644
--- a/src/bc/wall.f90
+++ b/src/bc/wall.f90
@@ -65,16 +65,16 @@ subroutine no_slip_wall_apply_scalar(this, x, n, t, tstep)
        k = this%msk(i)
        x(k) = 0d0
     end do
-    
+
   end subroutine no_slip_wall_apply_scalar
-  
+
   !> Boundary condition apply for a no-slip wall condition
   !! to vectors @a x, @a y and @a z
   subroutine no_slip_wall_apply_vector(this, x, y, z, n, t, tstep)
     class(no_slip_wall_t), intent(inout) :: this
     integer, intent(in) :: n
     real(kind=rp), intent(inout),  dimension(n) :: x
-    real(kind=rp), intent(inout),  dimension(n) :: y    
+    real(kind=rp), intent(inout),  dimension(n) :: y
     real(kind=rp), intent(inout),  dimension(n) :: z
     real(kind=rp), intent(in), optional :: t
     integer, intent(in), optional :: tstep
@@ -87,7 +87,7 @@ subroutine no_slip_wall_apply_vector(this, x, y, z, n, t, tstep)
        y(k) = 0d0
        z(k) = 0d0
     end do
-    
+
   end subroutine no_slip_wall_apply_vector
 
   !> Boundary condition apply for a no-slip wall condition
@@ -99,9 +99,9 @@ subroutine no_slip_wall_apply_scalar_dev(this, x_d, t, tstep)
     integer, intent(in), optional :: tstep
 
     call device_no_slip_wall_apply_scalar(this%msk_d, x_d, size(this%msk))
-    
+
   end subroutine no_slip_wall_apply_scalar_dev
-  
+
   !> Boundary condition apply for a no-slip wall condition
   !! to vectors @a x, @a y and @a z (device version)
   subroutine no_slip_wall_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
@@ -114,7 +114,7 @@ subroutine no_slip_wall_apply_vector_dev(this, x_d, y_d, z_d, t, tstep)
 
     call device_no_slip_wall_apply_vector(this%msk_d, x_d, y_d, z_d, &
                                           size(this%msk))
-    
+
   end subroutine no_slip_wall_apply_vector_dev
-  
+
 end module wall
diff --git a/src/case.f90 b/src/case.f90
index 65219a51a7c..5aa8a470cb4 100644
--- a/src/case.f90
+++ b/src/case.f90
@@ -44,7 +44,8 @@ module case
   use parmetis
   use redist
   use sampler
-  use flow_ic    
+  use flow_ic
+  use scalar_ic, only : set_scalar_ic
   use stats
   use file
   use utils
@@ -53,7 +54,7 @@ module case
   use time_scheme_controller, only : time_scheme_controller_t
   use logger
   use jobctrl
-  use user_intf  
+  use user_intf
   use scalar_pnpn ! todo directly load the pnpn? can we have other
   use json_module, only : json_file, json_core, json_value
   use json_utils, only : json_get, json_get_or_default
@@ -61,7 +62,7 @@ module case
   use point_zone_registry, only: neko_point_zone_registry
   use material_properties, only : material_properties_t
   implicit none
-  
+
   type :: case_t
      type(mesh_t) :: msh
      type(json_file) :: params
@@ -76,7 +77,7 @@ module case
      type(chkp_output_t) :: f_chkp
      type(mean_flow_output_t) :: f_mf
      type(mean_sqr_flow_output_t) :: f_msqrf
-     type(stats_t) :: q   
+     type(stats_t) :: q
      type(user_t) :: usr
      class(fluid_scheme_t), allocatable :: fluid
      type(scalar_pnpn_t), allocatable :: scalar 
@@ -84,7 +85,7 @@ module case
   end type case_t
 
   interface case_init
-     module procedure case_init_from_file
+     module procedure case_init_from_file, case_init_from_json
   end interface case_init
 
   private :: case_init_from_file, case_init_from_json, case_init_common
@@ -97,14 +98,15 @@ subroutine case_init_from_file(C, case_file)
     character(len=*), intent(in) :: case_file
     integer :: ierr, integer_val
     character(len=:), allocatable :: json_buffer
-   
+
     call neko_log%section('Case')
-    call neko_log%message('Reading case file ' // trim(case_file))
-    
+    call neko_log%message('Reading case file ' // trim(case_file), &
+                          NEKO_LOG_QUIET)
+
     if (pe_rank .eq. 0) then
-      call C%params%load_file(filename=trim(case_file))
-      call C%params%print_to_string(json_buffer)
-      integer_val = len(json_buffer)
+       call C%params%load_file(filename=trim(case_file))
+       call C%params%print_to_string(json_buffer)
+       integer_val = len(json_buffer)
     end if
 
     call MPI_Bcast(integer_val, 1, MPI_INTEGER, 0, NEKO_COMM, ierr)
@@ -115,7 +117,7 @@ subroutine case_init_from_file(C, case_file)
     deallocate(json_buffer)
 
     call case_init_common(C)
-    
+
   end subroutine case_init_from_file
 
   !> Initialize a case from a JSON object describing a case
@@ -124,12 +126,12 @@ subroutine case_init_from_json(C, case_json)
     type(json_file), intent(in) :: case_json
 
     call neko_log%section('Case')
-    call neko_log%message('Creating case from JSON object')
+    call neko_log%message('Creating case from JSON object', NEKO_LOG_QUIET)
 
     C%params = case_json
 
     call case_init_common(C)
-    
+
   end subroutine case_init_from_json
 
   !> Initialize a case from its (loaded) params object
@@ -145,21 +147,19 @@ subroutine case_init_common(C)
     real(kind=rp) :: real_val
     character(len=:), allocatable :: string_val
     real(kind=rp) :: stats_start_time, stats_output_val
-    integer ::  stats_sampling_interval 
+    integer ::  stats_sampling_interval
     integer :: output_dir_len
-    integer :: n_simcomps, i
-    type(json_core) :: core
-    type(json_value), pointer :: json_val1, json_val2 
-    type(json_file) :: json_subdict
+    integer :: n_simcomps
+    integer :: precision
 
     !
     ! Load mesh
     !
     call json_get(C%params, 'case.mesh_file', string_val)
     msh_file = file_t(string_val)
-    
+
     call msh_file%read(C%msh)
-    
+
     !
     ! Load Balancing
     !
@@ -170,19 +170,30 @@ subroutine case_init_common(C)
        call neko_log%section('Load Balancing')
        call parmetis_partmeshkway(C%msh, parts)
        call redist_mesh(C%msh, parts)
-       call neko_log%end_section()       
+       call neko_log%end_section()
     end if
 
     !
     ! Time step
     !
-    call json_get(C%params, 'case.timestep', C%dt)
+    call C%params%get('case.variable_timestep', logical_val, found)
+    if (.not. logical_val) then
+       call json_get(C%params, 'case.timestep', C%dt)
+    else
+       ! randomly set an initial dt to get cfl when dt is variable
+       C%dt = 1.0_rp
+    end if
 
     !
     ! End time
     !
     call json_get(C%params, 'case.end_time', C%end_time)
 
+    !
+    ! Initialize point_zones registry
+    !
+    call neko_point_zone_registry%init(C%params, C%msh)
+
     !
     ! Setup user defined functions
     !
@@ -194,7 +205,7 @@ subroutine case_init_common(C)
     ! Material properties
     !
     call C%material_properties%init(C%params, C%usr)
-    
+
     !
     ! Setup fluid scheme
     !
@@ -208,25 +219,20 @@ subroutine case_init_common(C)
     C%fluid%chkp%dtlag => C%dtlag
     select type(f => C%fluid)
     type is(fluid_pnpn_t)
-      f%chkp%abx1 => f%abx1
-      f%chkp%abx2 => f%abx2
-      f%chkp%aby1 => f%aby1
-      f%chkp%aby2 => f%aby2
-      f%chkp%abz1 => f%abz1
-      f%chkp%abz2 => f%abz2
+       f%chkp%abx1 => f%abx1
+       f%chkp%abx2 => f%abx2
+       f%chkp%aby1 => f%aby1
+       f%chkp%aby2 => f%aby2
+       f%chkp%abz1 => f%abz1
+       f%chkp%abz2 => f%abz2
     end select
 
-    
+
     !
     ! Setup scratch registry
     !
     neko_scratch_registry = scratch_registry_t(C%fluid%dm_Xh, 10, 10)
 
-    !
-    ! Initialize point_zones registry
-    !
-    call neko_point_zone_registry%init(C%params, C%fluid%u%dof)
-
     !
     ! Setup scalar scheme
     !
@@ -256,29 +262,15 @@ subroutine case_init_common(C)
           call C%fluid%set_usr_inflow(C%usr%fluid_user_if)
        end if
     end if
-    
-    ! Setup source term for the scalar
-    ! @todo should be expanded for user sources etc. Now copies the fluid one
-    if (scalar) then
-       logical_val = C%params%valid_path('case.scalar.source_term')
-       call json_get_or_default(C%params, 'case.scalar.source_term.type',&
-                                string_val, 'noforce')
-       if (trim(string_val) .eq. 'user') then
-          call C%scalar%set_source(trim(string_val), &
-               usr_f=C%usr%scalar_user_f)
-       else if (trim(string_val) .eq. 'user_vector') then
-          call C%scalar%set_source(trim(string_val), &
-               usr_f_vec=C%usr%scalar_user_f_vector)
-       else
-          call C%scalar%set_source(trim(string_val))
-       end if
 
+    ! Setup user boundary conditions for the scalar.
+    if (scalar) then
        call C%scalar%set_user_bc(C%usr%scalar_user_bc)
     end if
 
     !
     ! Setup initial conditions
-    ! 
+    !
     call json_get(C%params, 'case.fluid.initial_condition.type',&
                   string_val)
     if (trim(string_val) .ne. 'user') then
@@ -289,6 +281,17 @@ subroutine case_init_common(C)
             C%fluid%c_Xh, C%fluid%gs_Xh, C%usr%fluid_user_ic, C%params)
     end if
 
+    if (scalar) then
+       call json_get(C%params, 'case.scalar.initial_condition.type', string_val)
+       if (trim(string_val) .ne. 'user') then
+          call set_scalar_ic(C%scalar%s, &
+            C%scalar%c_Xh, C%scalar%gs_Xh, string_val, C%params)
+       else
+          call set_scalar_ic(C%scalar%s, &
+            C%scalar%c_Xh, C%scalar%gs_Xh, C%usr%scalar_user_ic, C%params)
+       end if
+    end if
+
     ! Add initial conditions to BDF scheme (if present)
     select type(f => C%fluid)
     type is(fluid_pnpn_t)
@@ -297,7 +300,6 @@ subroutine case_init_common(C)
        call f%wlag%set(f%w)
     end select
 
-
     !
     ! Validate that the case is properly setup for time-stepping
     !
@@ -329,10 +331,10 @@ subroutine case_init_common(C)
           end if
        end if
     end if
-    
+
     !
     ! Save boundary markings for fluid (if requested)
-    ! 
+    !
     call json_get_or_default(C%params, 'case.output_boundary',&
                              logical_val, .false.)
     if (logical_val) then
@@ -353,14 +355,28 @@ subroutine case_init_common(C)
        call mesh_field_free(msh_part)
     end if
 
+    !
+    ! Setup output precision of the field files
+    !
+    call json_get_or_default(C%params, 'case.output_precision', string_val,&
+         'single')
+
+    if (trim(string_val) .eq. 'double') then
+       precision = dp
+    else
+       precision = sp
+    end if
+
     !
     ! Setup sampler
     !
     call C%s%init(C%end_time)
     if (scalar) then
-       C%f_out = fluid_output_t(C%fluid, C%scalar, path=trim(output_directory))
+       C%f_out = fluid_output_t(precision, C%fluid, C%scalar, &
+            path=trim(output_directory))
     else
-       C%f_out = fluid_output_t(C%fluid, path=trim(output_directory))
+       C%f_out = fluid_output_t(precision, C%fluid, &
+            path=trim(output_directory))
     end if
 
     call json_get_or_default(C%params, 'case.fluid.output_control',&
@@ -372,11 +388,13 @@ subroutine case_init_common(C)
        call C%s%add(C%f_out, real_val, 'nsamples')
     else if (trim(string_val) .eq. 'never') then
        ! Fix a dummy 0.0 output_value
+       call json_get_or_default(C%params, 'case.fluid.output_value', real_val, &
+                                0.0_rp)
        call C%s%add(C%f_out, 0.0_rp, string_val)
-    else 
+    else
        call json_get(C%params, 'case.fluid.output_value', real_val)
        call C%s%add(C%f_out, real_val, string_val)
-    end if   
+    end if
 
     !
     ! Save checkpoints (if nothing specified, default to saving at end of sim)
@@ -385,15 +403,17 @@ subroutine case_init_common(C)
                              logical_val, .true.)
     if (logical_val) then
        C%f_chkp = chkp_output_t(C%fluid%chkp, path=output_directory)
-       call json_get_or_default(C%params, 'case.checkpoint_control', string_val,"simulationtime")
-       call json_get_or_default(C%params, 'case.checkpoint_value', real_val,1e10_rp)
+       call json_get_or_default(C%params, 'case.checkpoint_control', &
+            string_val, "simulationtime")
+       call json_get_or_default(C%params, 'case.checkpoint_value', real_val,&
+            1e10_rp)
        call C%s%add(C%f_chkp, real_val, string_val)
     end if
 
     !
     ! Setup statistics
     !
-    
+
     ! Always init, so that we can call eval in simulation.f90 with no if.
     ! Note, don't use json_get_or_default here, because that will break the
     ! valid_path if statement below (the path will become valid always).
@@ -424,7 +444,7 @@ subroutine case_init_common(C)
                         string_val)
           call json_get(C%params, 'case.statistics.output_value', &
                         stats_output_val)
-       
+
           call C%s%add(C%f_mf, stats_output_val, string_val)
           call C%q%add(C%fluid%stats)
 
@@ -458,10 +478,10 @@ subroutine case_init_common(C)
     end if
 
     call neko_log%end_section()
-    
+
   end subroutine case_init_common
-  
-  !> Deallocate a case 
+
+  !> Deallocate a case
   subroutine case_free(C)
     type(case_t), intent(inout) :: C
 
@@ -480,7 +500,7 @@ subroutine case_free(C)
     call C%s%free()
 
     call C%q%free()
-    
+
   end subroutine case_free
-  
+
 end module case
diff --git a/src/comm/mpi_types.f90 b/src/comm/mpi_types.f90
index 2b95f57531c..102cba2c412 100644
--- a/src/comm/mpi_types.f90
+++ b/src/comm/mpi_types.f90
@@ -135,7 +135,7 @@ subroutine mpi_type_nmsh_hex_init
     call MPI_Get_address(nmsh_hex%v(7)%v_xyz, disp(15), ierr)
     call MPI_Get_address(nmsh_hex%v(8)%v_idx, disp(16), ierr)
     call MPI_Get_address(nmsh_hex%v(8)%v_xyz, disp(17), ierr)
-    
+
 
     base = disp(1)
     do i = 1, 17
@@ -150,7 +150,7 @@ subroutine mpi_type_nmsh_hex_init
     type(2:16:2) = MPI_INTEGER
     type(3:17:2) = MPI_DOUBLE_PRECISION
     call MPI_Type_create_struct(17, len, disp, type, MPI_NMSH_HEX, ierr)
-    call MPI_Type_commit(MPI_NMSH_HEX, ierr)    
+    call MPI_Type_commit(MPI_NMSH_HEX, ierr)
   end subroutine mpi_type_nmsh_hex_init
 
   !> Define a MPI derived type for a 2d nmsh quad
@@ -169,7 +169,7 @@ subroutine mpi_type_nmsh_quad_init
     call MPI_Get_address(nmsh_quad%v(3)%v_xyz, disp(7), ierr)
     call MPI_Get_address(nmsh_quad%v(4)%v_idx, disp(8), ierr)
     call MPI_Get_address(nmsh_quad%v(4)%v_xyz, disp(9), ierr)
-    
+
 
     base = disp(1)
     do i = 1, 9
@@ -184,7 +184,7 @@ subroutine mpi_type_nmsh_quad_init
     type(2:8:2) = MPI_INTEGER
     type(3:9:2) = MPI_DOUBLE_PRECISION
     call MPI_Type_create_struct(9, len, disp, type, MPI_NMSH_QUAD, ierr)
-    call MPI_Type_commit(MPI_NMSH_QUAD, ierr)    
+    call MPI_Type_commit(MPI_NMSH_QUAD, ierr)
   end subroutine mpi_type_nmsh_quad_init
 
   !> Define a MPI derived type for a nmsh zone
@@ -213,9 +213,9 @@ subroutine mpi_type_nmsh_zone_init
 
     call MPI_Type_create_struct(6, len, disp, type, MPI_NMSH_ZONE, ierr)
     call MPI_Type_commit(MPI_NMSH_ZONE, ierr)
-    
+
   end subroutine mpi_type_nmsh_zone_init
- 
+
   !> Define a MPI derived type for a nmsh curved element
   subroutine mpi_type_nmsh_curve_init
     type(nmsh_curve_el_t) :: nmsh_curve_el
@@ -241,22 +241,22 @@ subroutine mpi_type_nmsh_curve_init
 
     call MPI_Type_create_struct(3, len, disp, type, MPI_NMSH_CURVE, ierr)
     call MPI_Type_commit(MPI_NMSH_CURVE, ierr)
-    
+
   end subroutine mpi_type_nmsh_curve_init
-  
- 
+
+
   !> Define a MPI derived type for a 3d re2 data
   subroutine mpi_type_re2_xyz_init
     type(re2v1_xyz_t) :: re2v1_data
     type(re2v2_xyz_t) :: re2v2_data
     type(MPI_Datatype) :: type(4)
-    integer(kind=MPI_ADDRESS_KIND) :: disp(4), base    
+    integer(kind=MPI_ADDRESS_KIND) :: disp(4), base
     integer :: len(4), ierr, i
 
     !
     ! Setup version 1
     !
-    
+
     call MPI_Get_address(re2v1_data%rgroup, disp(1), ierr)
     call MPI_Get_address(re2v1_data%x, disp(2), ierr)
     call MPI_Get_address(re2v1_data%y, disp(3), ierr)
@@ -277,7 +277,7 @@ subroutine mpi_type_re2_xyz_init
     !
     ! Setup version 2
     !
-    
+
     call MPI_Get_address(re2v2_data%rgroup, disp(1), ierr)
     call MPI_Get_address(re2v2_data%x, disp(2), ierr)
     call MPI_Get_address(re2v2_data%y, disp(3), ierr)
@@ -302,7 +302,7 @@ subroutine mpi_type_re2_xy_init
     type(re2v1_xy_t) :: re2v1_data
     type(re2v2_xy_t) :: re2v2_data
     type(MPI_Datatype) :: type(3)
-    integer(kind=MPI_ADDRESS_KIND) :: disp(3), base    
+    integer(kind=MPI_ADDRESS_KIND) :: disp(3), base
     integer :: len(3), ierr, i
 
     !
@@ -324,7 +324,7 @@ subroutine mpi_type_re2_xy_init
 
     call MPI_Type_create_struct(3, len, disp, type, MPI_RE2V1_DATA_XY, ierr)
     call MPI_Type_commit(MPI_RE2V1_DATA_XY, ierr)
-    
+
     !
     ! Setup version 2
     !
@@ -358,7 +358,7 @@ subroutine mpi_type_re2_cv_init
     !
     ! Setup version 1
     !
-    
+
     call MPI_Get_address(re2v1_data%elem, disp(1), ierr)
     call MPI_Get_address(re2v1_data%zone, disp(2), ierr)
     call MPI_Get_address(re2v1_data%point, disp(3), ierr)
@@ -382,7 +382,7 @@ subroutine mpi_type_re2_cv_init
     !
     ! Setup version 2
     !
-    
+
     call MPI_Get_address(re2v2_data%elem, disp(1), ierr)
     call MPI_Get_address(re2v2_data%zone, disp(2), ierr)
     call MPI_Get_address(re2v2_data%point, disp(3), ierr)
@@ -402,9 +402,9 @@ subroutine mpi_type_re2_cv_init
 
     call MPI_Type_create_struct(4, len, disp, type, MPI_RE2V2_DATA_CV, ierr)
     call MPI_Type_commit(MPI_RE2V2_DATA_CV, ierr)
-    
+
   end subroutine mpi_type_re2_cv_init
-  
+
   !> Define a MPI derived type for re2 bc data
   subroutine mpi_type_re2_bc_init
     type(re2v1_bc_t) :: re2v1_data
@@ -460,7 +460,7 @@ subroutine mpi_type_re2_bc_init
 
     call MPI_Type_create_struct(4, len, disp, type, MPI_RE2V2_DATA_BC, ierr)
     call MPI_Type_commit(MPI_RE2V2_DATA_BC, ierr)
-    
+
   end subroutine mpi_type_re2_bc_init
 
   !> Define a MPI dervied type for a STL header
@@ -486,7 +486,7 @@ subroutine mpi_type_stl_header_init
 
     call MPI_Type_create_struct(2, len, disp, type, MPI_STL_HEADER, ierr)
     call MPI_Type_commit(MPI_STL_HEADER, ierr)
-      
+
   end subroutine mpi_type_stl_header_init
 
   !> Define a MPI derived type for a STL triangle
@@ -515,7 +515,7 @@ subroutine mpi_type_stl_triangle_init
 
     call MPI_Type_create_struct(5, len, disp, type, MPI_STL_TRIANGLE, ierr)
     call MPI_Type_commit(MPI_STL_TRIANGLE, ierr)
-        
+
   end subroutine mpi_type_stl_triangle_init
 
   !> Deallocate all derived MPI types
@@ -548,13 +548,13 @@ subroutine mpi_type_nmsh_zone_free
     integer ierr
     call MPI_Type_free(MPI_NMSH_ZONE, ierr)
   end subroutine mpi_type_nmsh_zone_free
-  
+
   !> Deallocate nmsh curve derived MPI type
   subroutine mpi_type_nmsh_curve_free
     integer ierr
     call MPI_Type_free(MPI_NMSH_CURVE, ierr)
   end subroutine mpi_type_nmsh_curve_free
-  
+
   !> Deallocate re2 xyz derived MPI type
   subroutine mpi_type_re2_xyz_free
     integer ierr
@@ -575,7 +575,7 @@ subroutine mpi_type_re2_cv_free
     call MPI_Type_free(MPI_RE2V1_DATA_CV, ierr)
     call MPI_Type_free(MPI_RE2V2_DATA_CV, ierr)
   end subroutine mpi_type_re2_cv_free
-  
+
   !> Deallocate re2 bc derived MPI type
   subroutine mpi_type_re2_bc_free
     integer ierr
@@ -588,7 +588,7 @@ subroutine mpi_type_stl_header_free
     integer ierr
     call MPI_Type_free(MPI_STL_HEADER, ierr)
   end subroutine mpi_type_stl_header_free
-  
+
   !> Deallocate STL triangle derived MPI type
   subroutine mpi_type_stl_triangle_free
     integer ierr
diff --git a/src/comm/parmetis.F90 b/src/comm/parmetis.F90
index 96c5e745309..cf742dca5f1 100644
--- a/src/comm/parmetis.F90
+++ b/src/comm/parmetis.F90
@@ -36,7 +36,7 @@ module parmetis
   use point
   use utils
   use num_types
-  use mesh_field 
+  use mesh_field
   use mesh
   use, intrinsic :: iso_c_binding
   implicit none
@@ -60,7 +60,7 @@ integer (c_int) function parmetis_v3_partmeshkway &
        type(c_ptr), value :: tpwgts, ubvec
      end function parmetis_v3_partmeshkway
   end interface
-  
+
   interface
      integer (c_int) function parmetis_v3_partgeom &
           (vtxdist, ndims, xyz, part) &
@@ -101,12 +101,12 @@ end function parmetis_v3_partgeom
 #define neko_idx(i) (i)
 #endif
 #endif
-  
+
 contains
 
 #ifdef HAVE_PARMETIS
 
-  !> Compute a k-way partitioning of a mesh @a msh 
+  !> Compute a k-way partitioning of a mesh @a msh
   subroutine parmetis_partmeshkway(msh, parts, weights, nprts)
     type(mesh_t), intent(inout) :: msh                !< Mesh
     type(mesh_fld_t), intent(inout) :: parts          !< Partitions
@@ -132,13 +132,13 @@ subroutine parmetis_partmeshkway(msh, parts, weights, nprts)
 
     if (present(nprts)) then
        nparts = nprts
-    else          
+    else
        nparts = pe_size
     end if
-    
+
     allocate(elmdist(0:pe_size), eptr(0:msh%nelv))
     allocate(eind(0:(msh%nelv * msh%npts)), part(msh%nelv))
-    allocate(elmwgt(msh%nelv), tpwgts(ncon * nparts), ubvec(ncon)) 
+    allocate(elmwgt(msh%nelv), tpwgts(ncon * nparts), ubvec(ncon))
 
     call parmetis_dist(elmdist, msh%nelv)
 
@@ -171,11 +171,11 @@ subroutine parmetis_partmeshkway(msh, parts, weights, nprts)
     else
        call neko_error(rcode)
     end if
-    
+
     deallocate(elmdist, eptr, eind, part, elmwgt, tpwgts, ubvec)
 
   end subroutine parmetis_partmeshkway
-  
+
   !> Compute a k-way partitioning of a mesh @a msh using
   !! a coordinated-based space-filing curves method
   subroutine parmetis_partgeom(msh, parts)
@@ -193,7 +193,7 @@ subroutine parmetis_partgeom(msh, parts)
     allocate(vtxdist(0:pe_size))
 
     call parmetis_dist(vtxdist, msh%nelv)
-    
+
     i = 1
     do j = 1, msh%nelv
        c = msh%elements(j)%e%centroid()
@@ -202,18 +202,18 @@ subroutine parmetis_partgeom(msh, parts)
        xyz(i + 2) = parmetis_real(c%x(3))
        i = i + 3
     end do
-    
+
     rcode = parmetis_v3_partgeom(c_loc(vtxdist), c_loc(ndims), &
          c_loc(xyz), c_loc(part))
-    
+
     if (rcode .eq. METIS_OK) then
        call parmetis_mark_parts(parts, msh, part)
     else
        call neko_error(rcode)
     end if
 
-    deallocate(part, xyz, vtxdist)    
-    
+    deallocate(part, xyz, vtxdist)
+
   end subroutine parmetis_partgeom
 
   !> Fill mesh field according to new partitions
@@ -228,7 +228,7 @@ subroutine parmetis_mark_parts(parts, msh, part)
     do i = 1, msh%nelv
        parts%data(i) = neko_idx(part(i))
     end do
-    
+
   end subroutine parmetis_mark_parts
 
   !> Setup weights and balance constraints for the dual graph
@@ -240,7 +240,7 @@ subroutine parmetis_wgt(msh, wgt, tpwgts, ubvec, nparts, ncon, weight)
     integer, intent(in) :: nparts, ncon
     type(mesh_fld_t), intent(in), optional :: weight
     integer :: i
-    
+
     if (present(weight)) then
        do i = 1, msh%nelv
           wgt(i) = parmetis_idx(weight%data(i))
@@ -248,7 +248,7 @@ subroutine parmetis_wgt(msh, wgt, tpwgts, ubvec, nparts, ncon, weight)
     else
        wgt = parmetis_idx(1)
     end if
-    
+
     do i = 1, (ncon * nparts)
        tpwgts(i) = parmetis_real(1) / parmetis_real(nparts)
     end do
@@ -258,7 +258,7 @@ subroutine parmetis_wgt(msh, wgt, tpwgts, ubvec, nparts, ncon, weight)
     end do
 
   end subroutine parmetis_wgt
-  
+
   !> Compute the (parallel) vertex distribution of the dual graph
   subroutine parmetis_dist(dist, nelv)
     integer(kind=M_INT), intent(inout) :: dist(0:pe_size)
@@ -283,7 +283,7 @@ end subroutine parmetis_dist
 
 #else
 
-  !> Compute a k-way partitioning of a mesh @a msh 
+  !> Compute a k-way partitioning of a mesh @a msh
   subroutine parmetis_partmeshkway(msh, parts, weights, nprts)
     type(mesh_t), intent(inout) :: msh                !< Mesh
     type(mesh_fld_t), intent(inout) :: parts          !< Partitions
@@ -299,7 +299,7 @@ subroutine parmetis_partgeom(msh, parts)
     type(mesh_fld_t), intent(inout) :: parts !< Partitions
     call neko_error('NEKO needs to be built with ParMETIS support')
   end subroutine parmetis_partgeom
-  
+
 #endif
-  
+
 end module parmetis
diff --git a/src/comm/redist.f90 b/src/comm/redist.f90
index 6eecd11ea22..a9fa8d25a2b 100644
--- a/src/comm/redist.f90
+++ b/src/comm/redist.f90
@@ -34,7 +34,7 @@
 module redist
   use mesh_field
   use neko_mpi_types
-  use mpi_f08    
+  use mpi_f08
   use htable
   use point
   use stack
@@ -48,7 +48,7 @@ module redist
   private
 
   public :: redist_mesh
-  
+
 contains
 
   !> Redistribute a mesh @a msh according to new partitions
@@ -80,10 +80,10 @@ subroutine redist_mesh(msh, parts)
     ! Reset possible periodic ids
     !
     call msh%reset_periodic_ids()
-    
+
     !
     ! Extract new zone distributions
-    !     
+    !
 
     allocate(new_zone_dist(0:pe_size - 1))
     do i = 0, pe_size - 1
@@ -111,11 +111,11 @@ subroutine redist_mesh(msh, parts)
     end do
 
     call redist_curve(msh, msh%curve, parts, new_curve_dist)
-    
+
     !
     ! Extract new mesh distribution
     !
-    
+
     allocate(new_mesh_dist(0:(pe_size - 1)))
     do i = 0, pe_size - 1
        call new_mesh_dist(i)%init()
@@ -127,12 +127,12 @@ subroutine redist_mesh(msh, parts)
        do j = 1, 8
           el%v(j)%v_idx = ep%pts(j)%p%id()
           el%v(j)%v_xyz = ep%pts(j)%p%x
-       end do       
+       end do
        call new_mesh_dist(parts%data(i))%push(el)
     end do
 
-    
-    gdim = msh%gdim    
+
+    gdim = msh%gdim
     call msh%free()
 
     max_recv = 0
@@ -141,13 +141,13 @@ subroutine redist_mesh(msh, parts)
        max_recv(2) = max(max_recv(2), new_zone_dist(i)%size())
        max_recv(3) = max(max_recv(3), new_curve_dist(i)%size())
     end do
-    
+
     call MPI_Allreduce(MPI_IN_PLACE, max_recv, 3, MPI_INTEGER, &
          MPI_MAX, NEKO_COMM, ierr)
     allocate(recv_buf_msh(max_recv(1)))
     allocate(recv_buf_zone(max_recv(2)))
     allocate(recv_buf_curve(max_recv(3)))
-    
+
     do i = 1, pe_size - 1
        src = modulo(pe_rank - i + pe_size, pe_size)
        dst = modulo(pe_rank + i, pe_size)
@@ -189,9 +189,9 @@ subroutine redist_mesh(msh, parts)
 
        do j = 1, recv_size
           call new_curve_dist(pe_rank)%push(recv_buf_curve(j))
-      end do              
+       end do
     end do
-    
+
     deallocate(recv_buf_msh)
     deallocate(recv_buf_zone)
     deallocate(recv_buf_curve)
@@ -223,12 +223,12 @@ subroutine redist_mesh(msh, parts)
           end do
           call msh%add_element(i, &
                p(1), p(2), p(3), p(4), p(5), p(6), p(7), p(8))
-          
+
           if (el_map%get(np(i)%el_idx, tmp) .gt. 0) then
              ! Old glb to new local
              tmp = i
              call el_map%set(np(i)%el_idx, tmp)
-             
+
              ! Old glb to new glb
              tmp = msh%elements(i)%e%id()
              call glb_map%set(np(i)%el_idx,  tmp)
@@ -244,7 +244,7 @@ subroutine redist_mesh(msh, parts)
     ! Figure out new mesh distribution (necessary for periodic zones)
     !
     call pe_lst%init()
-    
+
     ! We should use the %array() procedure, which works great for
     ! GNU, Intel and NEC, but it breaks horribly on Cray when using
     ! certain data types
@@ -256,13 +256,13 @@ subroutine redist_mesh(msh, parts)
           end if
        end do
     end select
-    
+
     max_recv_idx = 2 * pe_lst%size()
     call MPI_Allreduce(MPI_IN_PLACE, max_recv_idx, 1, MPI_INTEGER, &
          MPI_MAX, NEKO_COMM, ierr)
     allocate(recv_buf_idx(max_recv_idx))
     allocate(send_buf_idx(max_recv_idx))
-    
+
     do i = 1, pe_size - 1
        src = modulo(pe_rank - i + pe_size, pe_size)
        dst = modulo(pe_rank + i, pe_size)
@@ -295,15 +295,15 @@ subroutine redist_mesh(msh, parts)
 
        do j = 1, recv_size, 2
           call glb_map%set(recv_buf_idx(j),  recv_buf_idx(j+1))
-       end do              
+       end do
     end do
     deallocate(recv_buf_idx)
     deallocate(send_buf_idx)
     call pe_lst%free()
-    
+
     !
     ! Add zone data for new mesh distribution
-    !     
+    !
     zp => new_zone_dist(pe_rank)%array()
     do i = 1, new_zone_dist(pe_rank)%size()
        if (el_map%get(zp(i)%e, new_el_idx) .gt. 0) then
@@ -322,7 +322,7 @@ subroutine redist_mesh(msh, parts)
           if (glb_map%get(zp(i)%p_e, new_pel_idx) .gt. 0) then
              call neko_error('Missing periodic element after redistribution')
           end if
-          
+
           call msh%mark_periodic_facet(zp(i)%f, new_el_idx, &
                zp(i)%p_f, new_pel_idx, zp(i)%glb_pt_ids)
        case(7)
@@ -338,12 +338,12 @@ subroutine redist_mesh(msh, parts)
           if (glb_map%get(zp(i)%p_e, new_pel_idx) .gt. 0) then
              call neko_error('Missing periodic element after redistribution')
           end if
-          
+
           call msh%apply_periodic_facet(zp(i)%f, new_el_idx, &
                zp(i)%p_f, new_pel_idx, zp(i)%glb_pt_ids)
        end select
     end do
- 
+
     call new_zone_dist(pe_rank)%free()
 
     !
@@ -359,7 +359,7 @@ subroutine redist_mesh(msh, parts)
     call new_curve_dist(pe_rank)%free()
 
     call msh%finalize()
-    
+
   end subroutine redist_mesh
 
   !> Fill redistribution list for zone data
@@ -378,7 +378,7 @@ subroutine redist_zone(msh, z, type, parts, new_dist, label)
     else
        lbl = 0
     end if
-    
+
     select type(zp => z)
     type is (facet_zone_periodic_t)
        do i = 1, zp%size
@@ -387,8 +387,8 @@ subroutine redist_zone(msh, z, type, parts, new_dist, label)
           nmsh_zone%f = zp%facet_el(i)%x(1)
           nmsh_zone%p_e = zp%p_facet_el(i)%x(2)
           nmsh_zone%p_f = zp%p_facet_el(i)%x(1)
-          nmsh_zone%glb_pt_ids = zp%p_ids(i)%x          
-          nmsh_zone%type = type          
+          nmsh_zone%glb_pt_ids = zp%p_ids(i)%x
+          nmsh_zone%type = type
           call new_dist(parts%data(zone_el))%push(nmsh_zone)
        end do
     type is (facet_zone_t)
@@ -419,7 +419,7 @@ subroutine redist_curve(msh, c, parts, new_dist)
        nmsh_curve%type = c%curve_el(i)%curve_type
        call new_dist(parts%data(curve_el))%push(nmsh_curve)
     end do
-    
+
   end subroutine redist_curve
 
 end module redist
diff --git a/src/common/bcknd/cpu/rhs_maker_cpu.f90 b/src/common/bcknd/cpu/rhs_maker_cpu.f90
index 7e766e07103..1998f4e70c7 100644
--- a/src/common/bcknd/cpu/rhs_maker_cpu.f90
+++ b/src/common/bcknd/cpu/rhs_maker_cpu.f90
@@ -6,7 +6,7 @@ module rhs_maker_cpu
   use scratch_registry
   implicit none
   private
-  
+
   type, public, extends(rhs_maker_sumab_t) :: rhs_maker_sumab_cpu_t
    contains
      procedure, nopass :: compute_fluid => rhs_maker_sumab_cpu
@@ -23,7 +23,7 @@ module rhs_maker_cpu
      procedure, nopass :: compute_fluid => rhs_maker_bdf_cpu
      procedure, nopass :: compute_scalar => scalar_rhs_maker_bdf_cpu
   end type rhs_maker_bdf_cpu_t
-  
+
 contains
 
   subroutine rhs_maker_sumab_cpu(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab)
@@ -36,20 +36,20 @@ subroutine rhs_maker_sumab_cpu(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab
 
     n = uu%dof%size()
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        u%x(i,1,1,1) = ab(1) * uu%x(i,1,1,1) + ab(2) * uulag%lf(1)%x(i,1,1,1)
        v%x(i,1,1,1) = ab(1) * vv%x(i,1,1,1) + ab(2) * vvlag%lf(1)%x(i,1,1,1)
        w%x(i,1,1,1) = ab(1) * ww%x(i,1,1,1) + ab(2) * wwlag%lf(1)%x(i,1,1,1)
     end do
 
     if (nab .eq. 3) then
-       do i = 1, n
+       do concurrent (i = 1:n)
           u%x(i,1,1,1) = u%x(i,1,1,1) + ab(3) * uulag%lf(2)%x(i,1,1,1)
           v%x(i,1,1,1) = v%x(i,1,1,1) + ab(3) * vvlag%lf(2)%x(i,1,1,1)
           w%x(i,1,1,1) = w%x(i,1,1,1) + ab(3) * wwlag%lf(2)%x(i,1,1,1)
        end do
     end if
-    
+
   end subroutine rhs_maker_sumab_cpu
 
   subroutine rhs_maker_ext_cpu(fx_lag, fy_lag, fz_lag, &
@@ -63,12 +63,12 @@ subroutine rhs_maker_ext_cpu(fx_lag, fy_lag, fz_lag, &
     integer :: i
     type(field_t), pointer :: temp1, temp2, temp3
     integer :: temp_indices(3)
-    
+
     call neko_scratch_registry%request_field(temp1, temp_indices(1))
     call neko_scratch_registry%request_field(temp2, temp_indices(2))
     call neko_scratch_registry%request_field(temp3, temp_indices(3))
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        temp1%x(i,1,1,1) = ext_coeffs(2) * fx_lag%x(i,1,1,1) + &
                           ext_coeffs(3) * fx_laglag%x(i,1,1,1)
        temp2%x(i,1,1,1) = ext_coeffs(2) * fy_lag%x(i,1,1,1) + &
@@ -77,7 +77,7 @@ subroutine rhs_maker_ext_cpu(fx_lag, fy_lag, fz_lag, &
                           ext_coeffs(3) * fz_laglag%x(i,1,1,1)
     end do
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        fx_laglag%x(i,1,1,1) = fx_lag%x(i,1,1,1)
        fy_laglag%x(i,1,1,1) = fy_lag%x(i,1,1,1)
        fz_laglag%x(i,1,1,1) = fz_lag%x(i,1,1,1)
@@ -86,47 +86,50 @@ subroutine rhs_maker_ext_cpu(fx_lag, fy_lag, fz_lag, &
        fz_lag%x(i,1,1,1) = fz(i)
     end do
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        fx(i) = (ext_coeffs(1) * fx(i) + temp1%x(i,1,1,1)) * rho
        fy(i) = (ext_coeffs(1) * fy(i) + temp2%x(i,1,1,1)) * rho
        fz(i) = (ext_coeffs(1) * fz(i) + temp3%x(i,1,1,1)) * rho
     end do
-    
+
     call neko_scratch_registry%relinquish_field(temp_indices)
-    
+
   end subroutine rhs_maker_ext_cpu
 
-  subroutine scalar_rhs_maker_ext_cpu(temp1, fs_lag, fs_laglag, fs, rho, ext_coeffs, &
-                                n)
-    type(field_t), intent(inout) :: temp1
+  subroutine scalar_rhs_maker_ext_cpu(fs_lag, fs_laglag, fs, rho, ext_coeffs, n)
     type(field_t), intent(inout) :: fs_lag
     type(field_t), intent(inout) :: fs_laglag
     real(kind=rp), intent(inout) :: rho, ext_coeffs(4)
     integer, intent(in) :: n
     real(kind=rp), intent(inout) :: fs(n)
     integer :: i
+    type(field_t), pointer :: temp1
+    integer :: temp_index
 
-    do i = 1, n
+    call neko_scratch_registry%request_field(temp1, temp_index)
+
+    do concurrent (i = 1:n)
        temp1%x(i,1,1,1) = ext_coeffs(2) * fs_lag%x(i,1,1,1) + &
                           ext_coeffs(3) * fs_laglag%x(i,1,1,1)
     end do
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        fs_laglag%x(i,1,1,1) = fs_lag%x(i,1,1,1)
        fs_lag%x(i,1,1,1) = fs(i)
     end do
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        fs(i) = (ext_coeffs(1) * fs(i) + temp1%x(i,1,1,1)) * rho
     end do
-    
+
+    call neko_scratch_registry%relinquish_field(temp_index)
   end subroutine scalar_rhs_maker_ext_cpu
 
   subroutine rhs_maker_bdf_cpu(ulag, vlag, wlag, bfx, bfy, bfz, &
-                               u, v, w, B, rho, dt, bd, nbd, n)    
+                               u, v, w, B, rho, dt, bd, nbd, n)
     integer, intent(in) :: n, nbd
     type(field_t), intent(in) :: u, v, w
-    type(field_series_t), intent(in) :: ulag, vlag, wlag        
+    type(field_series_t), intent(in) :: ulag, vlag, wlag
     real(kind=rp), intent(inout) :: bfx(n), bfy(n), bfz(n)
     real(kind=rp), intent(in) :: B(n)
     real(kind=rp), intent(in) :: dt, rho, bd(4)
@@ -142,27 +145,27 @@ subroutine rhs_maker_bdf_cpu(ulag, vlag, wlag, bfx, bfy, bfz, &
     call neko_scratch_registry%request_field(tb2, temp_indices(5))
     call neko_scratch_registry%request_field(tb3, temp_indices(6))
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        tb1%x(i,1,1,1) = u%x(i,1,1,1) * B(i) * bd(2)
        tb2%x(i,1,1,1) = v%x(i,1,1,1) * B(i) * bd(2)
        tb3%x(i,1,1,1) = w%x(i,1,1,1) * B(i) * bd(2)
     end do
 
     do ilag = 2, nbd
-       do i = 1, n
+       do concurrent (i = 1:n)
           ta1%x(i,1,1,1) = ulag%lf(ilag-1)%x(i,1,1,1) * B(i) * bd(ilag+1)
           ta2%x(i,1,1,1) = vlag%lf(ilag-1)%x(i,1,1,1) * B(i) * bd(ilag+1)
           ta3%x(i,1,1,1) = wlag%lf(ilag-1)%x(i,1,1,1) * B(i) * bd(ilag+1)
        end do
 
-       do i = 1, n
+       do concurrent (i = 1:n)
           tb1%x(i,1,1,1) = tb1%x(i,1,1,1) + ta1%x(i,1,1,1)
           tb2%x(i,1,1,1) = tb2%x(i,1,1,1) + ta2%x(i,1,1,1)
           tb3%x(i,1,1,1) = tb3%x(i,1,1,1) + ta3%x(i,1,1,1)
        end do
     end do
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        bfx(i) = bfx(i) + tb1%x(i,1,1,1) * (rho / dt)
        bfy(i) = bfy(i) + tb2%x(i,1,1,1) * (rho / dt)
        bfz(i) = bfz(i) + tb3%x(i,1,1,1) * (rho / dt)
@@ -172,35 +175,39 @@ subroutine rhs_maker_bdf_cpu(ulag, vlag, wlag, bfx, bfy, bfz, &
 
   end subroutine rhs_maker_bdf_cpu
 
-  subroutine scalar_rhs_maker_bdf_cpu(temp1, temp2, s_lag, fs, s, B, rho, dt, &
-       bd, nbd, n)    
+  subroutine scalar_rhs_maker_bdf_cpu(s_lag, fs, s, B, rho, dt, bd, nbd, n)
     integer, intent(in) :: n, nbd
-    type(field_t), intent(inout) :: temp1, temp2
     type(field_t), intent(in) :: s
     type(field_series_t), intent(in) :: s_lag
     real(kind=rp), intent(inout) :: fs(n)
     real(kind=rp), intent(in) :: B(n)
     real(kind=rp), intent(in) :: dt, rho, bd(4)
     integer :: i, ilag
+    type(field_t), pointer :: temp1, temp2
+    integer :: temp_indices(2)
+
+    call neko_scratch_registry%request_field(temp1, temp_indices(1))
+    call neko_scratch_registry%request_field(temp2, temp_indices(2))
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        temp2%x(i,1,1,1) = s%x(i,1,1,1) * B(i) * bd(2)
     end do
 
     do ilag = 2, nbd
-       do i = 1, n
+       do concurrent (i = 1:n)
           temp1%x(i,1,1,1) = s_lag%lf(ilag-1)%x(i,1,1,1) * B(i) * bd(ilag+1)
        end do
 
-       do i = 1, n
+       do concurrent (i = 1:n)
           temp2%x(i,1,1,1) = temp2%x(i,1,1,1) + temp1%x(i,1,1,1)
        end do
     end do
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        fs(i) = fs(i) + temp2%x(i,1,1,1) * (rho / dt)
     end do
 
+    call neko_scratch_registry%relinquish_field(temp_indices)
   end subroutine scalar_rhs_maker_bdf_cpu
 
 end module rhs_maker_cpu
diff --git a/src/common/bcknd/device/cuda/makebdf_kernel.h b/src/common/bcknd/device/cuda/makebdf_kernel.h
index 32ebf958a28..8c420ee7f88 100644
--- a/src/common/bcknd/device/cuda/makebdf_kernel.h
+++ b/src/common/bcknd/device/cuda/makebdf_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __COMMON_MAKEBDF_KERNEL__
+#define __COMMON_MAKEBDF_KERNEL__
+
 template< typename T >
 __global__ void makebdf_kernel(const T * __restrict__ ulag1,
                                const T * __restrict__ ulag2,
@@ -115,3 +118,5 @@ __global__ void scalar_makebdf_kernel(const T * __restrict__ s_lag,
   }
   
 }
+
+#endif // __COMMON_MAKEBDF_KERNEL__
\ No newline at end of file
diff --git a/src/common/bcknd/device/cuda/makeext_kernel.h b/src/common/bcknd/device/cuda/makeext_kernel.h
index 69967a19f62..155d2790961 100644
--- a/src/common/bcknd/device/cuda/makeext_kernel.h
+++ b/src/common/bcknd/device/cuda/makeext_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __COMMON_MAKEEXT_KERNEL__
+#define __COMMON_MAKEEXT_KERNEL__
+
 template< typename T >
 __global__ void makeext_kernel(T * __restrict__ abx1,
                                T * __restrict__ aby1,
@@ -93,3 +96,5 @@ __global__ void scalar_makeext_kernel(T * __restrict__ fs_lag,
   } 
   
 }
+
+#endif // __COMMON_MAKEEXT_KERNEL__
\ No newline at end of file
diff --git a/src/common/bcknd/device/cuda/projection_kernel.h b/src/common/bcknd/device/cuda/projection_kernel.h
index f50838d6f04..a59fb7b3236 100644
--- a/src/common/bcknd/device/cuda/projection_kernel.h
+++ b/src/common/bcknd/device/cuda/projection_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __COMMON_PROJECTION_KERNEL__
+#define __COMMON_PROJECTION_KERNEL__
+
 /**
  * Project on vector operations
  */
@@ -90,3 +93,4 @@ __global__ void project_ortho_vec_kernel(T * __restrict__ x,
 }
 
 
+#endif // __COMMON_PROJECTION_KERNEL__
\ No newline at end of file
diff --git a/src/common/bcknd/device/cuda/sumab_kernel.h b/src/common/bcknd/device/cuda/sumab_kernel.h
index 6681b2bd55e..da49b9e6320 100644
--- a/src/common/bcknd/device/cuda/sumab_kernel.h
+++ b/src/common/bcknd/device/cuda/sumab_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __COMMON_SUMAB_KERNEL__
+#define __COMMON_SUMAB_KERNEL__
+
 template< typename T >
 __global__ void sumab_kernel(T * __restrict__ u,
                              T * __restrict__ v,
@@ -70,3 +73,4 @@ __global__ void sumab_kernel(T * __restrict__ u,
   
 }
 
+#endif // __COMMON_SUMAB_KERNEL__
\ No newline at end of file
diff --git a/src/common/bcknd/device/device_projection.F90 b/src/common/bcknd/device/device_projection.F90
index a31a5e84ecf..4b253e8bd13 100644
--- a/src/common/bcknd/device/device_projection.F90
+++ b/src/common/bcknd/device/device_projection.F90
@@ -120,5 +120,5 @@ subroutine device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, &
     call neko_error('No device backend configured')
 #endif
   end subroutine device_project_ortho
-  
+
 end module device_projection
diff --git a/src/common/bcknd/device/hip/makebdf_kernel.h b/src/common/bcknd/device/hip/makebdf_kernel.h
index d1204825e20..8ef70a6bfdb 100644
--- a/src/common/bcknd/device/hip/makebdf_kernel.h
+++ b/src/common/bcknd/device/hip/makebdf_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __COMMON_MAKEBDF_KERNEL__
+#define __COMMON_MAKEBDF_KERNEL__
+
 template< typename T >
 __global__ void makebdf_kernel(const T * __restrict__ ulag1,
                                const T * __restrict__ ulag2,
@@ -116,3 +119,5 @@ __global__ void scalar_makebdf_kernel(const T * __restrict__ s_lag,
   }
 
 }
+
+#endif //__COMMON_MAKEBDF_KERNEL__
\ No newline at end of file
diff --git a/src/common/bcknd/device/hip/makeext_kernel.h b/src/common/bcknd/device/hip/makeext_kernel.h
index 3534c34b8e8..4327d9f2bca 100644
--- a/src/common/bcknd/device/hip/makeext_kernel.h
+++ b/src/common/bcknd/device/hip/makeext_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __COMMON_MAKEEXT_KERNEL__
+#define __COMMON_MAKEEXT_KERNEL__
+
 template< typename T >
 __global__ void makeext_kernel(T * __restrict__ abx1,
                                T * __restrict__ aby1,
@@ -93,3 +96,5 @@ __global__ void scalar_makeext_kernel(T * __restrict__ fs_lag,
   } 
   
 }
+
+#endif // __COMMON_MAKEEXT_KERNEL__
\ No newline at end of file
diff --git a/src/common/bcknd/device/hip/projection_kernel.h b/src/common/bcknd/device/hip/projection_kernel.h
index 67d96b099ef..0dc225d2a36 100644
--- a/src/common/bcknd/device/hip/projection_kernel.h
+++ b/src/common/bcknd/device/hip/projection_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __COMMON_PROJECTION_KERNEL__
+#define __COMMON_PROJECTION_KERNEL__
+
 /**
  * Project on vector operations
  */
@@ -88,3 +91,5 @@ __global__ void project_ortho_vec_kernel(T * __restrict__ x,
   }
   
 }
+
+#endif // __COMMON_PROJECTION_KERNEL__
\ No newline at end of file
diff --git a/src/common/bcknd/device/hip/sumab_kernel.h b/src/common/bcknd/device/hip/sumab_kernel.h
index 6681b2bd55e..da49b9e6320 100644
--- a/src/common/bcknd/device/hip/sumab_kernel.h
+++ b/src/common/bcknd/device/hip/sumab_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __COMMON_SUMAB_KERNEL__
+#define __COMMON_SUMAB_KERNEL__
+
 template< typename T >
 __global__ void sumab_kernel(T * __restrict__ u,
                              T * __restrict__ v,
@@ -70,3 +73,4 @@ __global__ void sumab_kernel(T * __restrict__ u,
   
 }
 
+#endif // __COMMON_SUMAB_KERNEL__
\ No newline at end of file
diff --git a/src/common/bcknd/device/opencl/rhs_maker_kernel.cl b/src/common/bcknd/device/opencl/rhs_maker_kernel.cl
index 771531d08fb..5efc6b56028 100644
--- a/src/common/bcknd/device/opencl/rhs_maker_kernel.cl
+++ b/src/common/bcknd/device/opencl/rhs_maker_kernel.cl
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __COMMON_RHS_MAKER_KENEL__
+#define __COMMON_RHS_MAKER_KENEL__
+
 __kernel void sumab_kernel(__global real * __restrict__ u,
                            __global real * __restrict__ v,
                            __global real * __restrict__ w,
@@ -212,3 +215,4 @@ __kernel void scalar_makebdf_kernel(__global const real * __restrict__ s_lag,
 }
 
 
+#endif // __COMMON_RHS_MAKER_KENEL__
\ No newline at end of file
diff --git a/src/common/bcknd/device/rhs_maker_device.F90 b/src/common/bcknd/device/rhs_maker_device.F90
index 89713916462..f06bedcf020 100644
--- a/src/common/bcknd/device/rhs_maker_device.F90
+++ b/src/common/bcknd/device/rhs_maker_device.F90
@@ -59,7 +59,7 @@ module rhs_maker_device
   end type rhs_maker_bdf_device_t
 
 #ifdef HAVE_HIP
-    interface
+  interface
      subroutine rhs_maker_sumab_hip(u_d, v_d, w_d, uu_d, vv_d, ww_d, &
           uulag1, uulag2, vvlag1, vvlag2, wwlag1, wwlag2, ab1, ab2, ab3, nab, n)&
           bind(c, name='rhs_maker_sumab_hip')
@@ -80,7 +80,7 @@ subroutine rhs_maker_ext_hip(abx1_d, aby1_d, abz1_d, &
                                   bind(c, name='rhs_maker_ext_hip')
        use, intrinsic :: iso_c_binding
        import c_rp
-       type(c_ptr), value :: abx1_d, aby1_d, abz1_d 
+       type(c_ptr), value :: abx1_d, aby1_d, abz1_d
        type(c_ptr), value :: abx2_d, aby2_d, abz2_d
        type(c_ptr), value :: bfx_d, bfy_d, bfz_d
        real(c_rp) :: rho, ab1, ab2, ab3
@@ -94,7 +94,7 @@ subroutine scalar_rhs_maker_ext_hip(fs_lag_d, fs_laglag_d, fs_d, rho, &
                                           bind(c, name='scalar_rhs_maker_ext_hip')
        use, intrinsic :: iso_c_binding
        import c_rp
-       type(c_ptr), value :: fs_lag_d, fs_laglag_d, fs_d 
+       type(c_ptr), value :: fs_lag_d, fs_laglag_d, fs_d
        real(c_rp) :: rho, ext1, ext2, ext3
        integer(c_int) :: n
      end subroutine scalar_rhs_maker_ext_hip
@@ -112,7 +112,7 @@ subroutine rhs_maker_bdf_hip(ulag1_d, ulag2_d, vlag1_d, vlag2_d, &
        reaL(c_rp) :: rho, dt, bd2, bd3, bd4
        integer(c_int) :: nbd, n
      end subroutine rhs_maker_bdf_hip
-  end interface 
+  end interface
 
   interface
      subroutine scalar_rhs_maker_bdf_hip(s_lag_d, s_laglag_d, fs_d, s_d, B_d, &
@@ -125,7 +125,7 @@ subroutine scalar_rhs_maker_bdf_hip(s_lag_d, s_laglag_d, fs_d, s_d, B_d, &
        reaL(c_rp) :: rho, dt, bd2, bd3, bd4
        integer(c_int) :: nbd, n
      end subroutine scalar_rhs_maker_bdf_hip
-  end interface  
+  end interface
 #elif HAVE_CUDA
   interface
      subroutine rhs_maker_sumab_cuda(u_d, v_d, w_d, uu_d, vv_d, ww_d, &
@@ -148,7 +148,7 @@ subroutine rhs_maker_ext_cuda(abx1_d, aby1_d, abz1_d, &
                                    bind(c, name='rhs_maker_ext_cuda')
        use, intrinsic :: iso_c_binding
        import c_rp
-       type(c_ptr), value :: abx1_d, aby1_d, abz1_d 
+       type(c_ptr), value :: abx1_d, aby1_d, abz1_d
        type(c_ptr), value :: abx2_d, aby2_d, abz2_d
        type(c_ptr), value :: bfx_d, bfy_d, bfz_d
        real(c_rp) :: rho, ab1, ab2, ab3
@@ -162,7 +162,7 @@ subroutine scalar_rhs_maker_ext_cuda(fs_lag_d, fs_laglag_d, fs_d, rho, &
                                           bind(c, name='scalar_rhs_maker_ext_cuda')
        use, intrinsic :: iso_c_binding
        import c_rp
-       type(c_ptr), value :: fs_lag_d, fs_laglag_d, fs_d 
+       type(c_ptr), value :: fs_lag_d, fs_laglag_d, fs_d
        real(c_rp) :: rho, ext1, ext2, ext3
        integer(c_int) :: n
      end subroutine scalar_rhs_maker_ext_cuda
@@ -180,8 +180,8 @@ subroutine rhs_maker_bdf_cuda(ulag1_d, ulag2_d, vlag1_d, vlag2_d, &
        reaL(c_rp) :: rho, dt, bd2, bd3, bd4
        integer(c_int) :: nbd, n
      end subroutine rhs_maker_bdf_cuda
-  end interface  
-  
+  end interface
+
   interface
      subroutine scalar_rhs_maker_bdf_cuda(s_lag_d, s_laglag_d, fs_d, s_d, B_d, &
           rho, dt, bd2, bd3, bd4, nbd, n) &
@@ -193,7 +193,7 @@ subroutine scalar_rhs_maker_bdf_cuda(s_lag_d, s_laglag_d, fs_d, s_d, B_d, &
        reaL(c_rp) :: rho, dt, bd2, bd3, bd4
        integer(c_int) :: nbd, n
      end subroutine scalar_rhs_maker_bdf_cuda
-  end interface  
+  end interface
 #elif HAVE_OPENCL
   interface
      subroutine rhs_maker_sumab_opencl(u_d, v_d, w_d, uu_d, vv_d, ww_d, &
@@ -216,7 +216,7 @@ subroutine rhs_maker_ext_opencl(abx1_d, aby1_d, abz1_d, &
                                      bind(c, name='rhs_maker_ext_opencl')
        use, intrinsic :: iso_c_binding
        import c_rp
-       type(c_ptr), value :: abx1_d, aby1_d, abz1_d 
+       type(c_ptr), value :: abx1_d, aby1_d, abz1_d
        type(c_ptr), value :: abx2_d, aby2_d, abz2_d
        type(c_ptr), value :: bfx_d, bfy_d, bfz_d
        real(c_rp) :: rho, ab1, ab2, ab3
@@ -230,7 +230,7 @@ subroutine scalar_rhs_maker_ext_opencl(fs_lag_d, fs_laglag_d, fs_d, rho, &
                                           bind(c, name='scalar_rhs_maker_ext_opencl')
        use, intrinsic :: iso_c_binding
        import c_rp
-       type(c_ptr), value :: fs_lag_d, fs_laglag_d, fs_d 
+       type(c_ptr), value :: fs_lag_d, fs_laglag_d, fs_d
        real(c_rp) :: rho, ext1, ext2, ext3
        integer(c_int) :: n
      end subroutine scalar_rhs_maker_ext_opencl
@@ -248,7 +248,7 @@ subroutine rhs_maker_bdf_opencl(ulag1_d, ulag2_d, vlag1_d, vlag2_d, &
        reaL(c_rp) :: rho, dt, bd2, bd3, bd4
        integer(c_int) :: nbd, n
      end subroutine rhs_maker_bdf_opencl
-  end interface 
+  end interface
 
   interface
      subroutine scalar_rhs_maker_bdf_opencl(s_lag_d, s_laglag_d, fs_d, s_d, B_d, &
@@ -261,12 +261,12 @@ subroutine scalar_rhs_maker_bdf_opencl(s_lag_d, s_laglag_d, fs_d, s_d, B_d, &
        reaL(c_rp) :: rho, dt, bd2, bd3, bd4
        integer(c_int) :: nbd, n
      end subroutine scalar_rhs_maker_bdf_opencl
-  end interface  
+  end interface
 #endif
 
 contains
 
-    subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab)
+  subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab)
     type(field_t), intent(inout) :: u,v, w
     type(field_t), intent(inout) :: uu, vv, ww
     type(field_series_t), intent(inout) :: uulag, vvlag, wwlag
@@ -278,7 +278,7 @@ subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab,
          uulag%lf(1)%x_d, uulag%lf(2)%x_d, vvlag%lf(1)%x_d, vvlag%lf(2)%x_d, &
          wwlag%lf(1)%x_d, wwlag%lf(2)%x_d, ab(1), ab(2), ab(3), nab, &
          uu%dof%size())
-#elif HAVE_CUDA    
+#elif HAVE_CUDA
     call rhs_maker_sumab_cuda(u%x_d, v%x_d, w%x_d, uu%x_d, vv%x_d, ww%x_d, &
          uulag%lf(1)%x_d, uulag%lf(2)%x_d, vvlag%lf(1)%x_d, vvlag%lf(2)%x_d, &
          wwlag%lf(1)%x_d, wwlag%lf(2)%x_d, ab(1), ab(2), ab(3), nab, &
@@ -289,7 +289,7 @@ subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab,
          wwlag%lf(1)%x_d, wwlag%lf(2)%x_d, ab(1), ab(2), ab(3), nab, &
          uu%dof%size())
 #endif
-    
+
   end subroutine rhs_maker_sumab_device
 
   subroutine rhs_maker_ext_device(fx_lag, fy_lag, fz_lag, &
@@ -322,12 +322,11 @@ subroutine rhs_maker_ext_device(fx_lag, fy_lag, fz_lag, &
                               fx_d, fy_d, fz_d, rho, &
                               ext_coeffs(1), ext_coeffs(2), ext_coeffs(3), n)
 #endif
-    
+
   end subroutine rhs_maker_ext_device
 
-  subroutine scalar_rhs_maker_ext_device(temp1, fs_lag, fs_laglag, fs, &
+  subroutine scalar_rhs_maker_ext_device(fs_lag, fs_laglag, fs, &
                            rho, ext_coeffs, n)
-    type(field_t), intent(inout) :: temp1
     type(field_t), intent(inout) :: fs_lag
     type(field_t), intent(inout) :: fs_laglag
     real(kind=rp), intent(inout) :: rho, ext_coeffs(4)
@@ -347,14 +346,14 @@ subroutine scalar_rhs_maker_ext_device(temp1, fs_lag, fs_laglag, fs, &
     call scalar_rhs_maker_ext_opencl(fs_lag%x_d, fs_laglag%x_d, fs_d, rho, &
                               ext_coeffs(1), ext_coeffs(2), ext_coeffs(3), n)
 #endif
-    
+
   end subroutine scalar_rhs_maker_ext_device
 
   subroutine rhs_maker_bdf_device(ulag, vlag, wlag, bfx, bfy, bfz, &
-                               u, v, w, B, rho, dt, bd, nbd, n)    
+                               u, v, w, B, rho, dt, bd, nbd, n)
     integer, intent(in) :: n, nbd
     type(field_t), intent(in) :: u, v, w
-    type(field_series_t), intent(in) :: ulag, vlag, wlag        
+    type(field_series_t), intent(in) :: ulag, vlag, wlag
     real(kind=rp), intent(inout) :: bfx(n), bfy(n), bfz(n)
     real(kind=rp), intent(in) :: B(n)
     real(kind=rp), intent(in) :: dt, rho, bd(4)
@@ -364,7 +363,7 @@ subroutine rhs_maker_bdf_device(ulag, vlag, wlag, bfx, bfy, bfz, &
     bfy_d = device_get_ptr(bfy)
     bfz_d = device_get_ptr(bfz)
     B_d = device_get_ptr(B)
-    
+
 #ifdef HAVE_HIP
     call rhs_maker_bdf_hip(ulag%lf(1)%x_d, ulag%lf(2)%x_d, &
                            vlag%lf(1)%x_d, vlag%lf(2)%x_d, &
@@ -387,12 +386,10 @@ subroutine rhs_maker_bdf_device(ulag, vlag, wlag, bfx, bfy, bfz, &
 
   end subroutine rhs_maker_bdf_device
 
-  subroutine scalar_rhs_maker_bdf_device(temp1, temp2, s_lag, fs, s, B, rho, dt, &
-                                         bd, nbd, n)    
+  subroutine scalar_rhs_maker_bdf_device(s_lag, fs, s, B, rho, dt, &
+                                         bd, nbd, n)
     integer, intent(in) :: n, nbd
-    type(field_t), intent(inout) :: temp1
-    type(field_t), intent(in) :: s 
-    type(field_t), intent(inout) :: temp2
+    type(field_t), intent(in) :: s
     type(field_series_t), intent(in) :: s_lag
     real(kind=rp), intent(inout) :: fs(n)
     real(kind=rp), intent(in) :: B(n)
@@ -401,7 +398,7 @@ subroutine scalar_rhs_maker_bdf_device(temp1, temp2, s_lag, fs, s, B, rho, dt, &
 
     fs_d = device_get_ptr(fs)
     B_d = device_get_ptr(B)
-    
+
 #ifdef HAVE_HIP
     call scalar_rhs_maker_bdf_hip(s_lag%lf(1)%x_d, s_lag%lf(2)%x_d, &
                            fs_d, s%x_d, B_d, rho, dt, bd(2), bd(3), bd(4), &
@@ -417,5 +414,5 @@ subroutine scalar_rhs_maker_bdf_device(temp1, temp2, s_lag, fs, s, B, rho, dt, &
 #endif
 
   end subroutine scalar_rhs_maker_bdf_device
-  
+
 end module rhs_maker_device
diff --git a/src/common/bcknd/sx/rhs_maker_sx.f90 b/src/common/bcknd/sx/rhs_maker_sx.f90
index 7bfa7eedc13..abe5357e805 100644
--- a/src/common/bcknd/sx/rhs_maker_sx.f90
+++ b/src/common/bcknd/sx/rhs_maker_sx.f90
@@ -6,7 +6,7 @@ module rhs_maker_sx
   use scratch_registry, only : neko_scratch_registry
   implicit none
   private
-  
+
   type, public, extends(rhs_maker_sumab_t) :: rhs_maker_sumab_sx_t
    contains
      procedure, nopass :: compute_fluid => rhs_maker_sumab_sx
@@ -23,7 +23,7 @@ module rhs_maker_sx
      procedure, nopass :: compute_fluid => rhs_maker_bdf_sx
      procedure, nopass :: compute_scalar => scalar_rhs_maker_bdf_sx
   end type rhs_maker_bdf_sx_t
-  
+
 contains
 
   subroutine rhs_maker_sumab_sx(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab)
@@ -49,7 +49,7 @@ subroutine rhs_maker_sumab_sx(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab)
           w%x(i,1,1,1) = w%x(i,1,1,1) + ab(3) * wwlag%lf(2)%x(i,1,1,1)
        end do
     end if
-    
+
   end subroutine rhs_maker_sumab_sx
 
   subroutine rhs_maker_ext_sx(fx_lag, fy_lag, fz_lag, &
@@ -63,7 +63,7 @@ subroutine rhs_maker_ext_sx(fx_lag, fy_lag, fz_lag, &
     integer :: i
     type(field_t), pointer :: temp1, temp2, temp3
     integer :: temp_indices(3)
-    
+
     call neko_scratch_registry%request_field(temp1, temp_indices(1))
     call neko_scratch_registry%request_field(temp2, temp_indices(2))
     call neko_scratch_registry%request_field(temp3, temp_indices(3))
@@ -93,18 +93,20 @@ subroutine rhs_maker_ext_sx(fx_lag, fy_lag, fz_lag, &
     end do
 
     call neko_scratch_registry%relinquish_field(temp_indices)
-    
+
   end subroutine rhs_maker_ext_sx
 
-  subroutine scalar_rhs_maker_ext_sx(temp1, fs_lag, fs_laglag, fs, rho, &
-       ext_coeffs, n)
-    type(field_t), intent(inout) :: temp1
+  subroutine scalar_rhs_maker_ext_sx(fs_lag, fs_laglag, fs, rho, ext_coeffs, n)
     type(field_t), intent(inout) :: fs_lag
     type(field_t), intent(inout) :: fs_laglag
     real(kind=rp), intent(inout) :: rho, ext_coeffs(4)
     integer, intent(in) :: n
     real(kind=rp), intent(inout) :: fs(n)
     integer :: i
+    type(field_t), pointer :: temp1
+    integer :: temp_index
+
+    call neko_scratch_registry%request_field(temp1, temp_index)
 
     do i = 1, n
        temp1%x(i,1,1,1) = ext_coeffs(2) * fs_lag%x(i,1,1,1) + &
@@ -119,14 +121,15 @@ subroutine scalar_rhs_maker_ext_sx(temp1, fs_lag, fs_laglag, fs, rho, &
     do i = 1, n
        fs(i) = (ext_coeffs(1) * fs(i) + temp1%x(i,1,1,1)) * rho
     end do
-    
+
+    call neko_scratch_registry%relinquish_field(temp_index)
   end subroutine scalar_rhs_maker_ext_sx
 
   subroutine rhs_maker_bdf_sx(ulag, vlag, wlag, bfx, bfy, bfz, &
-                              u, v, w, B, rho, dt, bd, nbd, n)    
+                              u, v, w, B, rho, dt, bd, nbd, n)
     integer, intent(in) :: n, nbd
     type(field_t), intent(in) :: u, v, w
-    type(field_series_t), intent(in) :: ulag, vlag, wlag        
+    type(field_series_t), intent(in) :: ulag, vlag, wlag
     real(kind=rp), intent(inout) :: bfx(n), bfy(n), bfz(n)
     real(kind=rp), intent(in) :: B(n)
     real(kind=rp), intent(in) :: dt, rho, bd(4)
@@ -172,16 +175,19 @@ subroutine rhs_maker_bdf_sx(ulag, vlag, wlag, bfx, bfy, bfz, &
 
   end subroutine rhs_maker_bdf_sx
 
-  subroutine scalar_rhs_maker_bdf_sx(temp1, temp2, s_lag, fs, s, B, rho, dt, &
-       bd, nbd, n)
+  subroutine scalar_rhs_maker_bdf_sx(s_lag, fs, s, B, rho, dt, bd, nbd, n)
     integer, intent(in) :: n, nbd
-    type(field_t), intent(inout) :: temp1, temp2
     type(field_t), intent(in) :: s
     type(field_series_t), intent(in) :: s_lag
     real(kind=rp), intent(inout) :: fs(n)
     real(kind=rp), intent(in) :: B(n)
     real(kind=rp), intent(in) :: dt, rho, bd(4)
     integer :: i, ilag
+    type(field_t), pointer :: temp1, temp2
+    integer :: temp_indices(2)
+
+    call neko_scratch_registry%request_field(temp1, temp_indices(1))
+    call neko_scratch_registry%request_field(temp2, temp_indices(2))
 
     do i = 1, n
        temp2%x(i,1,1,1) = s%x(i,1,1,1) * B(i) * bd(2)
@@ -201,6 +207,7 @@ subroutine scalar_rhs_maker_bdf_sx(temp1, temp2, s_lag, fs, s, B, rho, dt, &
        fs(i) = fs(i) + temp2%x(i,1,1,1) * (rho / dt)
     end do
 
+    call neko_scratch_registry%relinquish_field(temp_indices)
   end subroutine scalar_rhs_maker_bdf_sx
 
 end module rhs_maker_sx
diff --git a/src/common/checkpoint.f90 b/src/common/checkpoint.f90
index cec1a8a4811..d769080cbb2 100644
--- a/src/common/checkpoint.f90
+++ b/src/common/checkpoint.f90
@@ -69,6 +69,7 @@ module checkpoint
      type(field_t), pointer :: abz1 => null()
      type(field_t), pointer :: abz2 => null()
 
+
      type(field_t), pointer :: s => null()
      type(field_series_t), pointer :: slag => null()
 
@@ -110,14 +111,14 @@ subroutine chkp_init(this, u, v, w, p)
     if ( u%msh%nelv .ne. p%msh%nelv ) then
        call neko_error('Velocity and pressure defined on different meshes')
     end if
-    
+
     this%u => u
     this%v => v
     this%w => w
     this%p => p
 
     this%t = 0d0
-    
+
   end subroutine chkp_init
 
   !> Reset checkpoint
@@ -132,7 +133,7 @@ subroutine chkp_free(this)
     nullify(this%ulag)
     nullify(this%vlag)
     nullify(this%wlag)
-    
+
   end subroutine chkp_free
 
   !> Synchronize checkpoint with device
@@ -146,57 +147,57 @@ subroutine chkp_sync_host(this)
 
          if (associated(this%u) .and. associated(this%v) .and. &
               associated(this%w) .and. associated(this%p)) then
-            call device_memcpy(u%x, u%x_d, u%dof%size(), DEVICE_TO_HOST)
-            call device_memcpy(v%x, v%x_d, v%dof%size(), DEVICE_TO_HOST)
-            call device_memcpy(w%x, w%x_d, w%dof%size(), DEVICE_TO_HOST)
-            call device_memcpy(p%x, p%x_d, p%dof%size(), DEVICE_TO_HOST)
+            call device_memcpy(u%x, u%x_d, u%dof%size(), DEVICE_TO_HOST, sync=.false.)
+            call device_memcpy(v%x, v%x_d, v%dof%size(), DEVICE_TO_HOST, sync=.false.)
+            call device_memcpy(w%x, w%x_d, w%dof%size(), DEVICE_TO_HOST, sync=.false.)
+            call device_memcpy(p%x, p%x_d, p%dof%size(), DEVICE_TO_HOST, sync=.false.)
          end if
-         
+
          if (associated(this%ulag) .and. associated(this%vlag) .and. &
               associated(this%wlag)) then
             call device_memcpy(ulag%lf(1)%x, ulag%lf(1)%x_d, &
-                               u%dof%size(), DEVICE_TO_HOST)
+                               u%dof%size(), DEVICE_TO_HOST, sync=.false.)
             call device_memcpy(ulag%lf(2)%x, ulag%lf(2)%x_d, &
-                               u%dof%size(), DEVICE_TO_HOST)
-  
+                               u%dof%size(), DEVICE_TO_HOST, sync=.false.)
+
             call device_memcpy(vlag%lf(1)%x, vlag%lf(1)%x_d, &
-                               v%dof%size(), DEVICE_TO_HOST)
+                               v%dof%size(), DEVICE_TO_HOST, sync=.false.)
             call device_memcpy(vlag%lf(2)%x, vlag%lf(2)%x_d, &
-                               v%dof%size(), DEVICE_TO_HOST)
-    
+                               v%dof%size(), DEVICE_TO_HOST, sync=.false.)
+
             call device_memcpy(wlag%lf(1)%x, wlag%lf(1)%x_d, &
-                               w%dof%size(), DEVICE_TO_HOST)
+                               w%dof%size(), DEVICE_TO_HOST, sync=.false.)
             call device_memcpy(wlag%lf(2)%x, wlag%lf(2)%x_d, &
-                               w%dof%size(), DEVICE_TO_HOST)
+                               w%dof%size(), DEVICE_TO_HOST, sync=.false.)
             call device_memcpy(this%abx1%x, this%abx1%x_d, &
-                               w%dof%size(), DEVICE_TO_HOST)
+                               w%dof%size(), DEVICE_TO_HOST, sync=.false.)
             call device_memcpy(this%abx2%x, this%abx2%x_d, &
-                               w%dof%size(), DEVICE_TO_HOST)
+                               w%dof%size(), DEVICE_TO_HOST, sync=.false.)
             call device_memcpy(this%aby1%x, this%aby1%x_d, &
-                               w%dof%size(), DEVICE_TO_HOST)
+                               w%dof%size(), DEVICE_TO_HOST, sync=.false.)
             call device_memcpy(this%aby2%x, this%aby2%x_d, &
-                               w%dof%size(), DEVICE_TO_HOST)
+                               w%dof%size(), DEVICE_TO_HOST, sync=.false.)
             call device_memcpy(this%abz1%x, this%abz1%x_d, &
-                               w%dof%size(), DEVICE_TO_HOST)
+                               w%dof%size(), DEVICE_TO_HOST, sync=.false.)
             call device_memcpy(this%abz2%x, this%abz2%x_d, &
-                               w%dof%size(), DEVICE_TO_HOST)
+                               w%dof%size(), DEVICE_TO_HOST, sync=.false.)
          end if
          if (associated(this%s)) then
             call device_memcpy(this%s%x, this%s%x_d, &
-                               this%s%dof%size(), DEVICE_TO_HOST)
+                               this%s%dof%size(), DEVICE_TO_HOST, sync=.false.)
             call device_memcpy(this%slag%lf(1)%x, this%slag%lf(1)%x_d, &
-                               this%s%dof%size(), DEVICE_TO_HOST)
+                               this%s%dof%size(), DEVICE_TO_HOST, sync=.false.)
             call device_memcpy(this%slag%lf(2)%x, this%slag%lf(2)%x_d, &
-                               this%s%dof%size(), DEVICE_TO_HOST)
+                               this%s%dof%size(), DEVICE_TO_HOST, sync=.false.)
             call device_memcpy(this%abs1%x, this%abs1%x_d, &
-                               w%dof%size(), DEVICE_TO_HOST)
+                               w%dof%size(), DEVICE_TO_HOST, sync=.false.)
             call device_memcpy(this%abs2%x, this%abs2%x_d, &
-                               w%dof%size(), DEVICE_TO_HOST)
+                               w%dof%size(), DEVICE_TO_HOST, sync=.false.)
          end if
        end associate
-    call device_sync()
+       call device_sync(glb_cmd_queue)
     end if
-         
+
   end subroutine chkp_sync_host
 
   !> Synchronize device with checkpoint
@@ -210,41 +211,54 @@ subroutine chkp_sync_device(this)
 
          if (associated(this%u) .and. associated(this%v) .and. &
               associated(this%w)) then
-            call device_memcpy(u%x, u%x_d, u%dof%size(), HOST_TO_DEVICE)
-            call device_memcpy(v%x, v%x_d, v%dof%size(), HOST_TO_DEVICE)
-            call device_memcpy(w%x, w%x_d, w%dof%size(), HOST_TO_DEVICE)
-            call device_memcpy(p%x, p%x_d, p%dof%size(), HOST_TO_DEVICE)
+            call device_memcpy(u%x, u%x_d, u%dof%size(), &
+                               HOST_TO_DEVICE, sync=.false.)
+            call device_memcpy(v%x, v%x_d, v%dof%size(), &
+                               HOST_TO_DEVICE, sync=.false.)
+            call device_memcpy(w%x, w%x_d, w%dof%size(), &
+                               HOST_TO_DEVICE, sync=.false.)
+            call device_memcpy(p%x, p%x_d, p%dof%size(), &
+                               HOST_TO_DEVICE, sync=.false.)
          end if
-         
+
          if (associated(this%ulag) .and. associated(this%vlag) .and. &
               associated(this%wlag)) then
-            call device_memcpy(ulag%lf(1)%x, ulag%lf(1)%x_d, &
-                               u%dof%size(), HOST_TO_DEVICE)
-            call device_memcpy(ulag%lf(2)%x, ulag%lf(2)%x_d, &
-                               u%dof%size(), HOST_TO_DEVICE)
-  
-            call device_memcpy(vlag%lf(1)%x, vlag%lf(1)%x_d, &
-                               v%dof%size(), HOST_TO_DEVICE)
-            call device_memcpy(vlag%lf(2)%x, vlag%lf(2)%x_d, &
-                               v%dof%size(), HOST_TO_DEVICE)
-    
-            call device_memcpy(wlag%lf(1)%x, wlag%lf(1)%x_d, &
-                               w%dof%size(), HOST_TO_DEVICE)
-            call device_memcpy(wlag%lf(2)%x, wlag%lf(2)%x_d, &
-                               w%dof%size(), HOST_TO_DEVICE)
+            call device_memcpy(ulag%lf(1)%x, ulag%lf(1)%x_d, u%dof%size(), &
+                               HOST_TO_DEVICE, sync=.false.)
+            call device_memcpy(ulag%lf(2)%x, ulag%lf(2)%x_d, u%dof%size(), &
+                               HOST_TO_DEVICE, sync=.false.)
+
+            call device_memcpy(vlag%lf(1)%x, vlag%lf(1)%x_d, v%dof%size(), &
+                               HOST_TO_DEVICE, sync=.false.)
+            call device_memcpy(vlag%lf(2)%x, vlag%lf(2)%x_d, v%dof%size(), &
+                               HOST_TO_DEVICE, sync=.false.)
+
+            call device_memcpy(wlag%lf(1)%x, wlag%lf(1)%x_d, w%dof%size(), &
+                               HOST_TO_DEVICE, sync=.false.)
+            call device_memcpy(wlag%lf(2)%x, wlag%lf(2)%x_d, w%dof%size(), &
+                               HOST_TO_DEVICE, sync=.false.)
          end if
          if (associated(this%s)) then
-            call device_memcpy(this%s%x, this%s%x_d, &
-                               this%s%dof%size(), HOST_TO_DEVICE)
+            call device_memcpy(this%s%x, this%s%x_d, this%s%dof%size(), &
+                               HOST_TO_DEVICE, sync=.false.)
+
+            call device_memcpy(this%slag%lf(1)%x, this%slag%lf(1)%x_d, &
+                               this%s%dof%size(), HOST_TO_DEVICE, sync=.false.)
+            call device_memcpy(this%slag%lf(2)%x, this%slag%lf(2)%x_d, &
+                               this%s%dof%size(), HOST_TO_DEVICE, sync=.false.)
+            call device_memcpy(this%abs1%x, this%abs1%x_d, &
+                               w%dof%size(), HOST_TO_DEVICE, sync=.false.)
+            call device_memcpy(this%abs2%x, this%abs2%x_d, &
+                               w%dof%size(), HOST_TO_DEVICE, sync=.false.)
          end if
        end associate
     end if
-         
+
   end subroutine chkp_sync_device
 
   !> Add lagged velocity terms
   subroutine chkp_add_lag(this, ulag, vlag, wlag)
-    class(chkp_t), intent(inout) :: this    
+    class(chkp_t), intent(inout) :: this
     type(field_series_t), target :: ulag
     type(field_series_t), target :: vlag
     type(field_series_t), target :: wlag
@@ -252,16 +266,16 @@ subroutine chkp_add_lag(this, ulag, vlag, wlag)
     this%ulag => ulag
     this%vlag => vlag
     this%wlag => wlag
-    
+
   end subroutine chkp_add_lag
 
   !> Add scalars
   subroutine chkp_add_scalar(this, s)
-    class(chkp_t), intent(inout) :: this    
+    class(chkp_t), intent(inout) :: this
     type(field_t), target :: s
 
     this%s => s
-    
+
   end subroutine chkp_add_scalar
 
 
@@ -272,5 +286,5 @@ pure function chkp_restart_time(this) result(rtime)
 
     rtime = this%t
   end function chkp_restart_time
-  
+
 end module checkpoint
diff --git a/src/common/cpuid.c b/src/common/cpuid.c
index 9d1ac211f7d..5036d9a786a 100644
--- a/src/common/cpuid.c
+++ b/src/common/cpuid.c
@@ -35,6 +35,7 @@ void system_cpuid(char *name, int len) {
   /* Generic ARM unless we found something known */
   strncpy(name, "ARM", len);
   int cpufj = 0;
+  int cpuarm = 0;
 #endif
   while (fgets (buf, MAXLEN, fp)) {
 #if defined(_ARCH_PPC64)
@@ -52,6 +53,10 @@ void system_cpuid(char *name, int len) {
         cpufj = 1;
         continue;
       }
+      else if(strstr(token, "0x41")) {
+	cpuarm = 1;
+	continue;
+      }
     }
 
     if (strstr(buf, "CPU part") && cpufj) {
@@ -60,7 +65,16 @@ void system_cpuid(char *name, int len) {
       if (strstr(token, "0x001")) {
         strncpy(name, "A64FX", len);
         break;
-      }
+      }      
+    }
+
+    if (strstr(buf, "CPU part") && cpuarm) {
+      char *token = strtok (buf, delim);
+      token = strtok (NULL, delim);
+      if (strstr(token, "0xd4f")) {
+        strncpy(name, "ARM Neoverse V2", len);
+        break;
+      }      
     }   
 #endif
   }
diff --git a/src/common/craypat.F90 b/src/common/craypat.F90
index dac30814671..8f3dba40a27 100644
--- a/src/common/craypat.F90
+++ b/src/common/craypat.F90
@@ -4,7 +4,7 @@ module craypat
   use stack, only : stack_i4_t
   use utils, only : neko_error
   implicit none
-  
+
   type(stack_i4_t), private :: region_depth
   logical, private :: craypat_on = .false.
 #ifdef CRAYPAT
@@ -31,7 +31,7 @@ end subroutine craypat_record_stop
   subroutine craypat_region_begin(name)
     character(kind=c_char,len=*) :: name
     integer :: ierr, region_id
-    
+
     if (craypat_on) then
        !> @todo Don't hardcode region names...
        if (name .eq. 'Time-Step') then
@@ -43,13 +43,13 @@ subroutine craypat_region_begin(name)
        else if (name .eq. 'gather-scatter') then
           region_id = 4
        else ! User defined region
-          region_id = 99 
+          region_id = 99
        end if
-       
+
        call region_depth%push(region_id)
        call PAT_region_begin(region_id, name, ierr)
     end if
-    
+
   end subroutine craypat_region_begin
 
   !> End a CrayPat region
@@ -62,9 +62,9 @@ subroutine craypat_region_end
        end if
        call PAT_region_end(region_depth%pop(), ierr)
     end if
-    
+
   end subroutine craypat_region_end
 
 #endif
- 
+
 end module craypat
diff --git a/src/common/datadist.f90 b/src/common/datadist.f90
index 83d2f771590..27ba56e2e19 100644
--- a/src/common/datadist.f90
+++ b/src/common/datadist.f90
@@ -40,7 +40,7 @@ module datadist
      type(MPI_Comm) :: comm !< Communicator on which the dist. is defined
      integer :: pe_rank     !< Pe's rank in the given distribution
      integer :: pe_size     !< Size of communicator in the given dist.
-     integer :: L               
+     integer :: L
      integer :: R
      integer :: M !< Total, global, size
      integer :: Ip !< Number of local values on this process
@@ -48,7 +48,7 @@ module datadist
 
   !> Load-balanced linear distribution \f$ M = PL + R \f$
   type, extends(dist_t) :: linear_dist_t
-   contains 
+   contains
      procedure :: num_local => linear_dist_Ip
      procedure :: num_global => linear_dist_M
      procedure :: start_idx => linear_dist_start
@@ -62,7 +62,7 @@ module datadist
   public :: linear_dist_t
 
 contains
-  
+
   function linear_dist_init(n, rank, size, comm) result(this)
     integer, intent(in) :: n    !< Total size
     integer :: rank             !< PE's rank to define the dist. over
@@ -74,11 +74,11 @@ function linear_dist_init(n, rank, size, comm) result(this)
     this%comm = comm
     this%pe_rank = rank
     this%pe_size = size
-    
+
     this%L = floor(dble(this%M) / dble(this%pe_size))
     this%R = modulo(this%M, this%pe_size)
     this%Ip = floor((dble(this%M) + dble(this%pe_size) - &
-         dble(this%pe_rank) - 1d0) / dble(this%pe_size))    
+         dble(this%pe_rank) - 1d0) / dble(this%pe_size))
   end function linear_dist_init
 
   pure function linear_dist_Ip(this) result(n)
diff --git a/src/common/distdata.f90 b/src/common/distdata.f90
index 5e58f5aaa43..2311a7d1315 100644
--- a/src/common/distdata.f90
+++ b/src/common/distdata.f90
@@ -40,14 +40,14 @@ module distdata
 
   type, public :: distdata_t
      type(stack_i4t2_t) :: shared_el_facet !< Elemenets with shared facets
-     
+
      type(uset_i4_t) :: shared_facet    !< List of shared facets
      type(uset_i4_t) :: shared_edge     !< List of shared edges
      type(uset_i4_t) :: shared_point    !< List of shared points
-     
+
      integer, allocatable :: local_to_global_facet(:)!< Local to global (facets)
      integer, allocatable :: local_to_global_edge(:) !< Local to global (edges)
-     
+
   end type distdata_t
 
   public :: distdata_init, distdata_free, distdata_set_shared_el_facet, &
@@ -62,13 +62,13 @@ subroutine distdata_init(ddata)
     type(distdata_t), intent(inout) :: ddata
 
     call distdata_free(ddata)
-    
+
     call ddata%shared_el_facet%init()
 
     call ddata%shared_facet%init()
-    call ddata%shared_edge%init()   
+    call ddata%shared_edge%init()
     call ddata%shared_point%init()
-    
+
   end subroutine distdata_init
 
   !> Free a distdata type
@@ -76,7 +76,7 @@ subroutine distdata_free(ddata)
     type(distdata_t), intent(inout) :: ddata
 
     call ddata%shared_el_facet%free()
-    
+
     call ddata%shared_facet%free()
     call ddata%shared_edge%free()
     call ddata%shared_point%free()
@@ -84,11 +84,11 @@ subroutine distdata_free(ddata)
     if (allocated(ddata%local_to_global_facet)) then
        deallocate(ddata%local_to_global_facet)
     end if
-    
+
     if (allocated(ddata%local_to_global_edge)) then
        deallocate(ddata%local_to_global_edge)
     end if
-    
+
   end subroutine distdata_free
 
   !> Mark an element's facet as shared
@@ -100,7 +100,7 @@ subroutine distdata_set_shared_el_facet(ddata, element, side)
 
     t%x = (/ element, side /)
     call ddata%shared_el_facet%push(t)
-    
+
   end subroutine distdata_set_shared_el_facet
 
   !> Mark a facet as shared
@@ -109,17 +109,17 @@ subroutine distdata_set_shared_facet(ddata, facet)
     integer, value :: facet     !< Facet index (local numbering)
 
     call ddata%shared_facet%add(facet)
-    
+
   end subroutine distdata_set_shared_facet
-  
+
   !> Mark an element's edge as shared
   !! @attention only defined for elements where facet .ne. edges
   subroutine distdata_set_shared_edge(ddata, edge)
     type(distdata_t), intent(inout) :: ddata
-    integer, value :: edge      !< Edge index (local numbering) 
+    integer, value :: edge      !< Edge index (local numbering)
 
     call ddata%shared_edge%add(edge)
-    
+
   end subroutine distdata_set_shared_edge
 
   !> Mark a point as shared
@@ -128,7 +128,7 @@ subroutine distdata_set_shared_point(ddata, point)
     integer, value :: point !< Point index (local numbering)
 
     call ddata%shared_point%add(point)
-    
+
   end subroutine distdata_set_shared_point
 
   !> Set local to global mapping (facets)
@@ -138,7 +138,7 @@ subroutine distdata_set_local_to_global_facet(ddata, local, global)
     integer, intent(in), value :: global !< Global facet index
 
     ddata%local_to_global_facet(local) = global
-    
+
   end subroutine distdata_set_local_to_global_facet
 
   !> Set local to global mapping (edges)
@@ -148,7 +148,7 @@ subroutine distdata_set_local_to_global_edge(ddata, local, global)
     integer, intent(in) , value :: global !< Global edge index
 
     ddata%local_to_global_edge(local) = global
-    
+
   end subroutine distdata_set_local_to_global_edge
-  
+
 end module distdata
diff --git a/src/common/global_interpolation.F90 b/src/common/global_interpolation.F90
index ed58ab06743..5d9e703182b 100644
--- a/src/common/global_interpolation.F90
+++ b/src/common/global_interpolation.F90
@@ -34,7 +34,7 @@
 !! @note This modules uses functions from `gslib`, namely `findpts_setup`,
 !! `findpts`, and `findpts_eval`. A full description of these subroutines can
 !! be found at https://github.com/Nek5000/gslib/blob/master/src/findpts.c
-!! 
+!!
 module global_interpolation
   use num_types, only: rp
   use space, only: space_t
@@ -49,13 +49,13 @@ module global_interpolation
   use, intrinsic :: iso_c_binding
   implicit none
   private
-  !> Implements global interpolation for arbitrary points in the domain. 
+  !> Implements global interpolation for arbitrary points in the domain.
   type, public :: global_interpolation_t
      !> Dofmap from which we interpolate the points
      type(dofmap_t), pointer :: dof
      !> Mesh on which we interpolate
      type(mesh_t), pointer :: mesh
-     !> Space 
+     !> Space
      type(space_t), pointer :: Xh
      !> Interpolator for local points
      type(local_interpolator_t) :: local_interp
@@ -123,14 +123,14 @@ subroutine global_interpolation_init(this, dof, tol)
     if(present(tol)) this%tol = tol
 
 #ifdef HAVE_GSLIB
-    
+
     lx = this%Xh%lx
     ly = this%Xh%ly
     lz = this%Xh%lz
     nelv = this%mesh%nelv
     !Number of points to iterate on simultaneosuly
     max_pts_per_iter = 128
-    
+
     call fgslib_findpts_setup(this%gs_handle, &
          NEKO_COMM, pe_size, &
          this%mesh%gdim, &
@@ -159,10 +159,10 @@ subroutine global_interpolation_free(this)
     call this%free_points()
 
 #ifdef HAVE_GSLIB
-   if (this%gs_init) then
-      call fgslib_findpts_free(this%gs_handle)
-      this%gs_init = .false.
-   end if
+    if (this%gs_init) then
+       call fgslib_findpts_free(this%gs_handle)
+       this%gs_init = .false.
+    end if
 #endif
 
   end subroutine global_interpolation_free
@@ -186,8 +186,8 @@ subroutine global_interpolation_free_points(this)
     end if
 
   end subroutine global_interpolation_free_points
-  
-  !> Common routine for finding the points. 
+
+  !> Common routine for finding the points.
   subroutine global_interpolation_find_common(this)
     class(global_interpolation_t), intent(inout) :: this
     !!Perhaps this should be kind dp
@@ -243,22 +243,22 @@ subroutine global_interpolation_find_common(this)
     allocate(x_check(this%n_points))
     allocate(y_check(this%n_points))
     allocate(z_check(this%n_points))
-    
+
     call fgslib_findpts_eval(this%gs_handle, x_check, &
                              1, this%error_code, 1, &
-                             this%proc_owner, 1, this%el_owner, 1, &  
+                             this%proc_owner, 1, this%el_owner, 1, &
                              this%rst, this%mesh%gdim, &
                              this%n_points, this%dof%x)
- 
+
     call fgslib_findpts_eval(this%gs_handle, y_check, &
                              1, this%error_code, 1, &
-                             this%proc_owner, 1, this%el_owner, 1, &  
+                             this%proc_owner, 1, this%el_owner, 1, &
                              this%rst, this%mesh%gdim, &
                              this%n_points, this%dof%y)
- 
+
     call fgslib_findpts_eval(this%gs_handle, z_check, &
                              1, this%error_code, 1, &
-                             this%proc_owner, 1, this%el_owner, 1, &  
+                             this%proc_owner, 1, this%el_owner, 1, &
                              this%rst, this%mesh%gdim, &
                              this%n_points, this%dof%z)
 
@@ -286,7 +286,7 @@ subroutine global_interpolation_find_common(this)
        end if
 
     end do
-    
+
     deallocate(x_check)
     deallocate(y_check)
     deallocate(z_check)
@@ -299,8 +299,8 @@ subroutine global_interpolation_find_common(this)
     call neko_error('Neko needs to be built with GSLIB support')
 #endif
   end subroutine global_interpolation_find_common
-  
-  !> Finds the corresponding r,s,t coordinates 
+
+  !> Finds the corresponding r,s,t coordinates
   !! in the correct global element as well as which process that owns the point.
   !! After this the values at these points can be evaluated.
   !! If the locations of the points change this must be called again.
@@ -328,7 +328,7 @@ subroutine global_interpolation_find_coords(this, x, y, z, n_points)
     this%n_points = n_points
 
     call global_interpolation_init_point_arrays(this)
-    
+
     do i = 1, n_points
        this%xyz(1,i) = x(i,1,1,1)
        this%xyz(2,i) = y(i,1,1,1)
@@ -354,7 +354,7 @@ subroutine global_interpolation_init_point_arrays(this)
 
   end subroutine global_interpolation_init_point_arrays
 
-  !> Finds the corresponding r,s,t coordinates 
+  !> Finds the corresponding r,s,t coordinates
   !! in the correct global element as well as which process that owns the point.
   !! After this the values at these points can be evaluated.
   !! If the locations of the points change this must be called again.
@@ -376,7 +376,7 @@ subroutine global_interpolation_find_xyz(this, xyz, n_points)
     this%n_points = n_points
 
     call global_interpolation_init_point_arrays(this)
-    
+
     !> make deep copy incase xyz goes out of scope or deallocated
     call copy(this%xyz,xyz,3*n_points)
 
@@ -406,7 +406,7 @@ subroutine global_interpolation_find_and_redist(this, xyz, n_points)
 
     this%n_points = n_points
     call global_interpolation_init_point_arrays(this)
-    
+
     !> make deep copy incase xyz goes out of scope or deallocated
     call copy(this%xyz,xyz,3*n_points)
 
@@ -414,14 +414,14 @@ subroutine global_interpolation_find_and_redist(this, xyz, n_points)
     !> Sets new points and redistributes them
     call global_interpolation_redist(this)
     call global_interpolation_find_common(this)
-  
+
     do i = 1, this%n_points
        if (this%proc_owner(i) .ne. pe_rank) then
           write(*,*) 'Redistribution failed on rank: ', pe_rank,&
                      'for point with coord: ', &
                      this%xyz(1,i),this%xyz(2,i),this%xyz(3,i)
           exit
-       end if          
+       end if
     end do
 
     n_points = this%n_points
@@ -462,7 +462,7 @@ subroutine global_interpolation_redist(this)
        call MPI_Gather(n_points_per_pe(i), 1, MPI_INTEGER,&
                       n_points_from_pe, 1, MPI_INTEGER, i, NEKO_COMM, ierr)
     end do
-    
+
     allocate(n_point_offset_from_pe(0:(pe_size-1)))
     n_point_offset_from_pe(0) = 0
     do i = 1,(pe_size-1)
@@ -522,7 +522,7 @@ subroutine global_interpolation_evaluate(this, interp_values, field)
     if (.not. this%all_points_local) then
        call fgslib_findpts_eval(this%gs_handle, interp_values, &
                                 1, this%error_code, 1, &
-                                this%proc_owner, 1, this%el_owner, 1, &  
+                                this%proc_owner, 1, this%el_owner, 1, &
                                 this%rst, this%mesh%gdim, &
                                 this%n_points, field)
     else
diff --git a/src/common/jobctrl.f90 b/src/common/jobctrl.f90
index 16abd336c44..5c415cd41c1 100644
--- a/src/common/jobctrl.f90
+++ b/src/common/jobctrl.f90
@@ -39,7 +39,7 @@ module jobctrl
   use logger
   implicit none
   private
-  
+
   interface jobctrl_set_time_limit
      module procedure jobctrl_set_time_limit_sec, jobctrl_set_time_limit_str
   end interface jobctrl_set_time_limit
@@ -70,7 +70,7 @@ subroutine jobctrl_set_time_limit_str(limit_str)
     integer :: str_len, sep_h, h, m, s
 
     str_len = len_trim(limit_str)
-    
+
     if (str_len .lt. 8) then
        call neko_error('Invalid job limit')
     end if
@@ -86,9 +86,9 @@ subroutine jobctrl_set_time_limit_str(limit_str)
     read(limit_str(sep_h+4:str_len), *) s
 
     call jobctrl_set_time_limit_sec(h*3600 + m * 60 + s)
-    
+
   end subroutine jobctrl_set_time_limit_str
-  
+
   !> Set a job's time limit (in seconds)
   subroutine jobctrl_set_time_limit_sec(sec)
     integer :: sec
@@ -98,9 +98,9 @@ subroutine jobctrl_set_time_limit_sec(sec)
        jstop_sec = sec - jobctrl_jobtime()
        call signal_set_timeout(jstop_sec)
     end if
-    
+
   end subroutine jobctrl_set_time_limit_sec
-  
+
   !> Check if the job's time limit has been reached
   function jobctrl_time_limit() result(jstop)
     logical :: jstop
@@ -110,13 +110,14 @@ function jobctrl_time_limit() result(jstop)
     jstop = (signal_timeout() .or. signal_usr(1))
 
     if (jstop) then
+       ! Todo: This might be a warning instead of a message?
        write(log_buf, '(A)') '! stop at job limit >>>'
-       call neko_log%message(log_buf)
+       call neko_log%message(log_buf, NEKO_LOG_QUIET)
     end if
 
     ! Let rank zero decide if we should stop
     call MPI_Bcast(jstop, 1, MPI_LOGICAL, 0, NEKO_COMM, ierr)
-    
+
   end function jobctrl_time_limit
 
   !> Returns a job's time in seconds relative to the first call
@@ -124,13 +125,13 @@ function jobctrl_jobtime() result(jobtime)
     real(kind=rp), save :: stime
     real(kind=rp) :: jobtime
     logical, save :: init = .false.
-    
+
     if (.not. init) then
        stime = MPI_WTIME()
        init = .true.
     end if
-    
+
     jobtime = MPI_WTIME() - stime
   end function jobctrl_jobtime
-  
+
 end module jobctrl
diff --git a/src/common/json_utils.f90 b/src/common/json_utils.f90
index 0d997d8a05f..0a3cda4270c 100644
--- a/src/common/json_utils.f90
+++ b/src/common/json_utils.f90
@@ -32,162 +32,199 @@
 !
 !> Utilities for retrieving parameters from the case files.
 module json_utils
-  use num_types, only : rp
+  use num_types, only : rp, dp, sp
   use json_module, only : json_file, json_value, json_core
   use utils, only : neko_error
   implicit none
   private
 
-  public :: json_get, json_get_or_default
-  
+  public :: json_get, json_get_or_default, json_extract_item
+
   !> Retrieves a parameter by name or throws an error
   interface json_get
-    module procedure json_get_real, json_get_integer, json_get_logical, &
-                     json_get_string, json_get_real_array, &
-                     json_get_integer_array, json_get_logical_array, &
-                     json_get_string_array
+     module procedure json_get_real, json_get_double, json_get_integer, &
+       json_get_logical, json_get_string, json_get_real_array, &
+       json_get_double_array, json_get_integer_array, json_get_logical_array, &
+       json_get_string_array
   end interface json_get
 
   !> Retrieves a parameter by name or assigns a provided default value.
   !! In the latter case also adds the missing paramter to the json
   interface json_get_or_default
-    module procedure json_get_or_default_real, json_get_or_default_integer,&
-                     json_get_or_default_string, json_get_or_default_logical
+     module procedure json_get_or_default_real,json_get_or_default_double, &
+       json_get_or_default_integer, &
+       json_get_or_default_string, json_get_or_default_logical
   end interface json_get_or_default
-  
+
 contains
-  
+
   !> Retrieves a real parameter by name or throws an error
-  !! @param json The json to retrieve the parameter from.
-  !! @param name The full path to the parameter.
-  !! @value value The variable to be populated with the retrieved parameter.
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
   subroutine json_get_real(json, name, value)
     type(json_file), intent(inout) :: json
     character(len=*), intent(in) :: name
-    real(kind=rp), intent(out) :: value
+    real(kind=sp), intent(out) :: value
     logical :: found
-    
+
     call json%get(name, value, found)
-    
+
     if (.not. found) then
        call neko_error("Parameter "//name//" missing from the case file")
     end if
   end subroutine json_get_real
 
+  !> Retrieves a double precision real parameter by name or throws an error
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
+  subroutine json_get_double(json, name, value)
+    type(json_file), intent(inout) :: json
+    character(len=*), intent(in) :: name
+    real(kind=dp), intent(out) :: value
+    logical :: found
+
+    call json%get(name, value, found)
+
+    if (.not. found) then
+       call neko_error("Parameter "//name//" missing from the case file")
+    end if
+  end subroutine json_get_double
+
   !> Retrieves an integer parameter by name or throws an error
-  !! @param json The json to retrieve the parameter from.
-  !! @param name The full path to the parameter.
-  !! @value value The variable to be populated with the retrieved parameter.
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
   subroutine json_get_integer(json, name, value)
     type(json_file), intent(inout) :: json
     character(len=*), intent(in) :: name
     integer, intent(out) :: value
     logical :: found
-    
+
     call json%get(name, value, found)
-    
+
     if (.not. found) then
        call neko_error("Parameter "//name//" missing from the case file")
     end if
   end subroutine json_get_integer
 
   !> Retrieves a logical parameter by name or throws an error
-  !! @param json The json to retrieve the parameter from.
-  !! @param name The full path to the parameter.
-  !! @value value The variable to be populated with the retrieved parameter.
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
   subroutine json_get_logical(json, name, value)
     type(json_file), intent(inout) :: json
     character(len=*), intent(in) :: name
     logical, intent(out) :: value
     logical :: found
-    
+
     call json%get(name, value, found)
-    
+
     if (.not. found) then
        call neko_error("Parameter "//name//" missing from the case file")
     end if
   end subroutine json_get_logical
 
   !> Retrieves a string parameter by name or throws an error
-  !! @param json The json to retrieve the parameter from.
-  !! @param name The full path to the parameter.
-  !! @value value The variable to be populated with the retrieved parameter.
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
   subroutine json_get_string(json, name, value)
     type(json_file), intent(inout) :: json
     character(len=*), intent(in) :: name
     character(len=:), allocatable, intent(out) :: value
     logical :: found
-    
+
     call json%get(name, value, found)
-    
+
     if (.not. found) then
        call neko_error("Parameter "//name//" missing from the case file")
     end if
   end subroutine json_get_string
 
   !> Retrieves a real array parameter by name or throws an error
-  !! @param json The json to retrieve the parameter from.
-  !! @param name The full path to the parameter.
-  !! @value value The variable to be populated with the retrieved parameter.
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
   subroutine json_get_real_array(json, name, value)
     type(json_file), intent(inout) :: json
     character(len=*), intent(in) :: name
-    real(kind=rp), allocatable, intent(out) :: value(:)
+    real(kind=sp), allocatable, intent(out) :: value(:)
     logical :: found
-    
+
     call json%get(name, value, found)
-    
+
     if (.not. found) then
        call neko_error("Parameter "//name//" missing from the case file")
     end if
   end subroutine json_get_real_array
 
+  !> Retrieves a real array parameter by name or throws an error
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
+  subroutine json_get_double_array(json, name, value)
+    type(json_file), intent(inout) :: json
+    character(len=*), intent(in) :: name
+    real(kind=dp), allocatable, intent(out) :: value(:)
+    logical :: found
+
+    call json%get(name, value, found)
+
+    if (.not. found) then
+       call neko_error("Parameter "//name//" missing from the case file")
+    end if
+  end subroutine json_get_double_array
+
   !> Retrieves a integer array parameter by name or throws an error
-  !! @param json The json to retrieve the parameter from.
-  !! @param name The full path to the parameter.
-  !! @value value The variable to be populated with the retrieved parameter.
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
   subroutine json_get_integer_array(json, name, value)
     type(json_file), intent(inout) :: json
     character(len=*), intent(in) :: name
     integer, allocatable, intent(out) :: value(:)
     logical :: found
-    
+
     call json%get(name, value, found)
-    
+
     if (.not. found) then
        call neko_error("Parameter "//name//" missing from the case file")
     end if
   end subroutine json_get_integer_array
 
   !> Retrieves a logical array parameter by name or throws an error
-  !! @param json The json to retrieve the parameter from.
-  !! @param name The full path to the parameter.
-  !! @value value The variable to be populated with the retrieved parameter.
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
   subroutine json_get_logical_array(json, name, value)
     type(json_file), intent(inout) :: json
     character(len=*), intent(in) :: name
     logical, allocatable, intent(out) :: value(:)
     logical :: found
-    
+
     call json%get(name, value, found)
-    
+
     if (.not. found) then
        call neko_error("Parameter "//name//" missing from the case file")
     end if
   end subroutine json_get_logical_array
 
   !> Retrieves a string array parameter by name or throws an error
-  !! @param json The json to retrieve the parameter from.
-  !! @param name The full path to the parameter.
-  !! @value value The variable to be populated with the retrieved parameter.
-  subroutine json_get_string_array(json, name, value)
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
+  !! @param[in] filler The default string to fill empty array items with.
+  subroutine json_get_string_array(json, name, value, filler)
     type(json_file), intent(inout) :: json
     character(len=*), intent(in) :: name
     character(len=*), allocatable, intent(out) :: value(:)
-    logical :: found, logical_val
+    character(len=*), optional, intent(in) :: filler
+    logical :: found
     type(json_value), pointer :: json_val, val_ptr
     type(json_core) :: core
-    character(len=:), allocatable :: string_value        
+    character(len=:), allocatable :: string_value
     integer :: i, n_children
 
     if (.not. json%valid_path(name)) then
@@ -201,55 +238,77 @@ subroutine json_get_string_array(json, name, value)
        deallocate(value)
        allocate(value(n_children))
     end if
-    
+
     call json%get(name, json_val, found)
     call json%get_core(core)
 
     do i = 1, n_children
        call core%get_child(json_val, i, val_ptr, found)
        call core%get(val_ptr, string_value)
-          
+
        if (len(string_value) .gt. 0) then
           value(i) = string_value
+       else if(present(filler)) then
+          value(i) = filler
        end if
     end do
-    
+
   end subroutine json_get_string_array
 
   !> Retrieves a real parameter by name or assigns a provided default value.
   !! In the latter case also adds the missing paramter to the json.
-  !! @param json The json to retrieve the parameter from.
-  !! @param name The full path to the parameter.
-  !! @value value The variable to be populated with the retrieved parameter.
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
   subroutine json_get_or_default_real(json, name, value, default)
     type(json_file), intent(inout) :: json
     character(len=*), intent(in) :: name
-    real(kind=rp), intent(out) :: value
-    real(kind=rp), intent(in) :: default
+    real(kind=sp), intent(out) :: value
+    real(kind=sp), intent(in) :: default
     logical :: found
-    
+
     call json%get(name, value, found)
-    
+
     if (.not. found) then
        value = default
        call json%add(name, value)
     end if
   end subroutine json_get_or_default_real
 
+  !> Retrieves a real parameter by name or assigns a provided default value.
+  !! In the latter case also adds the missing paramter to the json.
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
+  subroutine json_get_or_default_double(json, name, value, default)
+    type(json_file), intent(inout) :: json
+    character(len=*), intent(in) :: name
+    real(kind=dp), intent(out) :: value
+    real(kind=dp), intent(in) :: default
+    logical :: found
+
+    call json%get(name, value, found)
+
+    if (.not. found) then
+       value = default
+       call json%add(name, value)
+    end if
+  end subroutine json_get_or_default_double
+
   !> Retrieves an integer parameter by name or assigns a provided default value.
   !! In the latter case also adds the missing paramter to the json.
-  !! @param json The json to retrieve the parameter from.
-  !! @param name The full path to the parameter.
-  !! @value value The variable to be populated with the retrieved parameter.
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
   subroutine json_get_or_default_integer(json, name, value, default)
     type(json_file), intent(inout) :: json
     character(len=*), intent(in) :: name
     integer, intent(out) :: value
     integer, intent(in) :: default
     logical :: found
-    
+
     call json%get(name, value, found)
-    
+
     if (.not. found) then
        value = default
        call json%add(name, value)
@@ -258,18 +317,18 @@ end subroutine json_get_or_default_integer
 
   !> Retrieves a logical parameter by name or assigns a provided default value.
   !! In the latter case also adds the missing paramter to the json.
-  !! @param json The json to retrieve the parameter from.
-  !! @param name The full path to the parameter.
-  !! @value value The variable to be populated with the retrieved parameter.
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
   subroutine json_get_or_default_logical(json, name, value, default)
     type(json_file), intent(inout) :: json
     character(len=*), intent(in) :: name
     logical, intent(out) :: value
     logical, intent(in) :: default
     logical :: found
-    
+
     call json%get(name, value, found)
-    
+
     if (.not. found) then
        value = default
        call json%add(name, value)
@@ -278,22 +337,42 @@ end subroutine json_get_or_default_logical
 
   !> Retrieves a string parameter by name or assigns a provided default value.
   !! In the latter case also adds the missing paramter to the json.
-  !! @param json The json to retrieve the parameter from.
-  !! @param name The full path to the parameter.
-  !! @value value The variable to be populated with the retrieved parameter.
+  !! @param[inout] json The json to retrieve the parameter from.
+  !! @param[in] name The full path to the parameter.
+  !! @param[out] value The variable to be populated with the retrieved parameter.
   subroutine json_get_or_default_string(json, name, value, default)
     type(json_file), intent(inout) :: json
     character(len=*), intent(in) :: name
     character(len=:), allocatable, intent(out) :: value
     character(len=*), intent(in) :: default
     logical :: found
-    
+
     call json%get(name, value, found)
-    
+
     if (.not. found) then
        value = default
        call json%add(name, value)
     end if
   end subroutine json_get_or_default_string
 
+  !> Extract `i`th item from a JSON array as a separate JSON object.
+  !! @param[inout] core JSON core object.
+  !! @param[in] array The JSON object with the array.
+  !! @param[in] i The index of the item to extract.
+  !! @param[inout] item JSON object object to be filled with the subdict.
+  subroutine json_extract_item(core, array, i, item)
+    type(json_core), intent(inout) :: core
+    type(json_value), pointer, intent(in) :: array
+    integer, intent(in) :: i
+    type(json_file), intent(inout) :: item
+    type(json_value), pointer :: ptr
+    logical :: found
+    character(len=:), allocatable :: buffer
+
+    call core%get_child(array, i, ptr, found)
+    call core%print_to_string(ptr, buffer)
+    call item%load_from_string(buffer)
+
+  end subroutine json_extract_item
+
 end module json_utils
diff --git a/src/common/log.f90 b/src/common/log.f90
index b003f250400..86ed3c685d6 100644
--- a/src/common/log.f90
+++ b/src/common/log.f90
@@ -38,7 +38,7 @@ module logger
   private
 
   integer, public, parameter :: LOG_SIZE = 80
-  
+
   type, public :: log_t
      integer :: indent_
      integer :: section_id_
@@ -48,7 +48,7 @@ module logger
      procedure, pass(this) :: begin => log_begin
      procedure, pass(this) :: end => log_end
      procedure, pass(this) :: indent => log_indent
-     procedure, nopass :: newline => log_newline          
+     procedure, nopass :: newline => log_newline
      procedure, pass(this) :: message => log_message
      procedure, pass(this) :: section => log_section
      procedure, pass(this) :: status => log_status
@@ -56,10 +56,18 @@ module logger
      procedure, pass(this) :: warning => log_warning
      procedure, pass(this) :: end_section => log_end_section
   end type log_t
-  
+
   !> Global log stream
   type(log_t), public :: neko_log
-  
+  !> Always logged
+  integer, public, parameter :: NEKO_LOG_QUIET = 0
+  !> Default log level
+  integer, public, parameter :: NEKO_LOG_INFO = 1
+  !> Verbose log level
+  integer, public, parameter :: NEKO_LOG_VERBOSE = 2
+  !> Debug log level
+  integer, public, parameter :: NEKO_LOG_DEBUG = 10
+
 contains
 
   !> Initialize a log
@@ -75,9 +83,9 @@ subroutine log_init(this)
     if (envvar_len .gt. 0) then
        read(log_level(1:envvar_len), *) this%level_
     else
-       this%level_ = 1
+       this%level_ = NEKO_LOG_INFO
     end if
-    
+
   end subroutine log_init
 
   !> Increase indention level
@@ -87,7 +95,7 @@ subroutine log_begin(this)
     if (pe_rank .eq. 0) then
        this%indent_ = this%indent_ + 1
     end if
-    
+
   end subroutine log_begin
 
   !> Decrease indention level
@@ -97,20 +105,20 @@ subroutine log_end(this)
     if (pe_rank .eq. 0) then
        this%indent_ = this%indent_ - 1
     end if
-    
+
   end subroutine log_end
-    
-  !> Indent a log 
+
+  !> Indent a log
   subroutine log_indent(this)
     class(log_t), intent(in) :: this
     integer :: i
 
     if (pe_rank .eq. 0) then
        do i = 1, this%indent_
-          write(*,'(A)', advance='no') ' '        
+          write(*,'(A)', advance='no') ' '
        end do
     end if
-    
+
   end subroutine log_indent
 
   !> Write a new line to a log
@@ -119,26 +127,31 @@ subroutine log_newline
     if (pe_rank .eq. 0) then
        write(*,*) ' '
     end if
-    
+
   end subroutine log_newline
 
   !> Write a message to a log
   subroutine log_message(this, msg, lvl)
     class(log_t), intent(in) :: this
     character(len=*), intent(in) :: msg
-    integer, optional, intent(in) :: lvl
+    integer, optional :: lvl
+    integer :: lvl_
 
     if (present(lvl)) then
-       if (lvl .gt. this%level_) then
-          return
-       end if
+       lvl_ = lvl
+    else
+       lvl_ = NEKO_LOG_INFO
+    end if
+
+    if (lvl_ .gt. this%level_) then
+       return
     end if
 
     if (pe_rank .eq. 0) then
        call this%indent()
        write(*, '(A)') trim(msg)
     end if
-    
+
   end subroutine log_message
 
   !> Write an error message to a log
@@ -148,7 +161,7 @@ subroutine log_error(this, msg)
 
     if (pe_rank .eq. 0) then
        call this%indent()
-       write(*, '(A,A,A)') '*** ERROR: ', trim(msg),'  ***'       
+       write(*, '(A,A,A)') '*** ERROR: ', trim(msg),'  ***'
     end if
 
   end subroutine log_error
@@ -160,7 +173,7 @@ subroutine log_warning(this, msg)
 
     if (pe_rank .eq. 0) then
        call this%indent()
-       write(*, '(A,A,A)') '*** WARNING: ', trim(msg),'  ***'       
+       write(*, '(A,A,A)') '*** WARNING: ', trim(msg),'  ***'
     end if
 
   end subroutine log_warning
@@ -177,20 +190,20 @@ subroutine log_section(this, msg)
        this%section_id_ = this%section_id_ + 1
 
        pre = (30 - len_trim(msg)) / 2
-       
+
        write(*,*) ' '
        call this%indent()
        do i = 1, pre
           write(*,'(A)', advance='no') '-'
        end do
-       
+
        write(*,'(A)', advance='no') trim(msg)
        do i = 1, 30 - (len_trim(msg) + pre)
           write(*,'(A)', advance='no') '-'
        end do
        write(*,*) ' '
     end if
-    
+
   end subroutine log_section
 
   !> End a log section
@@ -199,16 +212,16 @@ subroutine log_end_section(this, msg)
     character(len=*), intent(in), optional :: msg
 
     if (present(msg)) then
-       call this%message(msg)
+       call this%message(msg, NEKO_LOG_QUIET)
     end if
-    
+
     if (pe_rank .eq. 0) then
        this%section_id_ = this%section_id_ - 1
        this%indent_ = this%indent_ - this%section_id_
     end if
-    
-  end subroutine log_end_Section
-  
+
+  end subroutine log_end_section
+
   !> Write status banner
   !! @todo move to a future Time module
   subroutine log_status(this, t, T_end)
@@ -218,28 +231,30 @@ subroutine log_status(this, t, T_end)
     character(len=LOG_SIZE) :: log_buf
     real(kind=rp) :: t_prog
 
-     t_prog = 100d0 * t / T_end
+    t_prog = 100d0 * t / T_end
 
-    call this%message('----------------------------------------------------------------')
-    write(log_buf, '(A,E15.7,A,F6.2,A)') 't = ', t,&
-         '                                  [ ',t_prog,'% ]'
+    call this%message('----------------------------------------------------------------', &
+                      NEKO_LOG_QUIET)
+    write(log_buf, '(A,E15.7,A,F6.2,A)') &
+    't = ', t, '                                  [ ',t_prog,'% ]'
 
-    call this%message(log_buf)
-    call this%message('----------------------------------------------------------------')
+    call this%message(log_buf, NEKO_LOG_QUIET)
+    call this%message('----------------------------------------------------------------', &
+                      NEKO_LOG_QUIET)
   end subroutine log_status
-  
+
   !
   ! Rudimentary C interface
   !
 
   !> Write a message to a log (from C)
-  !! @note This assumes the global log stream @a neko_log  
-  subroutine log_message_c(c_msg) bind(c, name='log_message') 
+  !! @note This assumes the global log stream @a neko_log
+  subroutine log_message_c(c_msg) bind(c, name='log_message')
     use, intrinsic :: iso_c_binding
     character(kind=c_char), dimension(*), intent(in) :: c_msg
     character(len=LOG_SIZE) :: msg
     integer :: len, i
-  
+
     if (pe_rank .eq. 0) then
        len = 0
        do
@@ -247,21 +262,21 @@ subroutine log_message_c(c_msg) bind(c, name='log_message')
           len = len + 1
           msg(len:len) = c_msg(len)
        end do
-       
+
        call neko_log%indent()
        write(*, '(A)') trim(msg(1:len))
     end if
-    
+
   end subroutine log_message_c
 
   !> Write an error message to a log (from C)
-  !! @note This assumes the global log stream @a neko_log  
+  !! @note This assumes the global log stream @a neko_log
   subroutine log_error_c(c_msg) bind(c, name="log_error")
     use, intrinsic :: iso_c_binding
     character(kind=c_char), dimension(*), intent(in) :: c_msg
     character(len=LOG_SIZE) :: msg
     integer :: len, i
-    
+
     if (pe_rank .eq. 0) then
        len = 0
        do
@@ -269,21 +284,21 @@ subroutine log_error_c(c_msg) bind(c, name="log_error")
           len = len + 1
           msg(len:len) = c_msg(len)
        end do
-       
+
        call neko_log%indent()
-       write(*, '(A,A,A)') '*** ERROR: ',trim(msg(1:len)),'  ***'       
+       write(*, '(A,A,A)') '*** ERROR: ',trim(msg(1:len)),'  ***'
     end if
-    
+
   end subroutine log_error_c
 
   !> Write a warning message to a log (from C)
-  !! @note This assumes the global log stream @a neko_log  
+  !! @note This assumes the global log stream @a neko_log
   subroutine log_warning_c(c_msg) bind(c, name="log_warning")
     use, intrinsic :: iso_c_binding
     character(kind=c_char), dimension(*), intent(in) :: c_msg
     character(len=LOG_SIZE) :: msg
     integer :: len, i
-    
+
     if (pe_rank .eq. 0) then
        len = 0
        do
@@ -291,21 +306,21 @@ subroutine log_warning_c(c_msg) bind(c, name="log_warning")
           len = len + 1
           msg(len:len) = c_msg(len)
        end do
-       
+
        call neko_log%indent()
-       write(*, '(A,A,A)') '*** WARNING: ',trim(msg(1:len)),'  ***'       
+       write(*, '(A,A,A)') '*** WARNING: ',trim(msg(1:len)),'  ***'
     end if
-    
+
   end subroutine log_warning_c
 
   !> Begin a new log section (from C)
-  !! @note This assumes the global log stream @a neko_log  
+  !! @note This assumes the global log stream @a neko_log
   subroutine log_section_c(c_msg) bind(c, name="log_section")
     use, intrinsic :: iso_c_binding
     character(kind=c_char), dimension(*), intent(in) :: c_msg
     character(len=LOG_SIZE) :: msg
     integer :: len, i
-    
+
     if (pe_rank .eq. 0) then
        len = 0
        do
@@ -316,15 +331,15 @@ subroutine log_section_c(c_msg) bind(c, name="log_section")
 
        call neko_log%section(trim(msg(1:len)))
     end if
-    
+
   end subroutine log_section_c
 
   !> End a log section (from C)
   !! @note This assumes the global log stream @a neko_log
   subroutine log_end_section_c() bind(c, name="log_end_section")
-  
+
     call neko_log%end_section()
-    
+
   end subroutine log_end_section_c
-  
+
 end module logger
diff --git a/src/common/material_properties.f90 b/src/common/material_properties.f90
index 950d0374b00..bb0bd69850f 100644
--- a/src/common/material_properties.f90
+++ b/src/common/material_properties.f90
@@ -33,16 +33,16 @@
 !> Implements material_properties_t type.
 module material_properties
   use num_types, only: rp
-  use json_utils, only : json_get, json_get_or_default
-  use json_module, only : json_file, json_core, json_value
-  use logger, only : neko_log, LOG_SIZE
+  use json_utils, only : json_get
+  use json_module, only : json_file
+  use logger, only : neko_log, LOG_SIZE, NEKO_LOG_VERBOSE
   use user_intf, only : user_t, dummy_user_material_properties, &
                         user_material_properties
   use utils, only : neko_warning, neko_error
   use comm, only : pe_rank
   implicit none
   private
-  
+
   !> Contains all the material properties necessary in the simulation.
   type, public :: material_properties_t
      !> Density, f$\rho \f$.
@@ -58,7 +58,7 @@ module material_properties
      procedure, pass(this) :: init => material_properties_init
      !> Write final dimensional values to the log.
      procedure, private, pass(this) :: write_to_log
-   end type material_properties_t
+  end type material_properties_t
 
 contains
 
@@ -79,10 +79,10 @@ subroutine material_properties_init(this, params, user)
 
     if (.not. associated(user%material_properties, dummy_mp_ptr)) then
 
-          write(log_buf, '(A)') "Material properties must be set in the user&
-                              & file!"
-          call neko_log%message(log_buf)
-          call user%material_properties(0.0_rp, 0, this%rho, this%mu, &
+       write(log_buf, '(A)') "Material properties must be set in the user&
+       & file!"
+       call neko_log%message(log_buf)
+       call user%material_properties(0.0_rp, 0, this%rho, this%mu, &
                                         this%cp, this%lambda, params)
     else
 
@@ -94,19 +94,19 @@ subroutine material_properties_init(this, params, user)
        if (params%valid_path('case.fluid.Re') .and. &
            (params%valid_path('case.fluid.mu') .or. &
             params%valid_path('case.fluid.rho'))) then
-           call neko_error("To set the material properties for the fluid,&
-              & either provide Re OR mu and rho in the case file.")
+          call neko_error("To set the material properties for the fluid,&
+          & either provide Re OR mu and rho in the case file.")
 
-       ! Non-dimensional case
+          ! Non-dimensional case
        else if (params%valid_path('case.fluid.Re')) then
           nondimensional = .true.
 
           write(log_buf, '(A)') 'Non-dimensional fluid material properties &
-                              & input.'
-          call neko_log%message(log_buf, lvl=2)
+          & input.'
+          call neko_log%message(log_buf, lvl=NEKO_LOG_VERBOSE)
           write(log_buf, '(A)') 'Density will be set to 1, dynamic viscosity to&
-                              & 1/Re.'
-          call neko_log%message(log_buf, lvl=2)
+          & 1/Re.'
+          call neko_log%message(log_buf, lvl=NEKO_LOG_VERBOSE)
 
           ! Read Re into mu for further manipulation.
           call json_get(params, 'case.fluid.Re', this%mu)
@@ -119,8 +119,8 @@ subroutine material_properties_init(this, params, user)
           this%rho = 1.0_rp
           ! Invert the Re to get viscosity.
           this%mu = 1.0_rp/this%mu
-       ! Dimensional case
-       else 
+          ! Dimensional case
+       else
           call json_get(params, 'case.fluid.mu', this%mu)
           call json_get(params, 'case.fluid.rho', this%rho)
        end if
@@ -129,33 +129,33 @@ subroutine material_properties_init(this, params, user)
        ! Scalar
        !
        if (.not. params%valid_path('case.scalar')) then
-         ! Set dummy values
-         this%cp = 1.0_rp
-         this%lambda = 1.0_rp
-         call this%write_to_log(.false.)
-         return
+          ! Set dummy values
+          this%cp = 1.0_rp
+          this%lambda = 1.0_rp
+          call this%write_to_log(.false.)
+          return
        end if
 
        ! Incorrect user input
        if (nondimensional .and. &
            (params%valid_path('case.scalar.lambda') .or. &
             params%valid_path('case.scalar.cp'))) then
-           call neko_error("For non-dimensional setup set the Pe number for&
-                         & the scalar")
+          call neko_error("For non-dimensional setup set the Pe number for&
+          & the scalar")
        else if (.not. nondimensional .and. &
                 params%valid_path('case.scalar.Pe')) then
-           call neko_error("Dimensional material properties input detected,&
-                         & because you set rho and mu for the fluid. &
-                         & Please set cp and lambda for the scalar.")
+          call neko_error("Dimensional material properties input detected,&
+          & because you set rho and mu for the fluid. &
+          & Please set cp and lambda for the scalar.")
 
-       ! Non-dimensional case
+          ! Non-dimensional case
        else if (nondimensional) then
           write(log_buf, '(A)') 'Non-dimensional scalar material properties &
-                              & input.'
-          call neko_log%message(log_buf, lvl=2)
+          & input.'
+          call neko_log%message(log_buf, lvl=NEKO_LOG_VERBOSE)
           write(log_buf, '(A)') 'Specific heat capacity will be set to 1, &
-                              & conductivity to 1/Pe.'
-          call neko_log%message(log_buf, lvl=2)
+          & conductivity to 1/Pe.'
+          call neko_log%message(log_buf, lvl=NEKO_LOG_VERBOSE)
 
           ! Read Pe into lambda for further manipulation.
           call json_get(params, 'case.scalar.Pe', this%lambda)
@@ -167,8 +167,8 @@ subroutine material_properties_init(this, params, user)
           this%rho = 1.0_rp
           ! Invert the Pe to get conductivity
           this%lambda = 1.0_rp/this%lambda
-       ! Dimensional case
-       else 
+          ! Dimensional case
+       else
           call json_get(params, 'case.scalar.lambda', this%lambda)
           call json_get(params, 'case.scalar.cp', this%cp)
        end if
diff --git a/src/common/profiler.F90 b/src/common/profiler.F90
index 44b63ed642c..ae00c94f102 100644
--- a/src/common/profiler.F90
+++ b/src/common/profiler.F90
@@ -38,13 +38,17 @@ module profiler
   use roctx
   use craypat
   implicit none
+  private
+
+  public :: profiler_start, profiler_start_region, profiler_end_region, &
+            profiler_stop
 
 contains
 
   !> Start profiling
   subroutine profiler_start
     if ((NEKO_BCKND_CUDA .eq. 1)) then
-#if defined(HAVE_NVTX) 
+#if defined(HAVE_NVTX)
        call device_profiler_start
 #endif
     else
@@ -66,19 +70,24 @@ subroutine profiler_stop
 #endif
     end if
   end subroutine profiler_stop
-  
+
   !> Started a named (@a name) profiler region
-  subroutine profiler_start_region(name)
+  subroutine profiler_start_region(name, region_id)
     character(kind=c_char,len=*) :: name
+    integer, optional :: region_id
 
 #ifdef HAVE_NVTX
-    call nvtxStartRange(name)
+    if (present(region_id)) then
+       call nvtxStartRange(name, region_id)
+    else
+       call nvtxStartRange(name)
+    end if
 #elif HAVE_ROCTX
     call roctxStartRange(name)
 #elif CRAYPAT
- !   call craypat_region_begin(name)
+    !   call craypat_region_begin(name)
 #endif
-    
+
   end subroutine profiler_start_region
 
   !> End the most recently started profiler region
@@ -89,9 +98,9 @@ subroutine profiler_end_region
 #elif HAVE_ROCTX
     call roctxRangePop
 #elif CRAYPAT
- !   call craypat_region_end
+    !   call craypat_region_end
 #endif
-    
+
   end subroutine profiler_end_region
-  
+
 end module profiler
diff --git a/src/common/projection.f90 b/src/common/projection.f90
index 631a408ceb3..36b3005f3eb 100644
--- a/src/common/projection.f90
+++ b/src/common/projection.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 !> Project x onto X, the space of old solutions and back again
@@ -75,6 +75,8 @@ module projection
   use profiler
   use logger
   use, intrinsic :: iso_c_binding
+  use time_step_controller
+
   implicit none
   private
 
@@ -93,25 +95,29 @@ module projection
      !logging variables
      real(kind=rp) :: proj_res
      integer :: proj_m = 0
+     integer :: activ_step ! steps to activate projection
    contains
+     procedure, pass(this) :: clear => bcknd_clear
      procedure, pass(this) :: project_on => bcknd_project_on
      procedure, pass(this) :: project_back => bcknd_project_back
      procedure, pass(this) :: log_info => print_proj_info
      procedure, pass(this) :: init => projection_init
      procedure, pass(this) :: free => projection_free
+     procedure, pass(this) :: pre_solving => projection_pre_solving
+     procedure, pass(this) :: post_solving => projection_post_solving
   end type projection_t
 
 contains
 
-  subroutine projection_init(this, n, L)
+  subroutine projection_init(this, n, L, activ_step)
     class(projection_t), target, intent(inout) :: this
     integer, intent(in) :: n
-    integer, optional, intent(in) :: L
+    integer, optional, intent(in) :: L, activ_step
     integer :: i
     integer(c_size_t) :: ptr_size
     type(c_ptr) :: ptr
     real(c_rp) :: dummy
-     
+
     call this%free()
     
     if (present(L)) then
@@ -120,7 +126,13 @@ subroutine projection_init(this, n, L)
        this%L = 20
     end if
 
-    this%m = 0 
+    if (present(activ_step)) then
+       this%activ_step = activ_step
+    else
+       this%activ_step = 5
+    end if
+
+    this%m = 0
 
     allocate(this%xx(n,this%L))
     allocate(this%bb(n,this%L))
@@ -152,10 +164,12 @@ subroutine projection_init(this, n, L)
        ptr_size = c_sizeof(C_NULL_PTR) * this%L
        call device_alloc(this%xx_d_d, ptr_size)
        ptr = c_loc(this%xx_d)
-       call device_memcpy(ptr,this%xx_d_d, ptr_size, HOST_TO_DEVICE)
+       call device_memcpy(ptr,this%xx_d_d, ptr_size, &
+                          HOST_TO_DEVICE, sync=.false.)
        call device_alloc(this%bb_d_d, ptr_size)
        ptr = c_loc(this%bb_d)
-       call device_memcpy(ptr,this%bb_d_d, ptr_size, HOST_TO_DEVICE)
+       call device_memcpy(ptr,this%bb_d_d, ptr_size, &
+                          HOST_TO_DEVICE, sync=.false.)
     end if
 
 
@@ -202,12 +216,63 @@ subroutine projection_free(this)
 
   end subroutine projection_free
 
+  subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string)
+    class(projection_t), intent(inout) :: this
+    integer, intent(inout) :: n
+    real(kind=rp), intent(inout), dimension(n) :: b
+    integer, intent(in) :: tstep
+    class(coef_t), intent(inout) :: coef
+    type(time_step_controller_t), intent(in) :: dt_controller
+    character(len=*), optional :: string
+
+    if( tstep .gt. this%activ_step .and. this%L .gt. 0) then
+       if (dt_controller%if_variable_dt) then
+          if (dt_controller%dt_last_change .eq. 0) then ! the time step at which dt is changed
+             call this%clear(n) 
+          else if (dt_controller%dt_last_change .gt. this%activ_step - 1) then
+             ! activate projection some steps after dt is changed
+             ! note that dt_last_change start from 0
+             call this%project_on(b, coef, n)
+             if (present(string)) then
+                call this%log_info(string)
+             end if
+          end if
+       else
+          call this%project_on(b, coef, n)
+          if (present(string)) then
+             call this%log_info(string)
+          end if
+       end if
+    end if
+
+  end subroutine projection_pre_solving
+
+  subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, dt_controller)
+    class(projection_t), intent(inout) :: this
+    integer, intent(inout) :: n
+    class(Ax_t), intent(inout) :: Ax
+    class(coef_t), intent(inout) :: coef
+    class(bc_list_t), intent(inout) :: bclst
+    type(gs_t), intent(inout) :: gs_h
+    real(kind=rp), intent(inout), dimension(n) :: x
+    integer, intent(in) :: tstep
+    type(time_step_controller_t), intent(in) :: dt_controller
+
+    if (tstep .gt. this%activ_step .and. this%L .gt. 0) then
+       if (.not.(dt_controller%if_variable_dt) .or. &
+       (dt_controller%dt_last_change .gt. this%activ_step - 1)) then
+          call this%project_back(x, Ax, coef, bclst, gs_h, n)
+       end if
+    end if
+
+  end subroutine projection_post_solving
+
   subroutine bcknd_project_on(this, b, coef, n)
     class(projection_t), intent(inout) :: this
     integer, intent(inout) :: n
-    class(coef_t), intent(inout) :: coef   
-    real(kind=rp), intent(inout), dimension(n) :: b 
-    call profiler_start_region('Project on')
+    class(coef_t), intent(inout) :: coef
+    real(kind=rp), intent(inout), dimension(n) :: b
+    call profiler_start_region('Project on', 16)
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_project_on(this, b, coef, n)
     else
@@ -215,19 +280,18 @@ subroutine bcknd_project_on(this, b, coef, n)
     end if
     call profiler_end_region
   end subroutine bcknd_project_on
-  
+
   subroutine bcknd_project_back(this,x,Ax,coef, bclst, gs_h, n)
     class(projection_t) :: this
     integer, intent(inout) :: n
-    class(Ax_t), intent(inout) :: Ax    
-    class(coef_t), intent(inout) :: coef   
+    class(Ax_t), intent(inout) :: Ax
+    class(coef_t), intent(inout) :: coef
     class(bc_list_t), intent(inout) :: bclst
     type(gs_t), intent(inout) :: gs_h
-    real(kind=rp), intent(inout), dimension(n) :: x 
+    real(kind=rp), intent(inout), dimension(n) :: x
     type(c_ptr) :: x_d
 
-    call profiler_start_region('Project back')
-    this%m = min(this%m+1,this%L)
+    call profiler_start_region('Project back', 17)
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
        x_d = device_get_ptr(x)
@@ -242,7 +306,12 @@ subroutine bcknd_project_back(this,x,Ax,coef, bclst, gs_h, n)
 
     else
        if (this%m.gt.0) call add2(x,this%xbar,n)      ! Restore desired solution
-       this%m = min(this%m+1,this%L)
+       if (this%m .eq. this%L) then
+          this%m = 1
+       else
+          this%m = min(this%m+1,this%L)
+       end if
+       
        call copy        (this%xx(1,this%m),x,n)   ! Update (X,B)
     end if
 
@@ -253,43 +322,43 @@ subroutine bcknd_project_back(this,x,Ax,coef, bclst, gs_h, n)
     if (NEKO_BCKND_DEVICE .eq. 1)  then
        call device_proj_ortho(this, this%xx_d, this%bb_d, coef%mult_d, n)
     else
-       call cpu_proj_ortho  (this,this%xx,this%bb,coef%mult,n) 
+       call cpu_proj_ortho  (this,this%xx,this%bb,coef%mult,n)
     end if
     call profiler_end_region
   end subroutine bcknd_project_back
- 
+
 
 
   subroutine cpu_project_on(this, b, coef, n)
     class(projection_t), intent(inout) :: this
     integer, intent(inout) :: n
-    class(coef_t), intent(inout) :: coef   
-    real(kind=rp), intent(inout), dimension(n) :: b 
+    class(coef_t), intent(inout) :: coef
+    real(kind=rp), intent(inout), dimension(n) :: b
     integer :: i, j, k, ierr
     real(kind=rp) :: work(this%L), alpha(this%L)
 
     associate(xbar => this%xbar, xx => this%xx, &
               bb => this%bb)
-    
+
       if (this%m .le. 0) return
-            
+
       !First round of CGS
       call rzero(alpha, this%m)
-      this%proj_res = glsc3(b,b,coef%mult,n)
+      this%proj_res = sqrt(glsc3(b,b,coef%mult,n)/coef%volume)
       this%proj_m = this%m
       do i = 1, n, NEKO_BLK_SIZE
          j = min(NEKO_BLK_SIZE, n-i+1)
-         do k = 1, this%m 
+         do k = 1, this%m
             alpha(k) = alpha(k) + vlsc3(xx(i,k), coef%mult(i,1,1,1), b(i), j)
          end do
       end do
-      
+
       !First one outside loop to avoid zeroing xbar and bbar
       call MPI_Allreduce(MPI_IN_PLACE, alpha, this%m, &
            MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
-      
+
       call rzero(work, this%m)
-      
+
       do i = 1, n, NEKO_BLK_SIZE
          j = min(NEKO_BLK_SIZE, n-i+1)
          call cmult2(xbar(i), xx(i,1), alpha(1), j)
@@ -303,10 +372,10 @@ subroutine cpu_project_on(this, b, coef, n)
             work(k) = work(k) + vlsc3(xx(i,k), coef%mult(i,1,1,1), b(i), j)
          end do
       end do
-      
+
       call MPI_Allreduce(work, alpha, this%m, &
            MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
-      
+
       do i = 1, n, NEKO_BLK_SIZE
          j = min(NEKO_BLK_SIZE, n-i+1)
          do k = 1,this%m
@@ -320,8 +389,8 @@ end subroutine cpu_project_on
   subroutine device_project_on(this, b, coef, n)
     class(projection_t), intent(inout) :: this
     integer, intent(inout) :: n
-    class(coef_t), intent(inout) :: coef   
-    real(kind=rp), intent(inout), dimension(n) :: b 
+    class(coef_t), intent(inout) :: coef
+    real(kind=rp), intent(inout), dimension(n) :: b
     real(kind=rp) :: alpha(this%L)
     type(c_ptr) :: b_d
     integer :: i
@@ -329,12 +398,12 @@ subroutine device_project_on(this, b, coef, n)
 
     associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, &
               bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
-    
+
       if (this%m .le. 0) return
 
 
 
-      this%proj_res = device_glsc3(b_d,b_d,coef%mult_d,n)
+      this%proj_res = sqrt(device_glsc3(b_d,b_d,coef%mult_d,n)/coef%volume)
       this%proj_m = this%m
       if (NEKO_DEVICE_MPI .and. (NEKO_BCKND_OPENCL .ne. 1)) then
          call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, &
@@ -346,12 +415,13 @@ subroutine device_project_on(this, b, coef, n)
             end do
          else
             call device_glsc3_many(alpha,b_d,xx_d_d,coef%mult_d,this%m,n)
-            call device_memcpy(alpha, alpha_d, this%m, HOST_TO_DEVICE) 
+            call device_memcpy(alpha, alpha_d, this%m, &
+                               HOST_TO_DEVICE, sync=.false.)
          end if
          call device_rzero(xbar_d, n)
          if (NEKO_BCKND_OPENCL .eq. 1) then
             do i = 1, this%m
-               call device_add2s2(xbar_d, xx_d(i), alpha(i), n)            
+               call device_add2s2(xbar_d, xx_d(i), alpha(i), n)
             end do
             call cmult(alpha, -1.0_rp, this%m)
          else
@@ -367,7 +437,8 @@ subroutine device_project_on(this, b, coef, n)
          else
             call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
             call device_glsc3_many(alpha,b_d,xx_d_d,coef%mult_d,this%m,n)
-            call device_memcpy(alpha, alpha_d, this%m, HOST_TO_DEVICE)
+            call device_memcpy(alpha, alpha_d, this%m, &
+                               HOST_TO_DEVICE, sync=.false.)
          end if
 
          if (NEKO_BCKND_OPENCL .eq. 1) then
@@ -382,7 +453,7 @@ subroutine device_project_on(this, b, coef, n)
             call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n)
          end if
       end if
-      
+
     end associate
   end subroutine device_project_on
 
@@ -398,7 +469,7 @@ subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
 
     associate(m => this%m,  xx_d_d => this%xx_d_d, &
               bb_d_d => this%bb_d_d, alpha_d => this%alpha_d)
-      
+
       if(m .le. 0) return
 
       if (NEKO_DEVICE_MPI .and. (NEKO_BCKND_OPENCL .ne. 1)) then
@@ -418,14 +489,15 @@ subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
             do i = 1, m - 1
                call device_add2s2(xx_d(m),xx_d(i),alpha(i), n)
                call device_add2s2(bb_d(m),bb_d(i),alpha(i),n)
-         
+
                alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n)
             end do
          else
-            call device_memcpy(alpha, alpha_d, this%m, HOST_TO_DEVICE) 
+            call device_memcpy(alpha, alpha_d, this%m, &
+                               HOST_TO_DEVICE, sync=.false.)
             call device_add2s2_many(xx_d(m),xx_d_d,alpha_d,m-1,n)
             call device_add2s2_many(bb_d(m),bb_d_d,alpha_d,m-1,n)
-         
+
             call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
          end if
          call cmult(alpha, -1.0_rp,m)
@@ -436,22 +508,23 @@ subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
                alpha(i) =  device_glsc3(bb_d(m),xx_d(i),w_d,n)
             end do
          else
-            call device_memcpy(alpha, alpha_d, m, HOST_TO_DEVICE) 
+            call device_memcpy(alpha, alpha_d, m, &
+                               HOST_TO_DEVICE, sync=.false.)
             call device_add2s2_many(xx_d(m),xx_d_d,alpha_d,m-1,n)
             call device_add2s2_many(bb_d(m),bb_d_d,alpha_d,m-1,n)
             call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n)
          end if
       end if
-      
+
       alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n)
       alpha(m) = sqrt(alpha(m))
 
-      if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent    
-         scl = 1.0_rp / alpha(m) 
-         call device_cmult(xx_d(m), scl, n)   
-         call device_cmult(bb_d(m), scl, n)   
+      if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
+         scl = 1.0_rp / alpha(m)
+         call device_cmult(xx_d(m), scl, n)
+         call device_cmult(bb_d(m), scl, n)
+
 
-        
       else !New vector is not linearly independent, forget about it
          if(pe_rank .eq. 0) then
             call neko_warning('New vector not linearly indepependent!')
@@ -462,8 +535,8 @@ subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n)
     end associate
 
   end subroutine device_proj_ortho
-  
-  
+
+
   subroutine cpu_proj_ortho(this, xx, bb, w, n)
     type(projection_t)  :: this
     integer, intent(inout) :: n
@@ -474,11 +547,11 @@ subroutine cpu_proj_ortho(this, xx, bb, w, n)
     integer :: i, j, k, h, ierr
 
     associate(m => this%m)
-      
-      if(m .le. 0) return !No vectors to ortho-normalize 
+
+      if(m .le. 0) return !No vectors to ortho-normalize
 
       ! AX = B
-      ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb     
+      ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb
       call rzero(alpha, m)
       do i = 1, n, NEKO_BLK_SIZE
          j = min(NEKO_BLK_SIZE, n-i+1)
@@ -487,13 +560,13 @@ subroutine cpu_proj_ortho(this, xx, bb, w, n)
                  + vlsc3(bb(i,k), w(i), xx(i,m), j))
          end do
       end do
-      
+
       call MPI_Allreduce(MPI_IN_PLACE, alpha, this%m, &
            MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
-      
+
       nrm = sqrt(alpha(m)) !Calculate A-norm of new vector
-      
-    
+
+
       do i = 1, n, NEKO_BLK_SIZE
          j = min(NEKO_BLK_SIZE, n-i+1)
          do k = 1,m-1
@@ -502,7 +575,7 @@ subroutine cpu_proj_ortho(this, xx, bb, w, n)
          end do
       end do
       call rzero(beta,m)
-     
+
       do i = 1, n, NEKO_BLK_SIZE
          j = min(NEKO_BLK_SIZE, n-i+1)
          do k = 1,m-1
@@ -515,7 +588,7 @@ subroutine cpu_proj_ortho(this, xx, bb, w, n)
            MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
 
       alpha(m) = 0.0_rp
-      
+
       do i = 1, n, NEKO_BLK_SIZE
          j = min(NEKO_BLK_SIZE,n-i+1)
          do k = 1, m-1
@@ -527,37 +600,19 @@ subroutine cpu_proj_ortho(this, xx, bb, w, n)
       do k = 1, m-1
          alpha(k) = alpha(k) + beta(k)
       end do
-      
-      !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n) 
+
+      !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n)
       call MPI_Allreduce(MPI_IN_PLACE, alpha(m), 1, &
            MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
       alpha(m) = sqrt(alpha(m))
       !dx and db now stored in last column of xx and bb
 
-      if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent    
+      if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent
          !Normalize dx and db
-         scl1 = 1.0_rp / alpha(m) 
-         call cmult(xx(1,m), scl1, n)   
-         call cmult(bb(1,m), scl1, n)   
-
-         !We want to throw away the oldest information
-         !The below propagates newest information to first vector.
-         !This will make the first vector a scalar 
-         !multiple of x.
-         do k = m, 2, -1
-            h = k - 1   
-            call givens_rotation(alpha(h), alpha(k), c, s, nrm)
-            alpha(h) = nrm     
-            do i = 1, n !Apply rotation to xx and bb
-               scl1 = c*xx(i,h) + s*xx(i,k)
-               xx(i,k) = -s*xx(i,h) + c*xx(i,k)
-               xx(i,h) = scl1       
-               scl2 = c*bb(i,h) + s*bb(i,k)
-               bb(i,k) = -s*bb(i,h) + c*bb(i,k)    
-               bb(i,h) = scl2        
-            end do
-         end do
-         
+         scl1 = 1.0_rp / alpha(m)
+         call cmult(xx(1,m), scl1, n)
+         call cmult(bb(1,m), scl1, n)
+
       else !New vector is not linearly independent, forget about it
          k = m !location of rank deficient column
          if(pe_rank .eq. 0) then
@@ -567,40 +622,43 @@ subroutine cpu_proj_ortho(this, xx, bb, w, n)
       endif
 
     end associate
-    
+
   end subroutine cpu_proj_ortho
-  
-  subroutine givens_rotation(a, b, c, s, r)
-    real(kind=rp), intent(inout) :: a, b, c, s, r
-    real(kind=rp) ::  h, d
-
-    if(b .ne. 0.0_rp) then
-       h = hypot(a, b) 
-       d = 1.0_rp / h
-       c = abs(a) * d
-       s = sign(d, a) * b
-       r = sign(1.0_rp, a) * h
-    else
-       c = 1.0_rp
-       s = 0.0_rp
-       r = a
-    endif
-      
-    return
-  end subroutine givens_rotation
 
   subroutine print_proj_info(this,string)
     class(projection_t) :: this
     character(len=*) :: string
     character(len=LOG_SIZE) :: log_buf
-
-    write(log_buf, '(A,A)') 'Projection ', string
-    call neko_log%message(log_buf)
-    write(log_buf, '(A,A)') 'Proj. vec.:','   Orig. residual:'
-    call neko_log%message(log_buf)
-    write(log_buf, '(I11,3x, E15.7,5x)')  this%proj_m, this%proj_res
-    call neko_log%message(log_buf)
+    
+    if (this%proj_m .gt. 0) then
+       write(log_buf, '(A,A)') 'Projection ', string
+       call neko_log%message(log_buf)
+       write(log_buf, '(A,A)') 'Proj. vec.:','   Orig. residual:'
+       call neko_log%message(log_buf)
+       write(log_buf, '(I11,3x, E15.7,5x)')  this%proj_m, this%proj_res
+       call neko_log%message(log_buf)
+    end if
 
   end subroutine print_proj_info
 
+  subroutine bcknd_clear(this, n)
+    class(projection_t) :: this
+    integer, intent(in) :: n
+    integer :: i
+
+    this%m = 0
+    this%proj_m = 0
+    
+    do i = 1, this%L
+       if (NEKO_BCKND_DEVICE .eq. 1) then
+          call device_rzero(this%xx_d(i), n)
+          call device_rzero(this%xx_d(i), n)
+       else
+          call rzero(this%xx(1,i),n)
+          call rzero(this%bb(1,i),n)
+       end if
+    end do
+
+  end subroutine bcknd_clear
+
 end module projection
diff --git a/src/common/rhs_maker.f90 b/src/common/rhs_maker.f90
index dce8aa36fd8..287649214c2 100644
--- a/src/common/rhs_maker.f90
+++ b/src/common/rhs_maker.f90
@@ -90,11 +90,10 @@ end subroutine rhs_maker_ext
   end interface
 
   abstract interface
-     subroutine scalar_rhs_maker_ext(temp1, fs_lag, fs_laglag, fs, rho, &
+     subroutine scalar_rhs_maker_ext(fs_lag, fs_laglag, fs, rho, &
           ext_coeffs, n)
        import field_t
        import rp
-       type(field_t), intent(inout) :: temp1
        type(field_t), intent(inout) :: fs_lag
        type(field_t), intent(inout) :: fs_laglag
        real(kind=rp), intent(inout) :: rho, ext_coeffs(4)
@@ -111,7 +110,7 @@ subroutine rhs_maker_bdf(ulag, vlag, wlag, bfx, bfy, bfz, &
        import rp
        integer, intent(in) :: n, nbd
        type(field_t), intent(in) :: u, v, w
-       type(field_series_t), intent(in) :: ulag, vlag, wlag        
+       type(field_series_t), intent(in) :: ulag, vlag, wlag
        real(kind=rp), intent(inout) :: bfx(n), bfy(n), bfz(n)
        real(kind=rp), intent(in) :: B(n)
        real(kind=rp), intent(in) :: dt, rho, bd(4)
@@ -119,13 +118,12 @@ end subroutine rhs_maker_bdf
   end interface
 
   abstract interface
-     subroutine scalar_rhs_maker_bdf(temp1, temp2, s_lag, fs, s, B, rho, dt,&
+     subroutine scalar_rhs_maker_bdf(s_lag, fs, s, B, rho, dt,&
           bd, nbd, n)
        import field_series_t
        import field_t
        import rp
        integer, intent(in) :: n, nbd
-       type(field_t), intent(inout) :: temp1, temp2
        type(field_t), intent(in) :: s
        type(field_series_t), intent(in) :: s_lag
        real(kind=rp), intent(inout) :: fs(n)
diff --git a/src/common/rhs_maker_fctry.f90 b/src/common/rhs_maker_fctry.f90
index 44428a13d7c..462b5008f8d 100644
--- a/src/common/rhs_maker_fctry.f90
+++ b/src/common/rhs_maker_fctry.f90
@@ -38,6 +38,9 @@ module rhs_maker_fctry
   use rhs_maker_device
   use neko_config
   implicit none
+  private
+
+  public :: rhs_maker_sumab_fctry, rhs_maker_ext_fctry, rhs_maker_bdf_fctry
 
 contains
 
@@ -86,7 +89,7 @@ subroutine rhs_maker_bdf_fctry(makebdf)
        allocate(rhs_maker_bdf_sx_t::makebdf)
     else if (NEKO_BCKND_DEVICE .eq. 1) then
        allocate(rhs_maker_bdf_device_t::makebdf)
-    else       
+    else
        allocate(rhs_maker_bdf_cpu_t::makebdf)
     end if
 
diff --git a/src/common/sampler.f90 b/src/common/sampler.f90
index 3ca495bf077..13cbfcb2ab5 100644
--- a/src/common/sampler.f90
+++ b/src/common/sampler.f90
@@ -30,9 +30,10 @@
 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 ! POSSIBILITY OF SUCH DAMAGE.
 !
-!> Defines a sampler 
+!> Defines a sampler
 module sampler
-  use output
+  use output, only: output_t
+  use fld_file, only: fld_file_t
   use comm
   use logger
   use utils, only : neko_error
@@ -56,7 +57,7 @@ module sampler
      !> Number of outputs.
      integer :: n
      !> Number of entries in the list.
-     integer :: size 
+     integer :: size
      !> Final time of the simulation.
      real(kind=rp) :: T_end
    contains
@@ -109,10 +110,10 @@ subroutine sampler_free(this)
     class(sampler_t), intent(inout) :: this
 
     if (allocated(this%output_list)) then
-       deallocate(this%output_list)       
+       deallocate(this%output_list)
     end if
     if (allocated(this%controllers)) then
-       deallocate(this%controllers)       
+       deallocate(this%controllers)
     end if
 
     this%n = 0
@@ -130,6 +131,7 @@ subroutine sampler_add(this, out, write_par, write_control)
     type(time_based_controller_t), allocatable :: tmp_ctrl(:)
     character(len=LOG_SIZE) :: log_buf
     integer :: n
+    class(*), pointer :: ft
 
     if (this%n .ge. this%size) then
        allocate(tmp(this%size * 2))
@@ -152,37 +154,48 @@ subroutine sampler_add(this, out, write_par, write_control)
     else
        call this%controllers(n)%init(this%T_end, write_control, write_par)
     end if
-    
+
     ! The code below only prints to console
     call neko_log%section('Adding write output')
-    call neko_log%message('File name: '// &
+    call neko_log%message('File name        : '// &
           trim(this%output_list(this%n)%outp%file_%file_type%fname))
-    call neko_log%message( 'Write control: '//trim(write_control))
-    if (trim(write_control) .eq. 'simulationtime') then
-        write(log_buf, '(A,ES13.6)') 'Writes per time unit (Freq.): ', &
+    call neko_log%message('Write control    : '//trim(write_control))
+
+    ! Show the output precision if we are outputting an fld file
+    select type(ft => out%file_%file_type)
+    type is (fld_file_t)
+       if (ft%dp_precision) then
+          call neko_log%message('Output precision : double')
+       else
+          call neko_log%message('Output precision : single')
+       end if
+    end select
+
+   if (trim(write_control) .eq. 'simulationtime') then
+       write(log_buf, '(A,ES13.6)') 'Writes per time unit (Freq.): ', &
              this%controllers(n)%frequency
-        call neko_log%message(log_buf)
-        write(log_buf, '(A,ES13.6)') 'Time between writes: ', & 
+       call neko_log%message(log_buf)
+       write(log_buf, '(A,ES13.6)') 'Time between writes: ', &
           this%controllers(n)%time_interval
-        call neko_log%message(log_buf)
+       call neko_log%message(log_buf)
     else if (trim(write_control) .eq. 'nsamples') then
-        write(log_buf, '(A,I13)') 'Total samples: ',  int(write_par)
-        call neko_log%message(log_buf)
-        write(log_buf, '(A,ES13.6)') 'Writes per time unit (Freq.): ',  &
+       write(log_buf, '(A,I13)') 'Total samples: ',  int(write_par)
+       call neko_log%message(log_buf)
+       write(log_buf, '(A,ES13.6)') 'Writes per time unit (Freq.): ',  &
              this%controllers(n)%frequency
-        call neko_log%message(log_buf)
-        write(log_buf, '(A,ES13.6)') 'Time between writes: ', & 
+       call neko_log%message(log_buf)
+       write(log_buf, '(A,ES13.6)') 'Time between writes: ', &
           this%controllers(n)%time_interval
-        call neko_log%message(log_buf)
-    else if (trim(write_control) .eq. 'tsteps') then 
-        write(log_buf, '(A,I13)') 'Time step interval: ',  int(write_par)
-        call neko_log%message(log_buf)
+       call neko_log%message(log_buf)
+    else if (trim(write_control) .eq. 'tsteps') then
+       write(log_buf, '(A,I13)') 'Time step interval: ',  int(write_par)
+       call neko_log%message(log_buf)
     else if (trim(write_control) .eq. 'org') then
-        write(log_buf, '(A)') &
+       write(log_buf, '(A)') &
              'Write control not set, defaulting to first output settings'
-        call neko_log%message(log_buf)
+       call neko_log%message(log_buf)
     end if
-    
+
     call neko_log%end_section()
   end subroutine sampler_add
 
@@ -212,13 +225,13 @@ subroutine sampler_sample(this, t, tstep, ifforce)
     write_output = .false.
     ! Determine if at least one output needs to be written
     ! We should not need this extra select block, and it works great
-    ! without it for GNU, Intel and NEC, but breaks horribly on Cray         
-    ! (>11.0.x) when using high opt. levels.  
+    ! without it for GNU, Intel and NEC, but breaks horribly on Cray
+    ! (>11.0.x) when using high opt. levels.
     select type (samp => this)
     type is (sampler_t)
        do i = 1, samp%n
           if (this%controllers(i)%check(t, tstep, force)) then
-             write_output = .true.            
+             write_output = .true.
              exit
           end if
        end do
@@ -230,16 +243,16 @@ subroutine sampler_sample(this, t, tstep, ifforce)
 
     ! Loop through the outputs and write if necessary.
     ! We should not need this extra select block, and it works great
-    ! without it for GNU, Intel and NEC, but breaks horribly on Cray         
-    ! (>11.0.x) when using high opt. levels.  
+    ! without it for GNU, Intel and NEC, but breaks horribly on Cray
+    ! (>11.0.x) when using high opt. levels.
     select type (samp => this)
     type is (sampler_t)
        do i = 1, this%n
           if (this%controllers(i)%check(t, tstep, force)) then
-             call neko_log%message('File name: '// &
+             call neko_log%message('File name     : '// &
                   trim(samp%output_list(i)%outp%file_%file_type%fname))
 
-             write(log_buf, '(A,I6)') 'Output number:', &
+             write(log_buf, '(A,I6)') 'Output number :', &
                   int(this%controllers(i)%nexecutions)
              call neko_log%message(log_buf)
 
@@ -251,7 +264,7 @@ subroutine sampler_sample(this, t, tstep, ifforce)
     class default
        call neko_error('Invalid sampler output list')
     end select
-    
+
     call MPI_Barrier(NEKO_COMM, ierr)
     sample_end_time = MPI_WTIME()
 
@@ -281,9 +294,9 @@ subroutine sampler_set_counter(this, t)
           call this%output_list(i)%outp%set_start_counter(this%controllers(i)%nexecutions)
        end if
     end do
-    
+
   end subroutine sampler_set_counter
- 
+
   !> Set sampling counter (after restart) explicitly
   subroutine sampler_set_sample_count(this, sample_number)
     class(sampler_t), intent(inout) :: this
@@ -295,8 +308,8 @@ subroutine sampler_set_sample_count(this, sample_number)
        call this%output_list(i)%outp%set_counter(this%controllers(i)%nexecutions)
        call this%output_list(i)%outp%set_start_counter(this%controllers(i)%nexecutions)
     end do
-    
+
   end subroutine sampler_set_sample_count
-  
- 
+
+
 end module sampler
diff --git a/src/common/signal.f90 b/src/common/signal.f90
index fdc3a3f6c8f..29358e24e1e 100644
--- a/src/common/signal.f90
+++ b/src/common/signal.f90
@@ -49,7 +49,7 @@ integer (c_int8_t) function sighdl_usr() &
        use, intrinsic :: iso_c_binding
      end function sighdl_usr
   end interface
-  
+
   interface
      integer (c_int) function sighdl_set_timeout(sec) &
           bind(c, name='sighdl_set_timeout')
@@ -58,7 +58,7 @@ integer (c_int) function sighdl_set_timeout(sec) &
        integer(c_int) :: sec
      end function sighdl_set_timeout
   end interface
-  
+
   interface
      integer (c_int) function sighdl_trap_cpulimit() &
           bind(c, name='sighdl_trap_cpulimit')
@@ -74,7 +74,7 @@ end function sighdl_trap_usr
   end interface
 
 contains
-  
+
   !> Check if any timeout has occurred (either SIGXCPU or SIGALRM)
   function signal_timeout() result(timeout)
     logical :: timeout
@@ -84,7 +84,7 @@ function signal_timeout() result(timeout)
     else
        timeout = .false.
     end if
-    
+
   end function signal_timeout
 
   !> Check if a user signal has been raised
@@ -98,15 +98,15 @@ function signal_usr(usr) result(raised)
     end if
 
     usr12 = sighdl_usr()
-    
+
     if (bge(usr12, usr)) then
        raised = .true.
     else
        raised = .false.
     end if
-    
+
   end function signal_usr
-  
+
   !> Set a timeout after @a seconds
   subroutine signal_set_timeout(sec)
     integer(kind=c_int) :: sec
@@ -114,9 +114,9 @@ subroutine signal_set_timeout(sec)
     if (sighdl_set_timeout(sec) .lt. 0) then
        call neko_error('sighdl failed to set SIGALRM')
     end if
-    
+
   end subroutine signal_set_timeout
-  
+
   !> Initialize signal handler to trap SIGXCPU
   subroutine signal_trap_cpulimit()
     logical, save :: initialized = .false.
@@ -127,7 +127,7 @@ subroutine signal_trap_cpulimit()
        end if
        initialized = .true.
     end if
-    
+
   end subroutine signal_trap_cpulimit
 
   !> Initialize signal handler to trap SIGUSR1 and SIGUSR2
@@ -140,7 +140,7 @@ subroutine signal_trap_usr()
        end if
        initialized = .true.
     end if
-    
+
   end subroutine signal_trap_usr
-  
+
 end module signal
diff --git a/src/common/statistics.f90 b/src/common/statistics.f90
index 4b5c8fde555..074c77ade6f 100644
--- a/src/common/statistics.f90
+++ b/src/common/statistics.f90
@@ -67,7 +67,7 @@ subroutine stats_init(this, T_begin, samp_interval, size)
     integer, intent(in) :: samp_interval
     integer, intent(inout), optional ::size
     integer :: n, i
-    
+
     call this%free()
 
     if (present(size)) then
@@ -87,7 +87,7 @@ subroutine stats_init(this, T_begin, samp_interval, size)
     this%T_begin = T_begin
     this%samp_interval = samp_interval
     this%t_diff = 0.0
-    
+
   end subroutine stats_init
 
   !> Deallocate
@@ -99,7 +99,7 @@ subroutine stats_free(this)
     end if
 
     this%n = 0
-    this%size = 0    
+    this%size = 0
   end subroutine stats_free
 
   !> Add a statistic quantitiy @a quant to the backend
@@ -126,7 +126,7 @@ subroutine stats_eval(this, t, dt, tstep)
     real(kind=rp), intent(in) :: dt
     integer, intent(in) :: tstep
     integer :: i, ierr
-    character(len=LOG_SIZE) :: log_buf    
+    character(len=LOG_SIZE) :: log_buf
     real(kind=rp) :: sample_start_time, sample_end_time
     real(kind=dp) :: sample_time
 
diff --git a/src/common/stats_quant.f90 b/src/common/stats_quant.f90
index 6705cd1b55b..750321a68b6 100644
--- a/src/common/stats_quant.f90
+++ b/src/common/stats_quant.f90
@@ -51,5 +51,5 @@ subroutine stats_quant_update(this, k)
        real(kind=rp), intent(in) :: k
      end subroutine stats_quant_update
   end interface
-  
+
 end module stats_quant
diff --git a/src/common/structs.f90 b/src/common/structs.f90
index 97c03c4e205..511d75b0d79 100644
--- a/src/common/structs.f90
+++ b/src/common/structs.f90
@@ -3,11 +3,11 @@ module structs
   use num_types
   implicit none
   private
-  
+
   type, public :: struct_curve_t
-    real(kind=dp) :: curve_data(5,12)
-    integer :: curve_type(12)
-    integer :: el_idx
+     real(kind=dp) :: curve_data(5,12)
+     integer :: curve_type(12)
+     integer :: el_idx
   end type struct_curve_t
 
   !> Pointer to array
diff --git a/src/common/system.f90 b/src/common/system.f90
index 117c80ddd3b..4de8c37d4b5 100644
--- a/src/common/system.f90
+++ b/src/common/system.f90
@@ -51,21 +51,21 @@ end subroutine system_cpuid
 
   public :: system_cpuid, system_cpu_name
 
- contains
+contains
 
-   !> Retrieve the system's CPU name (type)
-   !! @param name Stores the retrieved name.
-   subroutine system_cpu_name(name)
-     character(len=*), intent(inout) :: name
-     character(kind=c_char, len=80), target :: c_name
-     integer :: end_pos
+  !> Retrieve the system's CPU name (type)
+  !! @param name Stores the retrieved name.
+  subroutine system_cpu_name(name)
+    character(len=*), intent(inout) :: name
+    character(kind=c_char, len=80), target :: c_name
+    integer :: end_pos
 
-     call system_cpuid(c_loc(c_name), 80)
+    call system_cpuid(c_loc(c_name), 80)
+
+    end_pos = scan(c_name, C_NULL_CHAR)
+    if(end_pos .ge. 2) then
+       name(1:end_pos-1) = c_name(1:end_pos-1)
+    end if
+  end subroutine system_cpu_name
 
-     end_pos = scan(c_name, C_NULL_CHAR)
-     if(end_pos .ge. 2) then
-        name(1:end_pos-1) = c_name(1:end_pos-1)
-     end if
-   end subroutine system_cpu_name
-  
 end module system
diff --git a/src/common/time_based_controller.f90 b/src/common/time_based_controller.f90
index f35072489b1..f7fb1d1fed7 100644
--- a/src/common/time_based_controller.f90
+++ b/src/common/time_based_controller.f90
@@ -57,16 +57,21 @@ module time_based_controller
      integer :: nexecutions = 0
      !> Whether to never output.
      logical :: never = .false.
+     !> Control mode defining the meaning of `control_value`.
+     !> Can be `simulationtime`, `tsteps`, `nsamples` or `never`.
+     character(len=:), allocatable :: control_mode
+     !> Defines the frequency of writes.
+     real(kind=rp) :: control_value
 
    contains
-    !> Constructor.
+     !> Constructor.
      procedure, pass(this) :: init => time_based_controller_init
-    !> Check if the execution should be performed.
+     !> Check if the execution should be performed.
      procedure, pass(this) :: check => time_based_controller_check
-    !> Increment `nexectutions`.
+     !> Increment `nexectutions`.
      procedure, pass(this) :: register_execution => &
        time_based_controller_register_execution
-    !> Set the counter based on a time (for restarts)
+     !> Set the counter based on a time (for restarts)
      procedure, pass(this) :: set_counter => &
        time_based_controller_set_counter
 
@@ -77,7 +82,7 @@ module time_based_controller
   end interface assignment(=)
 
 contains
-  
+
   !> Constructor.
   !! @param end_time The final simulation time.
   !! @param control_mode The way to interpret the `control_value` parameter.
@@ -90,29 +95,31 @@ subroutine time_based_controller_init(this, end_time, control_mode, &
     real(kind=rp), intent(in) :: control_value
 
     this%end_time = end_time
+    this%control_mode = control_mode
+    this%control_value = control_value
 
     if (trim(control_mode) .eq. 'simulationtime') then
-        this%time_interval = control_value
-        this%frequency = 1/this%time_interval
-        this%nsteps = 0 
+       this%time_interval = control_value
+       this%frequency = 1/this%time_interval
+       this%nsteps = 0
     else if (trim(control_mode) .eq. 'nsamples') then
-        if (control_value .le. 0) then
-           call neko_error("nsamples must be positive")
-        end if
-
-        this%frequency = control_value / end_time
-        this%time_interval = 1.0_rp / this%frequency
-        this%nsteps = 0
-    else if (trim(control_mode) .eq. 'tsteps') then 
-        this%nsteps = control_value
-        ! if the timestep will be variable, we cannot compute these.
-        this%frequency = 0
-        this%time_interval = 0
-    else if (trim(control_mode) .eq. 'never') then 
-        this%never = .true.
+       if (control_value .le. 0) then
+          call neko_error("nsamples must be positive")
+       end if
+
+       this%frequency = control_value / end_time
+       this%time_interval = 1.0_rp / this%frequency
+       this%nsteps = 0
+    else if (trim(control_mode) .eq. 'tsteps') then
+       this%nsteps = control_value
+       ! if the timestep will be variable, we cannot compute these.
+       this%frequency = 0
+       this%time_interval = 0
+    else if (trim(control_mode) .eq. 'never') then
+       this%never = .true.
     else
-        call neko_error("The control parameter must be simulationtime, nsamples&
-          & tsteps, or never, but received "//trim(control_mode))
+       call neko_error("The control parameter must be simulationtime, nsamples&
+       & tsteps, or never, but received "//trim(control_mode))
     end if
   end subroutine time_based_controller_init
 
@@ -139,18 +146,18 @@ function time_based_controller_check(this, t, tstep, force) result(check)
 
     check = .false.
     if (ifforce) then
-        check = .true.
+       check = .true.
     else if (this%never) then
-        check = .false.
+       check = .false.
     else if ( (this%nsteps .eq. 0) .and. &
               (t .ge. this%nexecutions * this%time_interval) ) then
-        check = .true.
+       check = .true.
     else if (this%nsteps .gt. 0) then
        if (mod(tstep, this%nsteps) .eq. 0) then
-        check = .true.
+          check = .true.
        end if
     end if
-  end function
+  end function time_based_controller_check
 
   !> Assignment operator. Simply copies attribute values.
   !! @param ctrl1 Left-hand side.
diff --git a/src/common/time_interpolator.f90 b/src/common/time_interpolator.f90
index 8f73c56632d..5851ce53375 100644
--- a/src/common/time_interpolator.f90
+++ b/src/common/time_interpolator.f90
@@ -76,7 +76,7 @@ end subroutine time_interpolator_init
   !> Destructor
   subroutine time_interpolator_free(this)
     class(time_interpolator_t), intent(inout) :: this
- 
+
   end subroutine time_interpolator_free
 
   !> Interpolate a field at time t from fields at time t-dt and t+dt
@@ -88,26 +88,26 @@ end subroutine time_interpolator_free
   !! @param f_future time in future for interpolation
   subroutine time_interpolator_interpolate(this, t, f, t_past, f_past, t_future, f_future)
     class(time_interpolator_t), intent(inout) :: this
-    real(kind=rp), intent(inout) :: t, t_past, t_future 
+    real(kind=rp), intent(inout) :: t, t_past, t_future
     type(field_t), intent(inout) :: f, f_past, f_future
     real(kind=rp) :: w_past, w_future !Weights for the interpolation
     integer :: n
 
     if (this%order .eq. 2) then
 
-      n = f%dof%size()
-      w_past   = ( t_future - t ) / ( t_future - t_past )
-      w_future = ( t - t_past ) / ( t_future - t_past )
-
-      if (NEKO_BCKND_DEVICE .eq. 1) then
-        call device_add3s2(f%x_d, f_past%x_d, f_future%x_d, &
-        w_past, w_future, n) 
-      else
-        call add3s2(f%x, f_past%x, f_future%x, w_past, w_future, n)
-      end if
-    
+       n = f%dof%size()
+       w_past   = ( t_future - t ) / ( t_future - t_past )
+       w_future = ( t - t_past ) / ( t_future - t_past )
+
+       if (NEKO_BCKND_DEVICE .eq. 1) then
+          call device_add3s2(f%x_d, f_past%x_d, f_future%x_d, &
+        w_past, w_future, n)
+       else
+          call add3s2(f%x, f_past%x, f_future%x, w_past, w_future, n)
+       end if
+
     else
-      call neko_error("Time interpolation of required order is not implemented")
+       call neko_error("Time interpolation of required order is not implemented")
     end if
 
   end subroutine time_interpolator_interpolate
diff --git a/src/common/time_step_controller.f90 b/src/common/time_step_controller.f90
new file mode 100644
index 00000000000..0737ef21041
--- /dev/null
+++ b/src/common/time_step_controller.f90
@@ -0,0 +1,149 @@
+! Copyright (c) 2022, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Implements type time_step_controller.
+module time_step_controller
+  use num_types
+  use logger
+  use json_module, only : json_file
+  use json_utils, only : json_get_or_default
+  implicit none
+  private
+
+  !> Provides a tool to set time step dt
+  type, public :: time_step_controller_t
+     !> Components recording time stepping info
+     logical :: if_variable_dt
+     real(kind=rp) :: set_cfl
+     real(kind=rp) :: max_dt
+     integer :: max_update_frequency
+     integer :: dt_last_change
+     real(kind=rp) :: alpha !coefficient of running average
+     real(kind=rp) :: max_dt_increase_factor, min_dt_decrease_factor
+   contains
+     !> Initialize object.
+     procedure, pass(this) :: init => time_step_controller_init
+     !> Set time stepping
+     procedure, pass(this) :: set_dt => time_step_controller_set_dt
+
+  end type time_step_controller_t
+
+contains
+
+  !> Constructor
+  !! @param order order of the interpolation
+  subroutine time_step_controller_init(this, params)
+    class(time_step_controller_t), intent(inout) :: this
+    type(json_file), intent(inout) :: params
+
+    this%dt_last_change = 0
+    call json_get_or_default(params, 'case.variable_timestep',&
+                                    this%if_variable_dt, .false.)
+    call json_get_or_default(params, 'case.target_cfl',&
+                                    this%set_cfl, 0.4_rp)
+    call json_get_or_default(params, 'case.max_timestep',&
+                                    this%max_dt, huge(0.0_rp))
+    call json_get_or_default(params, 'case.cfl_max_update_frequency',&
+                                    this%max_update_frequency, 0)
+    call json_get_or_default(params, 'case.cfl_running_avg_coeff',&
+                                    this%alpha, 0.5_rp)
+    call json_get_or_default(params, 'case.max_dt_increase_factor',&
+                                    this%max_dt_increase_factor, 1.2_rp)
+    call json_get_or_default(params, 'case.min_dt_decrease_factor',&
+                                    this%min_dt_decrease_factor, 0.5_rp)
+
+  end subroutine time_step_controller_init
+
+  !> Set new dt based on cfl if requested
+  !! @param dt time step in case_t.
+  !! @param cfl courant number of current iteration.
+  !! @param cfl_avrg average Courant number.
+  !! @param tstep the current time step.
+  !! @Algorithm: 
+  !! 1. Set the first time step such that cfl is the set one;
+  !! 2. During time-stepping, adjust dt when cfl_avrg is offset by 20%.
+  subroutine time_step_controller_set_dt(this, dt, cfl, cfl_avrg, tstep)
+    implicit none
+    class(time_step_controller_t), intent(inout) :: this
+    real(kind=rp), intent(inout) :: dt
+    real(kind=rp), intent(in) :: cfl
+    real(kind=rp), intent(inout) :: cfl_avrg
+    real(kind=rp) :: dt_old, scaling_factor
+    character(len=LOG_SIZE) :: log_buf    
+    integer, intent(in):: tstep
+
+    if (this%if_variable_dt .eqv. .true.) then
+       if (tstep .eq. 1) then
+          ! set the first dt for desired cfl
+          dt = min(this%set_cfl/cfl*dt, this%max_dt)
+       else
+          ! Calculate the average of cfl over the desired interval
+          cfl_avrg = this%alpha * cfl + (1-this%alpha) * cfl_avrg
+
+          if (abs(cfl_avrg - this%set_cfl) .ge. 0.2*this%set_cfl .and. &
+             this%dt_last_change .ge. this%max_update_frequency) then
+
+             if (this%set_cfl/cfl .ge. 1) then 
+                ! increase of time step
+                scaling_factor = min(this%max_dt_increase_factor, this%set_cfl/cfl) 
+             else
+                ! reduction of time step
+                scaling_factor = max(this%min_dt_decrease_factor, this%set_cfl/cfl) 
+             end if
+
+             dt_old = dt
+             dt = scaling_factor * dt_old
+             dt = min(dt, this%max_dt)
+
+             write(log_buf, '(A,E15.7,1x,A,E15.7)') 'Avrg CFL:', cfl_avrg, &
+                         'set_cfl:', this%set_cfl
+             call neko_log%message(log_buf)
+
+             write(log_buf, '(A,E15.7,1x,A,E15.7)') 'old dt:', dt_old, &
+                         'new dt:', dt
+             call neko_log%message(log_buf)
+
+             this%dt_last_change = 0
+
+          else
+             this%dt_last_change = this%dt_last_change + 1
+          end if
+       end if
+
+    end if
+
+  end subroutine time_step_controller_set_dt
+
+
+
+
+end module time_step_controller
diff --git a/src/common/user_intf.f90 b/src/common/user_intf.f90
index d095a66c59f..ce8ee27111c 100644
--- a/src/common/user_intf.f90
+++ b/src/common/user_intf.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2020-2023, The Neko Authors
+! Copyright (c) 2020-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -33,14 +33,18 @@
 !> Interfaces for user interaction with NEKO
 module user_intf
   use field
+  use field_list, only : field_list_t
   use fluid_user_source_term
-  use source_scalar
+  use scalar_user_source_term
   use coefs
+  use bc, only: bc_list_t
   use mesh
   use usr_inflow
   use usr_scalar
+  use field_dirichlet, only: field_dirichlet_update
   use num_types
   use json_module, only : json_file
+  use utils, only : neko_error,  neko_warning
   implicit none
   private
 
@@ -57,6 +61,16 @@ subroutine useric(u, v, w, p, params)
      end subroutine useric
   end interface
 
+  !> Abstract interface for user defined scalar initial conditions
+  abstract interface
+     subroutine useric_scalar(s, params)
+       import field_t
+       import json_file
+       type(field_t), intent(inout) :: s
+       type(json_file), intent(inout) :: params
+     end subroutine useric_scalar
+  end interface
+
   !> Abstract interface for initilialization of modules
   abstract interface
      subroutine user_initialize_modules(t, u, v, w, p, coef, params)
@@ -131,15 +145,17 @@ end subroutine user_material_properties
 
   type, public :: user_t
      procedure(useric), nopass, pointer :: fluid_user_ic => null()
+     procedure(useric_scalar), nopass, pointer :: scalar_user_ic => null()
      procedure(user_initialize_modules), nopass, pointer :: user_init_modules => null()
      procedure(usermsh), nopass, pointer :: user_mesh_setup => null()
      procedure(usercheck), nopass, pointer :: user_check => null()
      procedure(user_final_modules), nopass, pointer :: user_finalize_modules => null()
      procedure(fluid_source_compute_pointwise), nopass, pointer :: fluid_user_f => null()
      procedure(fluid_source_compute_vector), nopass, pointer :: fluid_user_f_vector => null()
-     procedure(source_scalar_term_pw), nopass, pointer :: scalar_user_f => null()
-     procedure(source_scalar_term), nopass, pointer :: scalar_user_f_vector => null()
+     procedure(scalar_source_compute_pointwise), nopass, pointer :: scalar_user_f => null()
+     procedure(scalar_source_compute_vector), nopass, pointer :: scalar_user_f_vector => null()
      procedure(usr_inflow_eval), nopass, pointer :: fluid_user_if => null()
+     procedure(field_dirichlet_update), nopass, pointer :: user_dirichlet_update => null()
      procedure(usr_scalar_bc_eval), nopass, pointer :: scalar_user_bc => null()
      !> Routine to set material properties
      procedure(user_material_properties), nopass, pointer :: material_properties => null()
@@ -147,10 +163,10 @@ end subroutine user_material_properties
      procedure, pass(u) :: init => user_intf_init
   end type user_t
 
-  public :: useric, user_initialize_modules, usermsh, &
+  public :: useric, useric_scalar, user_initialize_modules, usermsh, &
             dummy_user_material_properties, user_material_properties
 contains
-  
+
   !> User interface initialization
   subroutine user_intf_init(u)
     class(user_t), intent(inout) :: u
@@ -159,10 +175,14 @@ subroutine user_intf_init(u)
        u%fluid_user_ic => dummy_user_ic
     end if
 
+    if (.not. associated(u%scalar_user_ic)) then
+       u%scalar_user_ic => dummy_user_ic_scalar
+    end if
+
     if (.not. associated(u%fluid_user_f)) then
        u%fluid_user_f => dummy_user_f
     end if
-    
+
     if (.not. associated(u%fluid_user_f_vector)) then
        u%fluid_user_f_vector => dummy_user_f_vector
     end if
@@ -170,7 +190,7 @@ subroutine user_intf_init(u)
     if (.not. associated(u%scalar_user_f)) then
        u%scalar_user_f => dummy_scalar_user_f
     end if
-    
+
     if (.not. associated(u%scalar_user_f_vector)) then
        u%scalar_user_f_vector => dummy_user_scalar_f_vector
     end if
@@ -178,6 +198,10 @@ subroutine user_intf_init(u)
     if (.not. associated(u%scalar_user_bc)) then
        u%scalar_user_bc => dummy_scalar_user_bc
     end if
+
+    if (.not. associated(u%user_dirichlet_update)) then
+       u%user_dirichlet_update => dirichlet_do_nothing
+    end if
     
     if (.not. associated(u%user_mesh_setup)) then
        u%user_mesh_setup => dummy_user_mesh_setup
@@ -200,7 +224,7 @@ subroutine user_intf_init(u)
     end if
   end subroutine user_intf_init
 
-  
+
   !
   ! Below is the dummy user interface
   ! when running in pure turboNEKO mode
@@ -213,14 +237,23 @@ subroutine dummy_user_ic(u, v, w, p, params)
     type(field_t), intent(inout) :: w
     type(field_t), intent(inout) :: p
     type(json_file), intent(inout) :: params
-    call neko_error('Dummy user defined initial condition set')    
+    call neko_error('Dummy user defined initial condition set')
   end subroutine dummy_user_ic
 
+  !> Dummy user initial condition for scalar field
+  !! @param s Scalar field.
+  !! @param params JSON parameters.
+  subroutine dummy_user_ic_scalar(s, params)
+    type(field_t), intent(inout) :: s
+    type(json_file), intent(inout) :: params
+    call neko_error('Dummy user defined scalar initial condition set')
+  end subroutine dummy_user_ic_scalar
+
   !> Dummy user (fluid) forcing
   subroutine dummy_user_f_vector(f, t)
     class(fluid_user_source_term_t), intent(inout) :: f
     real(kind=rp), intent(in) :: t
-    call neko_error('Dummy user defined vector valued forcing set')    
+    call neko_error('Dummy user defined vector valued forcing set')
   end subroutine dummy_user_f_vector
 
   !> Dummy user (fluid) forcing
@@ -233,14 +266,14 @@ subroutine dummy_user_f(u, v, w, j, k, l, e, t)
     integer, intent(in) :: l
     integer, intent(in) :: e
     real(kind=rp), intent(in) :: t
-    call neko_error('Dummy user defined forcing set')    
+    call neko_error('Dummy user defined forcing set')
   end subroutine dummy_user_f
 
   !> Dummy user (scalar) forcing
   subroutine dummy_user_scalar_f_vector(f, t)
-    class(source_scalar_t), intent(inout) :: f
+    class(scalar_user_source_term_t), intent(inout) :: f
     real(kind=rp), intent(in) :: t
-    call neko_error('Dummy user defined vector valued forcing set')    
+    call neko_error('Dummy user defined vector valued forcing set')
   end subroutine dummy_user_scalar_f_vector
 
   !> Dummy user (scalar) forcing
@@ -251,9 +284,9 @@ subroutine dummy_scalar_user_f(s, j, k, l, e, t)
     integer, intent(in) :: l
     integer, intent(in) :: e
     real(kind=rp), intent(in) :: t
-    call neko_error('Dummy user defined forcing set')    
+    call neko_error('Dummy user defined forcing set')
   end subroutine dummy_scalar_user_f
- 
+
   !> Dummy user boundary condition for scalar
   subroutine dummy_scalar_user_bc(s, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep)
     real(kind=rp), intent(inout) :: s
@@ -269,17 +302,17 @@ subroutine dummy_scalar_user_bc(s, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep
     integer, intent(in) :: ie
     real(kind=rp), intent(in) :: t
     integer, intent(in) :: tstep
-    call neko_warning('Dummy scalar user bc set, applied on all non-labeled zones')    
+    call neko_warning('Dummy scalar user bc set, applied on all non-labeled zones')
   end subroutine dummy_scalar_user_bc
- 
+
   !> Dummy user mesh apply
   subroutine dummy_user_mesh_setup(msh)
     type(mesh_t), intent(inout) :: msh
   end subroutine dummy_user_mesh_setup
-  
+
   !> Dummy user check
   subroutine dummy_user_check(t, tstep, u, v, w, p, coef, params)
-    real(kind=rp), intent(in) :: t    
+    real(kind=rp), intent(in) :: t
     integer, intent(in) :: tstep
     type(field_t), intent(inout) :: u
     type(field_t), intent(inout) :: v
@@ -304,6 +337,16 @@ subroutine dummy_user_final_no_modules(t, params)
     type(json_file), intent(inout) :: params
   end subroutine dummy_user_final_no_modules
 
+  subroutine dirichlet_do_nothing(dirichlet_field_list, dirichlet_bc_list, &
+       coef, t, tstep, which_solver)
+    type(field_list_t), intent(inout) :: dirichlet_field_list
+    type(bc_list_t), intent(inout) :: dirichlet_bc_list
+    type(coef_t), intent(inout) :: coef
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+    character(len=*), intent(in) :: which_solver
+  end subroutine dirichlet_do_nothing
+  
   subroutine dummy_user_material_properties(t, tstep, rho, mu, cp, lambda,&
                                             params)
     real(kind=rp), intent(in) :: t
diff --git a/src/common/utils.f90 b/src/common/utils.f90
index 35fb782ab23..1a4cc6eab90 100644
--- a/src/common/utils.f90
+++ b/src/common/utils.f90
@@ -40,23 +40,23 @@ module utils
   interface neko_error
      module procedure neko_error_plain, neko_error_msg
   end interface neko_error
-  
+
 contains
-  
+
   !> Find position (in the string) of a filename's suffix
   pure function filename_suffix_pos(fname) result(suffix_pos)
     character(len=*), intent(in) :: fname
     integer :: suffix_pos
-    suffix_pos = scan(trim(fname), '.', back=.true.)    
+    suffix_pos = scan(trim(fname), '.', back=.true.)
   end function filename_suffix_pos
 
   !> Find position (in the string) of a filename's trailing slash
   pure function filename_tslash_pos(fname) result(tslash_pos)
     character(len=*), intent(in) :: fname
     integer :: tslash_pos
-    tslash_pos = scan(trim(fname), '/', back=.true.)    
+    tslash_pos = scan(trim(fname), '/', back=.true.)
   end function filename_tslash_pos
-  
+
   !> Extract a filename's suffix
   subroutine filename_suffix(fname, suffix)
     character(len=*) :: fname
@@ -76,12 +76,53 @@ subroutine filename_chsuffix(fname, new_fname, new_suffix)
 
   end subroutine filename_chsuffix
 
+  !> Split a string based on delimiter (tokenizer)
+  !! OBS: very hacky, this should really be improved, it is rather embarrasing code.
+  function split_string(string, delimiter) result(split_str)
+    character(len=*) :: string
+    character(len=*) :: delimiter
+    character(len=100), allocatable :: split_str(:)
+    integer :: length, i, i2,offset, j
+    i = 0
+    offset = 1
+    length = 1
+    if (len(trim(string)) .eq. 0) then 
+       allocate(split_str(1))
+       split_str(1) = trim(string)
+       return
+    end if
+    do while( .true.)
+       i = scan(string(offset:), delimiter, back=.false.) 
+       if (i .eq. 0) exit
+       length = length + 1
+       offset = offset + i
+    end do
+
+    allocate(split_str(length))
+    i = 0
+    j = 1
+    offset=1
+    do while( .true.)
+       i2 = scan(trim(string(offset:)), delimiter, back=.false.) 
+       if (i2 .eq. 0) then
+          split_str(j) = trim(string(offset:)) 
+          exit
+       end if
+       split_str(j) = trim(string(offset:offset+i2-2))
+       offset = offset+i2
+       j = j + 1
+    end do
+  end function split_string
+
+
+
+
   !> Compute the address of a (i,j,k,l) array
   !! with sizes (1:lx, 1:ly, 1:lz, :)
   pure function linear_index(i,j,k,l,lx,ly,lz) result(index)
     integer, intent(in) :: i, j, k, l, lx, ly, lz
     integer :: index
-    
+
     index = (i + lx * ((j - 1) + ly * ((k - 1) + lz * ((l - 1)))))
   end function linear_index
 
@@ -104,11 +145,11 @@ pure function index_is_on_facet(i, j, k, lx, ly, lz, facet) result(is_on)
     case(6)
        if (k .eq. lz) is_on = .true.
     end select
-  
+
 
   end function index_is_on_facet
-  
- 
+
+
   !> Compute (i,j,k,l) array given linear index
   !! with sizes (1:lx, 1:ly, 1:lz, :)
   pure function nonlinear_index(linear_index,lx,ly,lz) result(index)
@@ -116,7 +157,7 @@ pure function nonlinear_index(linear_index,lx,ly,lz) result(index)
     integer :: index(4)
     integer :: lin_idx
     lin_idx = linear_index -1
-    index(4) = lin_idx/(lx*ly*lz) 
+    index(4) = lin_idx/(lx*ly*lz)
     index(3) = (lin_idx-(lx*ly*lz)*index(4))/(lx*ly)
     index(2) = (lin_idx-(lx*ly*lz)*index(4)-(lx*ly)*index(3))/lx
     index(1) = (lin_idx-(lx*ly*lz)*index(4)-(lx*ly)*index(3)-lx*index(2))
@@ -150,5 +191,5 @@ subroutine neko_warning(warning_msg)
     character(len=*) :: warning_msg
     write(*,*) '*** WARNING: ', warning_msg,' ***'
   end subroutine neko_warning
-    
+
 end module utils
diff --git a/src/device/cuda/nvtx.F90 b/src/device/cuda/nvtx.F90
index 32d7bac9bce..2fac8243211 100644
--- a/src/device/cuda/nvtx.F90
+++ b/src/device/cuda/nvtx.F90
@@ -1,42 +1,85 @@
 !> Interface to NVTX
+!! Based on https://github.com/maxcuda/NVTX_example
 module nvtx
   use, intrinsic :: iso_c_binding
   implicit none
   private
 
   integer, parameter :: NVTX_MAX_LEN = 256
+  integer, parameter :: color(24) = [int(Z'00A6CEE3'), int(Z'001F78B4'), &
+                                     int(Z'00B2DF8A'), int(Z'0033A02C'), &
+                                     int(Z'00FB9A99'), int(Z'00E31A1C'), &
+                                     int(Z'00FDBF6F'), int(Z'00FF7F00'), &
+                                     int(Z'00CAB2D6'), int(Z'006A3D9A'), &
+                                     int(Z'00FFFF99'), int(Z'00B15928'), &
+                                     int(Z'008DD3C7'), int(Z'00FFFFB3'), &
+                                     int(Z'00BEBADA'), int(Z'00FB8072'), &
+                                     int(Z'0080B1D3'), int(Z'00FDB462'), &
+                                     int(Z'00B3DE69'), int(Z'00FCCDE5'), &
+                                     int(Z'00D9D9D9'), int(Z'00BC89BD'), &
+                                     int(Z'00CCEBC5'), int(Z'00FFED6F')]
 
 #ifdef HAVE_NVTX
+
+  type, bind(c) :: nvtxEventAttributes
+     integer(c_int16_t) :: version = 1
+     integer(c_int16_t) :: size = 48
+     integer(c_int32_t) :: category = 0
+     integer(c_int32_t) :: colortype = 1
+     integer(c_int32_t) :: color
+     integer(c_int32_t) :: payloadtype = 0
+     integer(c_int32_t) :: reserved0
+     integer(c_int64_t) :: payload
+     integer(c_int) :: messagetype = 1
+     type(c_ptr) :: message
+  end type nvtxEventAttributes
+
   interface nvtxRangePushA
      subroutine nvtxRangePushA(name) bind(C, name='nvtxRangePushA')
        use iso_c_binding
        character(kind=c_char) :: name(256)
      end subroutine nvtxRangePushA
   end interface nvtxRangePushA
-  
+
+  interface nvtxRangePushEx
+     subroutine nvtxRangePushEx(event) bind(C, name='nvtxRangePushEx')
+       use iso_c_binding
+       import :: nvtxEventAttributes
+       type(nvtxEventAttributes) :: event
+     end subroutine nvtxRangePushEx
+  end interface nvtxRangePushEx
+
   interface nvtxRangePop
      subroutine nvtxRangePop() bind(C, name='nvtxRangePop')
      end subroutine nvtxRangePop
   end interface nvtxRangePop
 
   public :: nvtxStartRange, nvtxRangePushA, nvtxRangePop
-  
+
 contains
-  
-  subroutine nvtxStartRange(name)
+
+  subroutine nvtxStartRange(name, region_id)
     character(kind=c_char,len=*) :: name
-    character :: c_name(NVTX_MAX_LEN)
-    integer:: i, str_len
-    
-    str_len = len(trim(name))    
+    integer, optional :: region_id
+    type(nvtxEventAttributes) :: event
+    character, target :: c_name(NVTX_MAX_LEN)
+    integer :: i, str_len
+
+    str_len = len(trim(name))
     do i = 1, len(trim(name))
        c_name(i) = name(i:i)
     end do
     c_name(str_len+1) = C_NULL_CHAR
-    
-    call nvtxRangePushA(c_name)
+
+    if (present(region_id)) then
+       event%color = color(mod(region_id, 24) + 1)
+       event%message = c_loc(c_name)
+       call nvtxRangePushEx(event)
+    else
+       call nvtxRangePushA(c_name)
+    end if
 
   end subroutine nvtxStartRange
-  
+
 #endif
 end module nvtx
diff --git a/src/device/cuda_intf.F90 b/src/device/cuda_intf.F90
index 8b86d53bc33..37549d2a84b 100644
--- a/src/device/cuda_intf.F90
+++ b/src/device/cuda_intf.F90
@@ -50,22 +50,22 @@ module cuda_intf
 
   !> Low priority stream setting
   integer :: STRM_LOW_PRIO
-  
+
   !> Enum @a cudaError
   enum, bind(c)
-     enumerator :: cudaSuccess = 0
-     enumerator :: cudaErrorInvalidValue = 1
-     enumerator :: cudaErrorMemoryAllocation = 2
-     enumerator :: cudaErrorInitializationError = 3
+    enumerator :: cudaSuccess = 0
+    enumerator :: cudaErrorInvalidValue = 1
+    enumerator :: cudaErrorMemoryAllocation = 2
+    enumerator :: cudaErrorInitializationError = 3
   end enum
 
   !> Enum @a cudaMemcpyKind
   enum, bind(c)
-     enumerator :: cudaMemcpyHostToHost = 0
-     enumerator :: cudaMemcpyHostToDevice = 1
-     enumerator :: cudaMemcpyDeviceToHost = 2
-     enumerator :: cudaMemcpyDevicetoDevice = 3
-     enumerator :: cudaMemcpyDefault = 4
+    enumerator :: cudaMemcpyHostToHost = 0
+    enumerator :: cudaMemcpyHostToDevice = 1
+    enumerator :: cudaMemcpyDeviceToHost = 2
+    enumerator :: cudaMemcpyDevicetoDevice = 3
+    enumerator :: cudaMemcpyDefault = 4
   end enum
 
   interface
@@ -86,7 +86,7 @@ integer (c_int) function cudaFree(ptr_d) &
        type(c_ptr), value :: ptr_d
      end function cudaFree
   end interface
-  
+
   interface
      integer (c_int) function cudaMemcpy(ptr_dst, ptr_src, s, dir) &
           bind(c, name='cudaMemcpy')
@@ -108,7 +108,7 @@ integer (c_int) function cudaMemcpyAsync(ptr_dst, ptr_src, s, dir, stream) &
        integer(c_int), value :: dir
      end function cudaMemcpyAsync
   end interface
-  
+
   interface
      integer (c_int) function cudaDeviceSynchronize() &
           bind(c, name='cudaDeviceSynchronize')
@@ -155,7 +155,7 @@ integer (c_int) function cudaStreamCreateWithPriority(stream, flags, prio) &
        integer(c_int), value :: flags, prio
      end function cudaStreamCreateWithPriority
   end interface
-  
+
   interface
      integer (c_int) function cudaStreamDestroy(steam) &
           bind(c, name='cudaStreamDestroy')
@@ -165,7 +165,7 @@ integer (c_int) function cudaStreamDestroy(steam) &
      end function cudaStreamDestroy
   end interface
 
-  interface 
+  interface
      integer (c_int) function cudaStreamSynchronize(stream) &
           bind(c, name='cudaStreamSynchronize')
        use, intrinsic :: iso_c_binding
@@ -174,7 +174,7 @@ integer (c_int) function cudaStreamSynchronize(stream) &
      end function cudaStreamSynchronize
   end interface
 
-  interface 
+  interface
      integer (c_int) function cudaStreamWaitEvent(stream, event, flags) &
           bind(c, name='cudaStreamWaitEvent')
        use, intrinsic :: iso_c_binding
@@ -192,7 +192,7 @@ integer (c_int) function cudaDeviceGetStreamPriorityRange(low_prio, high_prio) &
        integer(c_int) :: low_prio, high_prio
      end function cudaDeviceGetStreamPriorityRange
   end interface
-  
+
   interface
      integer (c_int) function cudaProfilerStart() &
           bind(c, name='cudaProfilerStart')
@@ -226,7 +226,7 @@ integer (c_int) function cudaEventDestroy(event) &
        type(c_ptr), value :: event
      end function cudaEventDestroy
   end interface
-  
+
   interface
      integer (c_int) function cudaEventCreateWithFlags(event, flags) &
           bind(c, name='cudaEventCreateWithFlags')
@@ -246,7 +246,7 @@ integer (c_int) function cudaEventRecord(event, stream) &
      end function cudaEventRecord
   end interface
 
-  interface 
+  interface
      integer (c_int) function cudaEventSynchronize(event) &
           bind(c, name='cudaEventSynchronize')
        use, intrinsic :: iso_c_binding
@@ -272,7 +272,7 @@ integer (c_int) function cudaSetDevice(device) &
        integer(c_int), value :: device
      end function cudaSetDevice
   end interface
-  
+
 contains
 
   subroutine cuda_init
@@ -297,10 +297,10 @@ subroutine cuda_init
        end if
        !$omp end parallel
     end if
-    
+
     if (cudaDeviceGetStreamPriorityRange(STRM_LOW_PRIO, STRM_HIGH_PRIO) &
          .ne. cudaSuccess) then
-       call neko_error('Error retrieving stream priority range')       
+       call neko_error('Error retrieving stream priority range')
     end if
 
     if (cudaStreamCreateWithPriority(glb_cmd_queue, 1, STRM_HIGH_PRIO)  &
@@ -323,7 +323,7 @@ subroutine cuda_finalize
        call neko_error('Error destroying aux stream')
     end if
   end subroutine cuda_finalize
-  
+
   subroutine cuda_device_name(name)
     character(len=*), intent(inout) :: name
     character(kind=c_char, len=8192), target :: prop
@@ -337,17 +337,17 @@ subroutine cuda_device_name(name)
     !
     ! This will of course break if sizeof(cudaDeviceProp) > 8192
     !
-    
+
     if (cudaGetDeviceProperties(c_loc(prop), 0) .ne. cudaSuccess) then
        call neko_error('Failed to query device')
     end if
-    
+
     end_pos = scan(prop(1:256), C_NULL_CHAR)
     if(end_pos .ge. 2) then
        name(1:end_pos-1) = prop(1:end_pos-1)
     endif
   end subroutine cuda_device_name
-  
+
 #endif
- 
+
 end module cuda_intf
diff --git a/src/device/device.F90 b/src/device/device.F90
index e8e7d161e19..89bf48cf7a0 100644
--- a/src/device/device.F90
+++ b/src/device/device.F90
@@ -38,7 +38,7 @@ module device
   use hip_intf
   use htable, only : htable_cptr_t, h_cptr_t
   use utils, only : neko_error
-  use dummy_device  
+  use dummy_device
   use opencl_prgm_lib
   use, intrinsic :: iso_c_binding
   implicit none
@@ -76,7 +76,7 @@ module device
      module procedure device_deassociate_r1, device_deassociate_r2, &
           device_deassociate_r3, device_deassociate_r4
   end interface device_deassociate
-  
+
   !> Return the device pointer for an associated Fortran array
   interface device_get_ptr
      module procedure device_get_ptr_r1, device_get_ptr_r2, &
@@ -87,7 +87,7 @@ module device
   interface device_sync
      module procedure device_sync_device, device_sync_stream
   end interface device_sync
-      
+
   !> Table of host to device address mappings
   type(htable_cptr_t), private :: device_addrtbl
 
@@ -100,7 +100,7 @@ module device
        device_stream_wait_event
 
   private :: device_memcpy_common
-  
+
 contains
 
   subroutine device_init
@@ -115,7 +115,7 @@ subroutine device_init
     call opencl_init
 #endif
 
-#endif    
+#endif
   end subroutine device_init
 
   subroutine device_finalize
@@ -145,7 +145,7 @@ subroutine device_name(name)
     call opencl_device_name(name)
 #endif
   end subroutine device_name
-  
+
   !> Allocate memory on the device
   subroutine device_alloc(x_d, s)
     type(c_ptr), intent(inout) :: x_d
@@ -192,21 +192,10 @@ subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
     class(*), intent(inout), target :: x(:)
     type(c_ptr), intent(inout) :: x_d
     integer, intent(in), value :: dir
-    logical, optional :: sync
+    logical :: sync
     type(c_ptr), optional :: strm
     type(c_ptr) :: ptr_h, copy_stream
     integer(c_size_t) :: s
-    logical :: sync_device
-
-    if (present(sync)) then
-       sync_device = sync
-    else
-#if defined(HAVE_CUDA) || defined (HAVE_HIP)
-       sync_device = .false.
-#else
-       sync_device = .true.
-#endif
-    end if
 
     if (present(strm)) then
        copy_stream = strm
@@ -218,7 +207,7 @@ subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
     type is (integer)
        s = n * 4
        ptr_h = c_loc(x)
-    type is (integer(i8))       
+    type is (integer(i8))
        s = n * 8
        ptr_h = c_loc(x)
     type is (real)
@@ -231,8 +220,8 @@ subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm)
        call neko_error('Unknown Fortran type')
     end select
 
-    call device_memcpy_common(ptr_h, x_d, s, dir, sync_device, copy_stream)
-    
+    call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
+
   end subroutine device_memcpy_r1
 
   !> Copy data between host and device (rank 2 arrays)
@@ -241,21 +230,10 @@ subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
     class(*), intent(inout), target :: x(:,:)
     type(c_ptr), intent(inout) :: x_d
     integer, intent(in), value :: dir
-    logical, optional :: sync
+    logical :: sync
     type(c_ptr), optional :: strm
     type(c_ptr) :: ptr_h, copy_stream
     integer(c_size_t) :: s
-    logical :: sync_device
-    
-    if (present(sync)) then
-       sync_device = sync
-    else
-#if defined(HAVE_CUDA) || defined (HAVE_HIP)
-       sync_device = .false.
-#else
-       sync_device = .true.
-#endif
-    end if
 
     if (present(strm)) then
        copy_stream = strm
@@ -267,7 +245,7 @@ subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
     type is (integer)
        s = n * 4
        ptr_h = c_loc(x)
-    type is (integer(i8))       
+    type is (integer(i8))
        s = n * 8
        ptr_h = c_loc(x)
     type is (real)
@@ -280,8 +258,8 @@ subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm)
        call neko_error('Unknown Fortran type')
     end select
 
-    call device_memcpy_common(ptr_h, x_d, s, dir, sync_device, copy_stream)
-    
+    call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
+
   end subroutine device_memcpy_r2
 
   !> Copy data between host and device (rank 3 arrays)
@@ -290,33 +268,22 @@ subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
     class(*), intent(inout), target :: x(:,:,:)
     type(c_ptr), intent(inout) :: x_d
     integer, intent(in), value :: dir
-    logical, optional :: sync
+    logical :: sync
     type(c_ptr), optional :: strm
     type(c_ptr) :: ptr_h, copy_stream
     integer(c_size_t) :: s
-    logical :: sync_device
-
-    if (present(sync)) then
-       sync_device = sync
-    else
-#if defined(HAVE_CUDA) || defined (HAVE_HIP)
-       sync_device = .false.
-#else
-       sync_device = .true.
-#endif
-    end if
 
     if (present(strm)) then
        copy_stream = strm
     else
        copy_stream = glb_cmd_queue
     end if
-    
+
     select type(x)
     type is (integer)
        s = n * 4
        ptr_h = c_loc(x)
-    type is (integer(i8))       
+    type is (integer(i8))
        s = n * 8
        ptr_h = c_loc(x)
     type is (real)
@@ -329,8 +296,8 @@ subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm)
        call neko_error('Unknown Fortran type')
     end select
 
-    call device_memcpy_common(ptr_h, x_d, s, dir, sync_device, copy_stream)
-    
+    call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
+
   end subroutine device_memcpy_r3
 
   !> Copy data between host and device (rank 4 arrays)
@@ -339,33 +306,22 @@ subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
     class(*), intent(inout), target :: x(:,:,:,:)
     type(c_ptr), intent(inout) :: x_d
     integer, intent(in), value :: dir
-    logical, optional :: sync
+    logical :: sync
     type(c_ptr), optional :: strm
     type(c_ptr) :: ptr_h, copy_stream
-    integer(c_size_t) :: s    
-    logical :: sync_device
-
-    if (present(sync)) then
-       sync_device = sync
-    else
-#if defined(HAVE_CUDA) || defined (HAVE_HIP)
-       sync_device = .false.
-#else
-       sync_device = .true.
-#endif
-    end if
+    integer(c_size_t) :: s
 
     if (present(strm)) then
        copy_stream = strm
     else
        copy_stream = glb_cmd_queue
     end if
-    
+
     select type(x)
     type is (integer)
        s = n * 4
        ptr_h = c_loc(x)
-    type is (integer(i8))       
+    type is (integer(i8))
        s = n * 8
        ptr_h = c_loc(x)
     type is (real)
@@ -378,8 +334,8 @@ subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm)
        call neko_error('Unknown Fortran type')
     end select
 
-    call device_memcpy_common(ptr_h, x_d, s, dir, sync_device, copy_stream)
-    
+    call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream)
+
   end subroutine device_memcpy_r4
 
   !> Copy data between host and device (or device and device) (c-pointers)
@@ -398,11 +354,7 @@ subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
     if (present(sync)) then
        sync_device = sync
     else
-#if defined(HAVE_CUDA) || defined (HAVE_HIP)
        sync_device = .false.
-#else
-       sync_device = .true.
-#endif
     end if
 
     if (present(strm)) then
@@ -412,9 +364,9 @@ subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm)
     end if
 
     call device_memcpy_common(dst, src, s, dir, sync_device, copy_stream)
-    
+
   end subroutine device_memcpy_cptr
-  
+
   !> Copy data between host and device
   !! @note For device to device copies, @a ptr_h is assumed
   !! to be the dst device pointer
@@ -431,12 +383,12 @@ subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
             hipMemcpyHostToDevice, stream) .ne. hipSuccess) then
           call neko_error('Device memcpy async (host-to-device) failed')
        end if
-    else if (dir .eq. DEVICE_TO_HOST) then       
+    else if (dir .eq. DEVICE_TO_HOST) then
        if (hipMemcpyAsync(ptr_h, x_d, s, &
             hipMemcpyDeviceToHost, stream) .ne. hipSuccess) then
           call neko_error('Device memcpy async (device-to-host) failed')
        end if
-    else if (dir .eq. DEVICE_TO_DEVICE) then       
+    else if (dir .eq. DEVICE_TO_DEVICE) then
        if (hipMemcpyAsync(ptr_h, x_d, s, &
             hipMemcpyDeviceToDevice, stream) .ne. hipSuccess) then
           call neko_error('Device memcpy async (device-to-device) failed')
@@ -453,12 +405,12 @@ subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
             cudaMemcpyHostToDevice, stream) .ne. cudaSuccess) then
           call neko_error('Device memcpy async (host-to-device) failed')
        end if
-    else if (dir .eq. DEVICE_TO_HOST) then       
+    else if (dir .eq. DEVICE_TO_HOST) then
        if (cudaMemcpyAsync(ptr_h, x_d, s, &
             cudaMemcpyDeviceToHost, stream) .ne. cudaSuccess) then
           call neko_error('Device memcpy async (device-to-host) failed')
        end if
-    else if (dir .eq. DEVICE_TO_DEVICE) then       
+    else if (dir .eq. DEVICE_TO_DEVICE) then
        if (cudaMemcpyAsync(ptr_h, x_d, s, &
             cudaMemcpyDeviceToDevice, stream) .ne. cudaSuccess) then
           call neko_error('Device memcpy async (device-to-device) failed')
@@ -479,7 +431,7 @@ subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
        else if (dir .eq. DEVICE_TO_HOST) then
           if (clEnqueueReadBuffer(glb_cmd_queue, x_d, CL_TRUE, 0_i8, s, ptr_h, &
                0, C_NULL_PTR, C_NULL_PTR) .ne. CL_SUCCESS) then
-             call neko_error('Device memcpy (host-to-device) failed')
+             call neko_error('Device memcpy (device-to-host) failed')
           end if
        else if (dir .eq. DEVICE_TO_DEVICE) then
           if (clEnqueueCopyBuffer(glb_cmd_queue, x_d, ptr_h, 0_i8, 0_i8, s, &
@@ -498,7 +450,7 @@ subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream)
        else if (dir .eq. DEVICE_TO_HOST) then
           if (clEnqueueReadBuffer(glb_cmd_queue, x_d, CL_FALSE, 0_i8, s, ptr_h, &
                0, C_NULL_PTR, C_NULL_PTR) .ne. CL_SUCCESS) then
-             call neko_error('Device memcpy (host-to-device) failed')
+             call neko_error('Device memcpy (device-to-host) failed')
           end if
        else if (dir .eq. DEVICE_TO_DEVICE) then
           if (clEnqueueCopyBuffer(glb_cmd_queue, x_d, ptr_h, 0_i8, 0_i8, s, &
@@ -521,7 +473,7 @@ subroutine device_associate_r1(x, x_d)
     select type(x)
     type is (integer)
        htbl_ptr_h%ptr = c_loc(x)
-    type is (integer(i8))       
+    type is (integer(i8))
        htbl_ptr_h%ptr = c_loc(x)
     type is (real)
        htbl_ptr_h%ptr = c_loc(x)
@@ -532,7 +484,7 @@ subroutine device_associate_r1(x, x_d)
     end select
 
     htbl_ptr_d%ptr = x_d
-    
+
     call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
 
   end subroutine device_associate_r1
@@ -546,7 +498,7 @@ subroutine device_associate_r2(x, x_d)
     select type(x)
     type is (integer)
        htbl_ptr_h%ptr = c_loc(x)
-    type is (integer(i8))       
+    type is (integer(i8))
        htbl_ptr_h%ptr = c_loc(x)
     type is (real)
        htbl_ptr_h%ptr = c_loc(x)
@@ -557,7 +509,7 @@ subroutine device_associate_r2(x, x_d)
     end select
 
     htbl_ptr_d%ptr = x_d
-    
+
     call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
 
   end subroutine device_associate_r2
@@ -571,7 +523,7 @@ subroutine device_associate_r3(x, x_d)
     select type(x)
     type is (integer)
        htbl_ptr_h%ptr = c_loc(x)
-    type is (integer(i8))       
+    type is (integer(i8))
        htbl_ptr_h%ptr = c_loc(x)
     type is (real)
        htbl_ptr_h%ptr = c_loc(x)
@@ -582,7 +534,7 @@ subroutine device_associate_r3(x, x_d)
     end select
 
     htbl_ptr_d%ptr = x_d
-    
+
     call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
 
   end subroutine device_associate_r3
@@ -596,7 +548,7 @@ subroutine device_associate_r4(x, x_d)
     select type(x)
     type is (integer)
        htbl_ptr_h%ptr = c_loc(x)
-    type is (integer(i8))       
+    type is (integer(i8))
        htbl_ptr_h%ptr = c_loc(x)
     type is (real)
        htbl_ptr_h%ptr = c_loc(x)
@@ -607,7 +559,7 @@ subroutine device_associate_r4(x, x_d)
     end select
 
     htbl_ptr_d%ptr = x_d
-    
+
     call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d)
 
   end subroutine device_associate_r4
@@ -620,7 +572,7 @@ subroutine device_deassociate_r1(x)
     select type(x)
     type is (integer)
        htbl_ptr_h%ptr = c_loc(x)
-    type is (integer(i8))       
+    type is (integer(i8))
        htbl_ptr_h%ptr = c_loc(x)
     type is (real)
        htbl_ptr_h%ptr = c_loc(x)
@@ -644,7 +596,7 @@ subroutine device_deassociate_r2(x)
     select type(x)
     type is (integer)
        htbl_ptr_h%ptr = c_loc(x)
-    type is (integer(i8))       
+    type is (integer(i8))
        htbl_ptr_h%ptr = c_loc(x)
     type is (real)
        htbl_ptr_h%ptr = c_loc(x)
@@ -653,7 +605,7 @@ subroutine device_deassociate_r2(x)
     class default
        call neko_error('Unknown Fortran type')
     end select
-    
+
     if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
        call device_addrtbl%remove(htbl_ptr_h)
     end if
@@ -668,7 +620,7 @@ subroutine device_deassociate_r3(x)
     select type(x)
     type is (integer)
        htbl_ptr_h%ptr = c_loc(x)
-    type is (integer(i8))       
+    type is (integer(i8))
        htbl_ptr_h%ptr = c_loc(x)
     type is (real)
        htbl_ptr_h%ptr = c_loc(x)
@@ -692,7 +644,7 @@ subroutine device_deassociate_r4(x)
     select type(x)
     type is (integer)
        htbl_ptr_h%ptr = c_loc(x)
-    type is (integer(i8))       
+    type is (integer(i8))
        htbl_ptr_h%ptr = c_loc(x)
     type is (real)
        htbl_ptr_h%ptr = c_loc(x)
@@ -707,7 +659,7 @@ subroutine device_deassociate_r4(x)
     end if
 
   end subroutine device_deassociate_r4
-  
+
   !> Map a Fortran rank 1 array to a device (allocate and associate)
   subroutine device_map_r1(x, x_d, n)
     integer, intent(in) :: n
@@ -846,9 +798,9 @@ function device_associated_r1(x) result(assoc)
     if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
        assoc = .true.
     else
-       assoc = .false.       
+       assoc = .false.
     end if
-    
+
   end function device_associated_r1
 
   !> Check if a Fortran rank 2 array is assoicated with a device pointer
@@ -873,9 +825,9 @@ function device_associated_r2(x) result(assoc)
     if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
        assoc = .true.
     else
-       assoc = .false.       
+       assoc = .false.
     end if
-    
+
   end function device_associated_r2
 
   !> Check if a Fortran rank 3 array is assoicated with a device pointer
@@ -900,9 +852,9 @@ function device_associated_r3(x) result(assoc)
     if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
        assoc = .true.
     else
-       assoc = .false.       
+       assoc = .false.
     end if
-    
+
   end function device_associated_r3
 
   !> Check if a Fortran rank 4 array is assoicated with a device pointer
@@ -927,9 +879,9 @@ function device_associated_r4(x) result(assoc)
     if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then
        assoc = .true.
     else
-       assoc = .false.       
+       assoc = .false.
     end if
-    
+
   end function device_associated_r4
 
   !> Return the device pointer for an associated Fortran rank 1 array
@@ -1043,7 +995,7 @@ function device_get_ptr_r4(x)
        call neko_error('Array not associated with device')
     end if
   end function device_get_ptr_r4
-  
+
   !> Synchronize the device
   subroutine device_sync_device()
 #ifdef HAVE_HIP
@@ -1108,7 +1060,7 @@ subroutine device_stream_create(stream, flags)
     stream = clCreateCommandQueue(glb_ctx, glb_device_id, 0_i8, ierr)
     if (ierr .ne. CL_SUCCESS) then
        call neko_error('Error during stream create')
-    end if       
+    end if
 #endif
   end subroutine device_stream_create
 
@@ -1141,7 +1093,7 @@ subroutine device_stream_destroy(stream)
        call neko_error('Error during stream destroy')
     end if
 #elif HAVE_OPENCL
-    if (clReleaseCommandQueue(stream) .eq. CL_SUCCESS) then
+    if (clReleaseCommandQueue(stream) .ne. CL_SUCCESS) then
        call neko_error('Error during stream destroy')
     end if
 #endif
@@ -1166,7 +1118,7 @@ subroutine device_stream_wait_event(stream, event, flags)
 !    end if
 #endif
   end subroutine device_stream_wait_event
-  
+
   !> Start device profiling
   subroutine device_profiler_start()
 #if HAVE_CUDA
@@ -1175,7 +1127,7 @@ subroutine device_profiler_start()
     end if
 #endif
   end subroutine device_profiler_start
-  
+
   !> Stop device profiling
   subroutine device_profiler_stop()
 #if HAVE_CUDA
@@ -1232,7 +1184,7 @@ subroutine device_event_destroy(event)
     end if
 #endif
   end subroutine device_event_destroy
-  
+
   !> Record a device event
   subroutine device_event_record(event, stream)
     type(c_ptr), intent(in) :: event
@@ -1264,11 +1216,11 @@ subroutine device_event_sync(event)
        call neko_error('Error during event sync')
     end if
 #elif HAVE_OPENCL
-    if (clWaitForEvents(1, event) .eq. CL_SUCCESS) then
+    if (clWaitForEvents(1, event) .ne. CL_SUCCESS) then
        call neko_error('Error during event sync')
     end if
 #endif
   end subroutine device_event_sync
 
-  
+
 end module device
diff --git a/src/device/device_config.h.in b/src/device/device_config.h.in
index d38ccce302d..f829d6517dd 100644
--- a/src/device/device_config.h.in
+++ b/src/device/device_config.h.in
@@ -1,3 +1,6 @@
+#ifndef __DEVICE_DEVICE_CONFIG__
+#define __DEVICE_DEVICE_CONFIG__
+
 /**
  * Parameters and options for device backends
  */
@@ -11,3 +14,5 @@ typedef @NEKO_DEV_REAL_TYPE@ real;
 extern void *glb_ctx;
 extern void *glb_cmd_queue;
 extern void *glb_device_id;
+
+#endif // __DEVICE_DEVICE_CONFIG__
\ No newline at end of file
diff --git a/src/device/dummy_device.F90 b/src/device/dummy_device.F90
index 07bc2d0e82b..babbfb9f99f 100644
--- a/src/device/dummy_device.F90
+++ b/src/device/dummy_device.F90
@@ -45,5 +45,5 @@ module dummy_device
   type(c_ptr), public, bind(c) :: aux_cmd_queue = C_NULL_PTR
 
 #endif
-  
+
 end module dummy_device
diff --git a/src/device/hip/roctx.F90 b/src/device/hip/roctx.F90
index 24914ea740d..cc475eb492b 100644
--- a/src/device/hip/roctx.F90
+++ b/src/device/hip/roctx.F90
@@ -3,7 +3,7 @@ module roctx
   use, intrinsic :: iso_c_binding
   implicit none
   private
-  
+
   integer, parameter :: ROCTX_MAX_LEN = 256
 
 #ifdef HAVE_ROCTX
@@ -13,30 +13,30 @@ subroutine roctxRangePushA(name) bind(C, name='roctxRangePushA')
        character(kind=c_char) :: name(256)
      end subroutine roctxRangePushA
   end interface roctxRangePushA
-  
+
   interface roctxRangePop
      subroutine roctxRangePop() bind(C, name='roctxRangePop')
      end subroutine roctxRangePop
   end interface roctxRangePop
 
   public :: roctxStartRange, roctxRangePushA, roctxRangePop
-  
+
 contains
-  
+
   subroutine roctxStartRange(name)
     character(kind=c_char,len=*) :: name
     character :: c_name(ROCTX_MAX_LEN)
     integer:: i, str_len
-    
-    str_len = len(trim(name))    
+
+    str_len = len(trim(name))
     do i = 1, len(trim(name))
        c_name(i) = name(i:i)
     end do
     c_name(str_len+1) = C_NULL_CHAR
-    
+
     call roctxRangePushA(c_name)
 
   end subroutine roctxStartRange
-  
+
 #endif
 end module roctx
diff --git a/src/device/hip_intf.F90 b/src/device/hip_intf.F90
index 87a3c8ddb97..bbddaaeff19 100644
--- a/src/device/hip_intf.F90
+++ b/src/device/hip_intf.F90
@@ -49,43 +49,43 @@ module hip_intf
 
   !> Low priority stream setting
   integer :: STRM_LOW_PRIO
-  
+
   !> Enum @a hipError_t
   enum, bind(c)
-     enumerator :: hipSuccess = 0
-     enumerator :: hipErrorInvalidContext = 1
-     enumerator :: hipErrorInvalidKernelFile = 2
-     enumerator :: hipErrorMemoryAllocation = 3
-     enumerator :: hipErrorInitializationError = 4
-     enumerator :: hipErrorLaunchFailure = 5
-     enumerator :: hipErrorLaunchOutOfResources = 6
-     enumerator :: hipErrorInvalidDevice = 7
-     enumerator :: hipErrorInvalidValue = 8
-     enumerator :: hipErrorInvalidDevicePointer = 9
-     enumerator :: hipErrorInvalidMemcpyDirection = 10
-     enumerator :: hipErrorUnknown = 11
-     enumerator :: hipErrorInvalidResourceHandle = 12
-     enumerator :: hipErrorNotReady = 13
-     enumerator :: hipErrorNoDevice = 14
-     enumerator :: hipErrorPeerAccessAlreadyEnabled = 15
-     enumerator :: hipErrorPeerAccessNotEnabled = 16
-     enumerator :: hipErrorRuntimeMemory = 17
-     enumerator :: hipErrorRuntimeOther = 18
-     enumerator :: hipErrorHostMemoryAlreadyRegistered = 19
-     enumerator :: hipErrorHostMemoryNotRegistered = 20
-     enumerator :: hipErrorMapBufferObjectFailed = 21
-     enumerator :: hipErrorTbd = 22
+    enumerator :: hipSuccess = 0
+    enumerator :: hipErrorInvalidContext = 1
+    enumerator :: hipErrorInvalidKernelFile = 2
+    enumerator :: hipErrorMemoryAllocation = 3
+    enumerator :: hipErrorInitializationError = 4
+    enumerator :: hipErrorLaunchFailure = 5
+    enumerator :: hipErrorLaunchOutOfResources = 6
+    enumerator :: hipErrorInvalidDevice = 7
+    enumerator :: hipErrorInvalidValue = 8
+    enumerator :: hipErrorInvalidDevicePointer = 9
+    enumerator :: hipErrorInvalidMemcpyDirection = 10
+    enumerator :: hipErrorUnknown = 11
+    enumerator :: hipErrorInvalidResourceHandle = 12
+    enumerator :: hipErrorNotReady = 13
+    enumerator :: hipErrorNoDevice = 14
+    enumerator :: hipErrorPeerAccessAlreadyEnabled = 15
+    enumerator :: hipErrorPeerAccessNotEnabled = 16
+    enumerator :: hipErrorRuntimeMemory = 17
+    enumerator :: hipErrorRuntimeOther = 18
+    enumerator :: hipErrorHostMemoryAlreadyRegistered = 19
+    enumerator :: hipErrorHostMemoryNotRegistered = 20
+    enumerator :: hipErrorMapBufferObjectFailed = 21
+    enumerator :: hipErrorTbd = 22
   end enum
-  
+
   !> Enum @a hipMemcpyKind
   enum, bind(c)
-     enumerator :: hipMemcpyHostToHost = 0
-     enumerator :: hipMemcpyHostToDevice = 1
-     enumerator :: hipMemcpyDeviceToHost = 2
-     enumerator :: hipMemcpyDevicetoDevice = 3
-     enumerator :: hipMemcpyDefault = 4
+    enumerator :: hipMemcpyHostToHost = 0
+    enumerator :: hipMemcpyHostToDevice = 1
+    enumerator :: hipMemcpyDeviceToHost = 2
+    enumerator :: hipMemcpyDevicetoDevice = 3
+    enumerator :: hipMemcpyDefault = 4
   end enum
-  
+
   interface
      integer (c_int) function hipMalloc(ptr_d, s) &
           bind(c, name='hipMalloc')
@@ -104,7 +104,7 @@ integer (c_int) function hipFree(ptr_d) &
        type(c_ptr), value :: ptr_d
      end function hipFree
   end interface
-  
+
   interface
      integer (c_int) function hipMemcpy(ptr_dst, ptr_src, s, dir) &
           bind(c, name='hipMemcpy')
@@ -126,7 +126,7 @@ integer (c_int) function hipMemcpyAsync(ptr_dst, ptr_src, s, dir, stream) &
        integer(c_int), value :: dir
      end function hipMemcpyAsync
   end interface
-  
+
   interface
      integer (c_int) function hipDeviceSynchronize() &
           bind(c, name='hipDeviceSynchronize')
@@ -184,7 +184,7 @@ integer (c_int) function hipStreamDestroy(steam) &
      end function hipStreamDestroy
   end interface
 
-  interface 
+  interface
      integer (c_int) function hipStreamSynchronize(stream) &
           bind(c, name='hipStreamSynchronize')
        use, intrinsic :: iso_c_binding
@@ -192,8 +192,8 @@ integer (c_int) function hipStreamSynchronize(stream) &
        type(c_ptr), value :: stream
      end function hipStreamSynchronize
   end interface
-  
-  interface 
+
+  interface
      integer (c_int) function hipStreamWaitEvent(stream, event, flags) &
           bind(c, name='hipStreamWaitEvent')
        use, intrinsic :: iso_c_binding
@@ -211,7 +211,7 @@ integer (c_int) function hipDeviceGetStreamPriorityRange(low_prio, high_prio) &
        integer(c_int) :: low_prio, high_prio
      end function hipDeviceGetStreamPriorityRange
   end interface
-  
+
   interface
      integer (c_int) function hipEventCreate(event) &
           bind(c, name='hipEventCreate')
@@ -249,7 +249,7 @@ integer (c_int) function hipEventRecord(event, stream) &
      end function hipEventRecord
   end interface
 
-  interface 
+  interface
      integer (c_int) function hipEventSynchronize(event) &
           bind(c, name='hipEventSynchronize')
        use, intrinsic :: iso_c_binding
@@ -257,13 +257,13 @@ integer (c_int) function hipEventSynchronize(event) &
        type(c_ptr), value :: event
      end function hipEventSynchronize
   end interface
-  
+
 contains
 
   subroutine hip_init
 
     if (hipDeviceGetStreamPriorityRange(STRM_LOW_PRIO, STRM_HIGH_PRIO) &
-         .ne. hipSuccess) then 
+         .ne. hipSuccess) then
        call neko_error('Error retrieving stream priority range')
     end if
 
@@ -287,12 +287,12 @@ subroutine hip_finalize
        call neko_error('Error destroying aux stream')
     end if
   end subroutine hip_finalize
-  
+
   subroutine hip_device_name(name)
     character(len=*), intent(inout) :: name
     character(kind=c_char, len=1024), target :: c_name
     integer :: end_pos
-    
+
     if (hipDeviceGetName(c_loc(c_name), 1024, 0) .ne. hipSuccess) then
        call neko_error('Failed to query device')
     end if
@@ -305,5 +305,5 @@ subroutine hip_device_name(name)
   end subroutine hip_device_name
 
 #endif
- 
+
 end module hip_intf
diff --git a/src/device/opencl/check.c b/src/device/opencl/check.c
index 36817a863db..a50bad33e5d 100644
--- a/src/device/opencl/check.c
+++ b/src/device/opencl/check.c
@@ -40,6 +40,7 @@
 
 #include <stdio.h>
 #include <stdarg.h>
+#include <stdlib.h>
 #include <device/opencl/check.h>
 
 #define CL_ERR_STR(err) case err: return #err
diff --git a/src/device/opencl/jit.c b/src/device/opencl/jit.c
index eab32c27864..d6930448029 100644
--- a/src/device/opencl/jit.c
+++ b/src/device/opencl/jit.c
@@ -39,6 +39,7 @@
 #endif
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <device/device_config.h>
 #include <device/opencl/check.h>
 
diff --git a/src/device/opencl/jit.h b/src/device/opencl/jit.h
index b31ad84a28f..50d19c0fabc 100644
--- a/src/device/opencl/jit.h
+++ b/src/device/opencl/jit.h
@@ -1,3 +1,6 @@
+#ifndef __CL_JIT_H
+#define __CL_JIT_H
+
 #ifdef __APPLE__
 #include <OpenCL/cl.h>
 #else
@@ -5,3 +8,5 @@
 #endif
 
 void opencl_kernel_jit(const char *kernel, cl_program *program);
+
+#endif
diff --git a/src/device/opencl/prgm_lib.F90 b/src/device/opencl/prgm_lib.F90
index 60bb1ed2626..be4fe2953fa 100644
--- a/src/device/opencl/prgm_lib.F90
+++ b/src/device/opencl/prgm_lib.F90
@@ -39,7 +39,7 @@ module opencl_prgm_lib
 
   !> Device onvective kernels
   type(c_ptr), bind(c) :: conv1_program = C_NULL_PTR
-  
+
   !> Device CFL kernels
   type(c_ptr), bind(c) :: cfl_program = C_NULL_PTR
 
@@ -253,16 +253,16 @@ subroutine opencl_prgm_lib_release
        end if
        scalar_residual_program = C_NULL_PTR
     end if
-    
+
     if (c_associated(lambda2_program)) then
        if(clReleaseProgram(lambda2_program) .ne. CL_SUCCESS) then
           call neko_error('Failed to release program')
        end if
        lambda2_program = C_NULL_PTR
     end if
-    
+
   end subroutine opencl_prgm_lib_release
 
 #endif
-  
+
 end module opencl_prgm_lib
diff --git a/src/device/opencl_intf.F90 b/src/device/opencl_intf.F90
index 156a7fe2074..677523a23a2 100644
--- a/src/device/opencl_intf.F90
+++ b/src/device/opencl_intf.F90
@@ -36,7 +36,7 @@ module opencl_intf
   use utils, only : neko_error
   use, intrinsic :: iso_c_binding
   implicit none
-  
+
 #ifdef HAVE_OPENCL
 
   !> Global OpenCL command queue
@@ -50,57 +50,57 @@ module opencl_intf
 
   !> Global OpenCL device_id
   type(c_ptr), bind(c), target :: glb_device_id = C_NULL_PTR
-  
+
   !> Enum Error Codes
   enum, bind(c)
-     enumerator :: CL_SUCCESS = 0
-     enumerator :: CL_DEVICE_NOT_FOUND = -1
-     enumerator :: CL_DEVICE_NOT_AVAILABLE = -2
-     enumerator :: CL_COMPILER_NOT_AVAILABLE = -3
-     enumerator :: CL_MEM_OBJECT_ALLOCATION_FAILURE = -4
-     enumerator :: CL_OUT_OF_RESOURCES = -5
-     enumerator :: CL_OUT_OF_HOST_MEMORY = -6
-     enumerator :: CL_PROFILING_INFO_NOT_AVAILABLE = -7
-     enumerator :: CL_MEM_COPY_OVERLAP = -8
-     enumerator :: CL_IMAGE_FORMAT_MISMATCH = -9
-     enumerator :: CL_IMAGE_FORMAT_NOT_SUPPORTED = -10
-     enumerator :: CL_BUILD_PROGRAM_FAILURE = -11
-     enumerator :: CL_MAP_FAILURE = -12
+    enumerator :: CL_SUCCESS = 0
+    enumerator :: CL_DEVICE_NOT_FOUND = -1
+    enumerator :: CL_DEVICE_NOT_AVAILABLE = -2
+    enumerator :: CL_COMPILER_NOT_AVAILABLE = -3
+    enumerator :: CL_MEM_OBJECT_ALLOCATION_FAILURE = -4
+    enumerator :: CL_OUT_OF_RESOURCES = -5
+    enumerator :: CL_OUT_OF_HOST_MEMORY = -6
+    enumerator :: CL_PROFILING_INFO_NOT_AVAILABLE = -7
+    enumerator :: CL_MEM_COPY_OVERLAP = -8
+    enumerator :: CL_IMAGE_FORMAT_MISMATCH = -9
+    enumerator :: CL_IMAGE_FORMAT_NOT_SUPPORTED = -10
+    enumerator :: CL_BUILD_PROGRAM_FAILURE = -11
+    enumerator :: CL_MAP_FAILURE = -12
   end enum
 
   !> Enum mem flags
   enum, bind(c)
-     enumerator :: CL_MEM_READ_WRITE = 1
-     enumerator :: CL_MEM_WRITE_ONLY = 2
-     enumerator :: CL_MEM_READ_ONLY = 4
-     enumerator :: CL_MEM_USE_HOST_PTR = 8
-     enumerator :: CL_MEM_ALLOC_HOST_PTR = 16
-     enumerator :: CL_MEM_HOST_WRITE_ONLY = 128
-     enumerator :: CL_MEM_HOST_READ_ONLY = 256
-     enumerator :: CL_MEM_HOST_NO_ACCESS = 512
+    enumerator :: CL_MEM_READ_WRITE = 1
+    enumerator :: CL_MEM_WRITE_ONLY = 2
+    enumerator :: CL_MEM_READ_ONLY = 4
+    enumerator :: CL_MEM_USE_HOST_PTR = 8
+    enumerator :: CL_MEM_ALLOC_HOST_PTR = 16
+    enumerator :: CL_MEM_HOST_WRITE_ONLY = 128
+    enumerator :: CL_MEM_HOST_READ_ONLY = 256
+    enumerator :: CL_MEM_HOST_NO_ACCESS = 512
   end enum
 
   !> Enum event flags
   enum, bind(c)
-     enumerator :: CL_COMPLETE = 0
-     enumerator :: CL_RUNNING = 1
-     enumerator :: CL_SUBMITTED = 2
-     enumerator :: CL_QUEUED = 3
+    enumerator :: CL_COMPLETE = 0
+    enumerator :: CL_RUNNING = 1
+    enumerator :: CL_SUBMITTED = 2
+    enumerator :: CL_QUEUED = 3
   end enum
 
   !> Enum boolean
   enum, bind(c)
-     enumerator :: CL_FALSE = 0
-     enumerator :: CL_TRUE = 1
+    enumerator :: CL_FALSE = 0
+    enumerator :: CL_TRUE = 1
   end enum
 
   enum, bind(c)
-     enumerator :: CL_CONTEXT_PLATFORM = int(Z'1084')
+    enumerator :: CL_CONTEXT_PLATFORM = int(Z'1084')
   end enum
 
   !> Enum device info
   enum, bind(c)
-     enumerator :: CL_DEVICE_NAME = 4139
+    enumerator :: CL_DEVICE_NAME = 4139
   end enum
 
   !> Device types
@@ -110,10 +110,10 @@ module opencl_intf
   integer(c_int64_t), parameter :: CL_DEVICE_TYPE_ACCELERATOR = 8
   integer(c_int64_t), parameter :: CL_DEVICE_TYPE_CUSTOM = 16
   integer(c_int64_t), parameter :: CL_DEVICE_TYPE_ALL = int(Z'FFFFFFFF', i8)
-    
+
   interface
      integer (c_int) function clGetPlatformIDs(num_entries, &
-          platforms, num_platforms) bind(c, name='clGetPlatformIDs')       
+          platforms, num_platforms) bind(c, name='clGetPlatformIDs')
        use, intrinsic :: iso_c_binding
        implicit none
        integer(c_int), value :: num_entries
@@ -253,7 +253,7 @@ end function clEnqueueMarkerWithWaitList
 
   interface
      integer (c_int) function clEnqueueMarker(cmd_queue, event) &
-          bind(c, name='clEnqueueMarker') 
+          bind(c, name='clEnqueueMarker')
        use, intrinsic :: iso_c_binding
        implicit none
        type(c_ptr), value :: cmd_queue
@@ -291,7 +291,7 @@ integer (c_int) function clSetUserEventStatus(event, status) &
        integer(c_int), value :: status
      end function clSetUserEventStatus
   end interface
-       
+
   interface
      integer (c_int) function clGetDeviceInfo(device, param_name, &
           param_value_size, param_value, param_value_size_ret) &
@@ -305,7 +305,7 @@ integer (c_int) function clGetDeviceInfo(device, param_name, &
        type(c_ptr), value :: param_value_size_ret
      end function clGetDeviceInfo
   end interface
-       
+
   interface
      integer (c_int) function clReleaseContext(context) &
           bind(c, name='clReleaseContext')
@@ -314,7 +314,7 @@ integer (c_int) function clReleaseContext(context) &
        type(c_ptr), value :: context
      end function clReleaseContext
   end interface
-  
+
   interface
      integer (c_int) function clReleaseCommandQueue(queue) &
           bind(c, name='clReleaseCommandQueue')
@@ -323,7 +323,7 @@ integer (c_int) function clReleaseCommandQueue(queue) &
        type(c_ptr), value :: queue
      end function clReleaseCommandQueue
   end interface
-  
+
   interface
      integer (c_int) function clReleaseDevice(device) &
           bind(c, name='clReleaseDevice')
@@ -341,7 +341,7 @@ integer (c_int) function clReleaseProgram(prgm) &
        type(c_ptr), value :: prgm
      end function clReleaseProgram
   end interface
-  
+
   interface
      integer (c_int) function clReleaseMemObject(ptr_d) &
           bind(c, name='clReleaseMemObject')
@@ -369,7 +369,7 @@ integer (c_int) function clFlush(cmd_queue) &
      end function clFlush
   end interface
 
-    interface
+  interface
      integer (c_int) function clFinish(cmd_queue) &
           bind(c, name='clFinish')
        use, intrinsic :: iso_c_binding
@@ -377,7 +377,7 @@ integer (c_int) function clFinish(cmd_queue) &
        type(c_ptr), value :: cmd_queue
      end function clFinish
   end interface
-  
+
 contains
 
   subroutine opencl_init
@@ -415,7 +415,7 @@ subroutine opencl_init
           call neko_error('Faield to release command queue')
        end if
     end if
-    
+
     glb_cmd_queue = clCreateCommandQueue(glb_ctx, glb_device_id, queue_props, ierr)
 
     if (ierr .ne. CL_SUCCESS) then
@@ -448,7 +448,7 @@ subroutine opencl_finalize
           call neko_error('Faield to release device')
        end if
     end if
-    
+
   end subroutine opencl_finalize
 
   subroutine opencl_device_name(name)
@@ -460,11 +460,11 @@ subroutine opencl_device_name(name)
          c_loc(c_name), c_loc(name_len)) .ne. CL_SUCCESS) then
        call neko_error('Failed to query device')
     end if
-        
+
     name(1:name_len) = c_name(1:name_len)
-    
+
   end subroutine opencl_device_name
-  
+
 #endif
-  
+
 end module opencl_intf
diff --git a/src/driver.f90 b/src/driver.f90
index c4467c69de9..44fc83285a3 100644
--- a/src/driver.f90
+++ b/src/driver.f90
@@ -2,7 +2,7 @@
 program turboneko
   use neko
   type(case_t), target :: C
-  
+
   call neko_init(C)
   call neko_solve(C)
   call neko_finalize(C)
diff --git a/src/field/field.f90 b/src/field/field.f90
index 898f6a2377d..b9a165dff55 100644
--- a/src/field/field.f90
+++ b/src/field/field.f90
@@ -42,10 +42,10 @@ module field
   use dofmap, only : dofmap_t
   implicit none
   private
-  
+
   type, public :: field_t
      real(kind=rp), allocatable :: x(:,:,:,:) !< Field data
-     
+
      type(space_t), pointer :: Xh   !< Function space \f$ X_h \f$
      type(mesh_t), pointer :: msh   !< Mesh
      type(dofmap_t), pointer :: dof !< Dofmap
@@ -62,8 +62,10 @@ module field
      procedure, private, pass(this) :: assign_field => field_assign_field
      procedure, private, pass(this) :: assign_scalar => field_assign_scalar
      procedure, private, pass(this) :: add_field => field_add_field
-     procedure, private, pass(this) :: add_Scalar => field_add_scalar
+     procedure, private, pass(this) :: add_scalar => field_add_scalar
      procedure, pass(this) :: free => field_free
+     !> Return the size of the field.
+     procedure, pass(this) :: size => field_size
      !> Initialise a field
      generic :: init => init_external_dof, init_internal_dof
      !> Assignemnt to current field
@@ -96,13 +98,13 @@ subroutine field_init_internal_dof(this, msh, space, fld_name)
     allocate(this%dof)
     this%dof = dofmap_t(this%msh, this%Xh)
     this%internal_dofmap = .true.
-    
+
     if (present(fld_name)) then
        call this%init_common(fld_name)
     else
        call this%init_common()
     end if
-    
+
   end subroutine field_init_internal_dof
 
   !> Initialize a field @a this on the mesh @a msh using an internal dofmap
@@ -122,7 +124,7 @@ subroutine field_init_external_dof(this, dof, fld_name)
     else
        call this%init_common()
     end if
-    
+
   end subroutine field_init_external_dof
 
   !> Initialize a field @a this
@@ -134,9 +136,9 @@ subroutine field_init_common(this, fld_name)
 
     associate(lx => this%Xh%lx, ly => this%Xh%ly, &
          lz => this%Xh%lz, nelv => this%msh%nelv)
-    
+
       if (.not. allocated(this%x)) then
-         allocate(this%x(lx, ly, lz, nelv), stat = ierr)        
+         allocate(this%x(lx, ly, lz, nelv), stat = ierr)
          this%x = 0d0
       end if
 
@@ -147,17 +149,17 @@ subroutine field_init_common(this, fld_name)
       end if
 
       if (NEKO_BCKND_DEVICE .eq. 1) then
-         n = lx * ly * lz * nelv           
+         n = lx * ly * lz * nelv
          call device_map(this%x, this%x_d, n)
       end if
     end associate
-    
+
   end subroutine field_init_common
 
   !> Deallocate a field @a f
   subroutine field_free(this)
     class(field_t), intent(inout) :: this
-    
+
     if (allocated(this%x)) then
        deallocate(this%x)
     end if
@@ -166,7 +168,7 @@ subroutine field_free(this)
        deallocate(this%dof)
        this%internal_dofmap = .false.
     end if
-    
+
     nullify(this%msh)
     nullify(this%Xh)
     nullify(this%dof)
@@ -183,38 +185,38 @@ end subroutine field_free
   subroutine field_assign_field(this, g)
     class(field_t), intent(inout) :: this
     type(field_t), intent(in) :: g
-    
+
     if (allocated(this%x)) then
        if (this%Xh .ne. g%Xh) then
           call this%free()
        end if
     end if
-    
+
     this%Xh => g%Xh
     this%msh => g%msh
     this%dof => g%dof
-    
-    
+
+
     this%Xh%lx = g%Xh%lx
     this%Xh%ly = g%Xh%ly
     this%Xh%lz = g%Xh%lz
-        
+
     if (.not. allocated(this%x)) then
-       
+
        allocate(this%x(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
-       
+
        if (NEKO_BCKND_DEVICE .eq. 1) then
-          call device_map(this%x, this%x_d, this%dof%size())
+          call device_map(this%x, this%x_d, this%size())
        end if
-       
+
     end if
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_copy(this%x_d, g%x_d, this%dof%size())
+       call device_copy(this%x_d, g%x_d, this%size())
     else
        call copy(this%x, g%x, this%dof%size())
     end if
-    
+
   end subroutine field_assign_field
 
   !> Assignment \f$ this = a \f$
@@ -224,7 +226,7 @@ subroutine field_assign_scalar(this, a)
     integer :: i, j, k, l
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_cfill(this%x_d, a, this%dof%size())
+       call device_cfill(this%x_d, a, this%size())
     else
        do i = 1, this%msh%nelv
           do l = 1, this%Xh%lz
@@ -236,9 +238,9 @@ subroutine field_assign_scalar(this, a)
           end do
        end do
     end if
-    
+
   end subroutine field_assign_scalar
-  
+
   !> Add \f$ this(u_1, u_2, ... , u_n) =
   !! this(u_1, u_2, ... , u_n) + G(u_1, u_2, ... , u_n) \f$
   !! @note Component wise
@@ -247,9 +249,9 @@ subroutine field_add_field(this, g)
     type(field_t), intent(in) :: g
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_add2(this%x_d, g%x_d, this%dof%size())
+       call device_add2(this%x_d, g%x_d, this%size())
     else
-       call add2(this%x, g%x, this%dof%size())
+       call add2(this%x, g%x, this%size())
     end if
 
   end subroutine field_add_field
@@ -262,12 +264,20 @@ subroutine field_add_scalar(this, a)
     real(kind=rp), intent(in) :: a
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_cadd(this%x_d, a, this%dof%size())
+       call device_cadd(this%x_d, a, this%size())
     else
-       call cadd(this%x, a, this%dof%size())
+       call cadd(this%x, a, this%size())
     end if
 
   end subroutine field_add_scalar
 
+  !> Return the size of the field based on the underlying dofmap.
+  pure function field_size(this) result(size)
+    class(field_t), intent(in) :: this
+    integer :: size
+
+    size = this%dof%size()
+  end function field_size
+
 end module field
 
diff --git a/src/field/field_list.f90 b/src/field/field_list.f90
index 04de4c2dfc7..36264a8d170 100644
--- a/src/field/field_list.f90
+++ b/src/field/field_list.f90
@@ -13,21 +13,21 @@ module field_list
      procedure, pass(this) :: free => field_list_free
   end type field_list_t
 
-  contains
+contains
   !> Append a field to the list.
-  !! @param field The field to append.
-  subroutine field_list_append(this, field)
+  !! @param f The field to append.
+  subroutine field_list_append(this, f)
     class(field_list_t), intent(inout) :: this
-    class(field_t), intent(inout), target :: field
+    class(field_t), intent(in), target :: f
     type(field_ptr_t), allocatable :: tmp(:)
     integer :: len
-    
+
     len = size(this%fields)
 
     allocate(tmp(len+1))
     tmp(1:len) = this%fields
     call move_alloc(tmp, this%fields)
-    this%fields(len+1)%f => field
+    this%fields(len+1)%f => f
 
   end subroutine field_list_append
 
@@ -35,7 +35,7 @@ end subroutine field_list_append
   subroutine field_list_free(this)
     class(field_list_t), intent(inout) :: this
     integer :: i, n_fields
-    
+
     if (allocated(this%fields)) then
        n_fields = size(this%fields)
        do i=1, n_fields
@@ -48,5 +48,5 @@ subroutine field_list_free(this)
   end subroutine field_list_free
 
 
-  
+
 end module field_list
diff --git a/src/field/field_registry.f90 b/src/field/field_registry.f90
index a29e9f142c1..feb75086d81 100644
--- a/src/field/field_registry.f90
+++ b/src/field/field_registry.f90
@@ -40,24 +40,33 @@ module field_registry
   use utils, only: neko_error
   implicit none
   private
-  
+
   type :: field_registry_t
-     !> list of fields stored
-     type(field_t), private, allocatable :: fields(:) 
-     !> number of registered fields
-     integer, private :: n                            
-     !> the size the fields array is increased by upon reallocation
-     integer, private :: expansion_size               
+     !> List of fields stored.
+     type(field_t), private, allocatable :: fields(:)
+     !> Number of registered fields.
+     integer, private :: n
+     !> The size the fields array is increased by upon reallocation.
+     integer, private :: expansion_size
    contains
      procedure, private, pass(this) :: expand
+     !> Constructor.
      procedure, pass(this) :: init => field_registry_init
+     !> Destructor.
      procedure, pass(this) :: free => field_registry_free
+     !> Add a field to the registry.
      procedure, pass(this) :: add_field
+     !> Get the number of fields in the registry.
      procedure, pass(this) :: n_fields
+     !> Get pointer to a stored field by index.
      procedure, pass(this) :: get_field_by_index
+     !> Get pointer to a stored field by name.
      procedure, pass(this) :: get_field_by_name
+     !> Get the `expansion_size`
      procedure, pass(this) :: get_expansion_size
+     !> Get total allocated size of `fields`.
      procedure, pass(this) :: get_size
+     !> Check if a field with a given name is already in the registry.
      procedure, pass(this) :: field_exists
      generic :: get_field => get_field_by_index, get_field_by_name
   end type field_registry_t
@@ -66,8 +75,9 @@ module field_registry
   type(field_registry_t), public, target :: neko_field_registry
 
 contains
-  !> Constructor, optionally taking initial registry and expansion
-  !> size as argument
+  !> Constructor
+  !! @param size The allocation size of `fields` on init.
+  !! @param expansion_size The number of entries added to `fields` on expansion.
   subroutine field_registry_init(this, size, expansion_size)
     class(field_registry_t), intent(inout):: this
     integer, optional, intent(in) :: size
@@ -104,11 +114,10 @@ subroutine field_registry_free(this)
     this%expansion_size = 0
   end subroutine field_registry_free
 
-  !> expand the fields array so as to accomodate more fields
+  !> Expand the fields array so as to accomodate more fields.
   subroutine expand(this)
     class(field_registry_t), intent(inout) :: this
-    type(field_t), allocatable :: temp(:)  
-    integer :: i
+    type(field_t), allocatable :: temp(:)
 
     allocate(temp(this%n + this%expansion_size))
     temp(1:this%n) = this%fields(1:this%n)
@@ -117,16 +126,24 @@ subroutine expand(this)
 
   end subroutine expand
 
-  subroutine add_field(this, dof, fld_name)
+  !> Add a field to the registry.
+  !! @param dof The map of degrees of freedom.
+  !! @param fld_name The name of the field.
+  !! @param ignore_existing If true, will do nothing if the field is already in
+  !! the registry. If false, will throw an error. Optional, defaults to false.
+  subroutine add_field(this, dof, fld_name, ignore_existing)
     class(field_registry_t), intent(inout) :: this
     type(dofmap_t), target, intent(in) :: dof
-    character(len=*), target, intent(in) :: fld_name 
-!    type(h_cptr_t) :: key
-    integer :: i
+    character(len=*), target, intent(in) :: fld_name
+    logical, optional, intent(in) :: ignore_existing
 
     if (this%field_exists(fld_name)) then
-       call neko_error("Field with name " // fld_name // &
-            " is already registered")
+      if (present(ignore_existing) .and. ignore_existing .eqv. .true.) then
+         return
+      else
+         call neko_error("Field with name " // fld_name // &
+                         " is already registered")
+      end if
     end if
 
     if (this%n_fields() == size(this%fields)) then
@@ -150,10 +167,10 @@ pure function n_fields(this) result(n)
     class(field_registry_t), intent(in) :: this
     integer :: n
 
-    n = this%n  
+    n = this%n
   end function n_fields
 
-  !> Get the size of the fields array
+  !> Get the size of the fields array.
   pure function get_size(this) result(n)
     class(field_registry_t), intent(in) :: this
     integer :: n
@@ -161,7 +178,7 @@ pure function get_size(this) result(n)
     n = size(this%fields)
   end function get_size
 
-  !> Get the expansion size
+  !> Get the expansion size.
   pure function get_expansion_size(this) result(n)
     class(field_registry_t), intent(in) :: this
     integer :: n
@@ -169,7 +186,7 @@ pure function get_expansion_size(this) result(n)
     n = this%expansion_size
   end function get_expansion_size
 
-  !> Get pointer to a stored field by index
+  !> Get pointer to a stored field by index.
   function get_field_by_index(this, i) result(f)
     class(field_registry_t), target, intent(in) :: this
     integer, intent(in) :: i
@@ -184,15 +201,16 @@ function get_field_by_index(this, i) result(f)
     f => this%fields(i)
   end function get_field_by_index
 
-  !> Get pointer to a stored field by field name
+  !> Get pointer to a stored field by field name.
   function get_field_by_name(this, name) result(f)
     class(field_registry_t), target, intent(in) :: this
-    character(len=*), intent(in) ::name 
+    character(len=*), intent(in) ::name
     type(field_t), pointer :: f
     logical :: found
     integer :: i
 
     found = .false.
+   
     do i=1, this%n_fields()
        if (this%fields(i)%name == name) then
           f => this%fields(i)
@@ -207,10 +225,10 @@ function get_field_by_name(this, name) result(f)
     end if
   end function get_field_by_name
 
-  !> Check if a field with a given name is already in the registry
+  !> Check if a field with a given name is already in the registry.
   function field_exists(this, name) result(found)
     class(field_registry_t), target, intent(in) :: this
-    character(len=*), intent(in) ::name 
+    character(len=*), intent(in) ::name
     logical :: found
     integer :: i
 
diff --git a/src/field/field_series.f90 b/src/field/field_series.f90
index d11966b5dee..cfa4b85874b 100644
--- a/src/field/field_series.f90
+++ b/src/field/field_series.f90
@@ -36,10 +36,10 @@ module field_series
   use field
   implicit none
   private
-  
+
   type, public :: field_series_t
      type(field_t), pointer :: f => null()
-     type(field_t), allocatable :: lf(:) 
+     type(field_t), allocatable :: lf(:)
      integer, private :: len = 0
    contains
      procedure, pass(this) :: init => field_series_init
@@ -57,6 +57,7 @@ subroutine field_series_init(this, f, len)
     type(field_t), intent(inout), target :: f
     integer :: len
     character(len=80) :: name
+    character(len=5) :: id_str
     integer :: i
 
     call this%free()
@@ -67,7 +68,8 @@ subroutine field_series_init(this, f, len)
     allocate(this%lf(len))
 
     do i = 1, this%len
-       name = trim(f%name)//'_lag'//char(i)
+       write(id_str, '(I0)') i
+       name = trim(f%name)//'_lag'//id_str
        call this%lf(i)%init(this%f%dof, name)
     end do
 
@@ -85,7 +87,7 @@ subroutine field_series_free(this)
     do i = 1, this%len
        call this%lf(i)%free()
     end do
-    
+
   end subroutine field_series_free
 
   !> Return the size of the field series
@@ -105,7 +107,7 @@ subroutine field_series_update(this)
     end do
 
     this%lf(1) = this%f
-    
+
   end subroutine field_series_update
 
   !> Set all fields in a series to @a g
@@ -117,7 +119,7 @@ subroutine field_series_set(this, g)
     do i = 1, this%len
        this%lf(i) = g
     end do
-    
+
   end subroutine field_series_set
-  
+
 end module field_series
diff --git a/src/field/mean_field.f90 b/src/field/mean_field.f90
index c85c20c98fa..0072177eb43 100644
--- a/src/field/mean_field.f90
+++ b/src/field/mean_field.f90
@@ -41,7 +41,7 @@ module mean_field
   use device_math, only : device_cmult, device_add2s2
   implicit none
   private
-  
+
   type, public, extends(stats_quant_t) ::  mean_field_t
      type(field_t), pointer :: f => null()
      type(field_t) :: mf
@@ -61,15 +61,15 @@ subroutine mean_field_init(this, f, field_name)
     type(field_t), intent(inout), target :: f
     character(len=*), optional, intent(in) :: field_name
     character(len=80) :: name
-    
-    
+
+
     call this%free()
 
     this%f => f
     this%time = 0.0_rp
     if (present(field_name)) then
        name = field_name
-    else 
+    else
        write(name, '(A,A)') 'mean_',trim(f%name)
     end if
 
@@ -91,9 +91,9 @@ end subroutine mean_field_free
   !> Resets a mean field
   subroutine mean_field_reset(this)
     class(mean_field_t), intent(inout) :: this
-    
+
     this%time = 0.0
-    this%mf = 0.0_rp  
+    this%mf = 0.0_rp
   end subroutine mean_field_reset
 
 
@@ -113,8 +113,8 @@ subroutine mean_field_update(this, k)
        this%time = this%time + k
        this%mf%x = this%mf%x / this%time
     end if
-       
+
   end subroutine mean_field_update
-  
+
 end module mean_field
 
diff --git a/src/field/mean_sqr_field.f90 b/src/field/mean_sqr_field.f90
index c76f339d749..6c1b210c1b0 100644
--- a/src/field/mean_sqr_field.f90
+++ b/src/field/mean_sqr_field.f90
@@ -46,7 +46,7 @@ module mean_sqr_field
      procedure, pass(this) :: update => mean_sqr_field_update
   end type mean_sqr_field_t
 
-contains    
+contains
 
   !> Update a mean sqr field
   subroutine mean_sqr_field_update(this, k)
diff --git a/src/field/mesh_field.f90 b/src/field/mesh_field.f90
index 19a757041a1..188b4ee7766 100644
--- a/src/field/mesh_field.f90
+++ b/src/field/mesh_field.f90
@@ -32,11 +32,11 @@
 !
 !> Defines a mesh field
 !! @details A mesh field is a scalar integer cell based field (\f$ dQ_0 \f$)
-module mesh_field  
+module mesh_field
   use mesh, only : mesh_t
   implicit none
   private
-  
+
   !> @todo Add support for different data types
   type, public ::  mesh_fld_t
      integer, allocatable :: data(:) !< Data
@@ -47,11 +47,11 @@ module mesh_field
   public :: mesh_field_init, mesh_field_free
 
 contains
-  
+
   subroutine mesh_field_init(fld, msh, fld_name)
     type(mesh_fld_t), intent(inout) :: fld
     type(mesh_t), target, intent(in) :: msh
-    character(len=*), optional :: fld_name 
+    character(len=*), optional :: fld_name
 
     call mesh_field_free(fld)
 
diff --git a/src/field/scratch_registry.f90 b/src/field/scratch_registry.f90
index 4206772a437..7e7b14bf7b5 100644
--- a/src/field/scratch_registry.f90
+++ b/src/field/scratch_registry.f90
@@ -35,48 +35,46 @@
 !! often and you don't want to create temporary fields (work arrays) inside
 !! it on each call.
 module scratch_registry
-  use num_types, only : rp
   use field, only : field_t, field_ptr_t
   use dofmap, only : dofmap_t
-  use utils, only : neko_error
   implicit none
   private
-  
-  
+
+
   type, public :: scratch_registry_t
-     !> list of scratch fields 
-     type(field_ptr_t), private, allocatable :: fields(:) 
+     !> list of scratch fields
+     type(field_ptr_t), private, allocatable :: fields(:)
      !> Tracks which fields are used
-     logical, private, allocatable :: inuse(:) 
+     logical, private, allocatable :: inuse(:)
      !> number of registered fields
-     integer, private :: nfields                      
+     integer, private :: nfields
      !> number of fields in use
-     integer, private :: nfields_inuse                
+     integer, private :: nfields_inuse
      !> the size the fields array is increased by upon reallocation
-     integer, private :: expansion_size               
+     integer, private :: expansion_size
      !> Dofmap
-     type(dofmap_t), pointer :: dof                   
+     type(dofmap_t), pointer :: dof
    contains
      procedure, private, pass(this) :: expand
      !> destructor
-     procedure, pass(this) :: free => scratch_registry_free 
-     !> getter for nfields  
-     procedure, pass(this) :: get_nfields 
-     !> getter for nfields_inuse 
-     procedure, pass(this) :: get_nfields_inuse 
+     procedure, pass(this) :: free => scratch_registry_free
+     !> getter for nfields
+     procedure, pass(this) :: get_nfields
+     !> getter for nfields_inuse
+     procedure, pass(this) :: get_nfields_inuse
      !> getter for expansion_size
-     procedure, pass(this) :: get_expansion_size 
+     procedure, pass(this) :: get_expansion_size
      !> return size of allocated fields
-     procedure, pass(this) :: get_size 
+     procedure, pass(this) :: get_size
      !> get value of inuse for a given index
-     procedure, pass(this) :: get_inuse 
+     procedure, pass(this) :: get_inuse
      !> get a new scratch field
-     procedure, pass(this) :: request_field 
+     procedure, pass(this) :: request_field
      procedure, pass(this) :: relinquish_field_single
      procedure, pass(this) :: relinquish_field_multiple
      !> free a field for later reuse
      generic :: relinquish_field => relinquish_field_single, &
-          relinquish_field_multiple 
+          relinquish_field_multiple
   end type scratch_registry_t
 
   interface scratch_registry_t
@@ -87,7 +85,7 @@ module scratch_registry
   type(scratch_registry_t), public, target :: neko_scratch_registry
 
 contains
-  
+
   !> Constructor, optionally taking initial registry and expansion
   !! size as argument
   type(scratch_registry_t) function init(dof, size, expansion_size) result(this)
@@ -95,13 +93,13 @@ type(scratch_registry_t) function init(dof, size, expansion_size) result(this)
     integer, optional, intent(in) :: size
     integer, optional, intent(in) :: expansion_size
     integer :: i
-    
+
     this%dof => dof
 
     if (present(size)) then
        allocate (this%fields(size))
        do i= 1, size
-         allocate(this%fields(i)%f)
+          allocate(this%fields(i)%f)
        end do
        allocate (this%inuse(size))
     else
@@ -130,13 +128,13 @@ subroutine scratch_registry_free(this)
           call this%fields(i)%f%free()
           deallocate(this%fields(i)%f)
        end do
-    
+
        deallocate(this%fields)
        deallocate(this%inuse)
     end if
 
     nullify(this%dof)
-    
+
   end subroutine scratch_registry_free
 
 
@@ -145,7 +143,7 @@ pure function get_nfields(this) result(n)
     class(scratch_registry_t), intent(in) :: this
     integer :: n
 
-    n = this%nfields  
+    n = this%nfields
   end function get_nfields
 
   pure function get_nfields_inuse(this) result(n)
@@ -154,7 +152,7 @@ pure function get_nfields_inuse(this) result(n)
 
     n = 0
     do i=1,this%get_size()
-      if (this%inuse(i)) n = n + 1
+       if (this%inuse(i)) n = n + 1
     end do
   end function get_nfields_inuse
 
@@ -176,15 +174,15 @@ end function get_expansion_size
 
   subroutine expand(this)
     class(scratch_registry_t), intent(inout) :: this
-    type(field_ptr_t), allocatable :: temp(:)  
-    logical, allocatable :: temp2(:)  
+    type(field_ptr_t), allocatable :: temp(:)
+    logical, allocatable :: temp2(:)
     integer :: i
 
     allocate(temp(this%get_size() + this%expansion_size))
     temp(1:this%nfields) = this%fields(1:this%nfields)
-    
+
     do i=this%nfields +1, size(temp)
-      allocate(temp(i)%f)
+       allocate(temp(i)%f)
     enddo
 
     call move_alloc(temp, this%fields)
@@ -203,13 +201,13 @@ subroutine request_field(this, f, index)
     integer, intent(inout) :: index !< The index of the field in the inuse array
     character(len=10) :: name
 
-    
-    associate(nfields => this%nfields, nfields_inuse => this%nfields_inuse) 
+
+    associate(nfields => this%nfields, nfields_inuse => this%nfields_inuse)
 
       do index=1,this%get_size()
          if (this%inuse(index) .eqv. .false.) then
             write (name, "(A3,I0.3)") "wrk", index
-            
+
             if (.not. allocated(this%fields(index)%f%x)) then
                call this%fields(index)%f%init(this%dof, trim(name))
                nfields = nfields + 1
@@ -232,12 +230,12 @@ subroutine request_field(this, f, index)
 
     end associate
   end subroutine request_field
-  
+
   !> Relinquish the use of a field in the registry
   subroutine relinquish_field_single(this, index)
     class(scratch_registry_t), target, intent(inout) :: this
     integer, intent(inout) :: index !< The index of the field to free
-    
+
     this%inuse(index) = .false.
     this%nfields_inuse = this%nfields_inuse - 1
   end subroutine relinquish_field_single
@@ -248,15 +246,15 @@ subroutine relinquish_field_multiple(this, indices)
     integer :: i
 
     do i=1, size(indices)
-      this%inuse(indices(i)) = .false.
+       this%inuse(indices(i)) = .false.
     end do
-    this%nfields_inuse = this%nfields_inuse - size(indices) 
+    this%nfields_inuse = this%nfields_inuse - size(indices)
   end subroutine relinquish_field_multiple
 
   logical function get_inuse(this, index)
     class(scratch_registry_t), target, intent(inout) :: this
-    integer, intent(inout) :: index !< The index of the field to check 
-    
+    integer, intent(inout) :: index !< The index of the field to check
+
     get_inuse = this%inuse(index)
   end function get_inuse
 
diff --git a/src/fluid/advection.f90 b/src/fluid/advection.f90
index 2226f09e2ff..07c6c69577f 100644
--- a/src/fluid/advection.f90
+++ b/src/fluid/advection.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2021-2023, The Neko Authors
+! Copyright (c) 2021-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -30,14 +30,14 @@
 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 ! POSSIBILITY OF SUCH DAMAGE.
 !
-!> Subroutines to add advection terms to the RHS of a transport equation. 
+!> Subroutines to add advection terms to the RHS of a transport equation.
 module advection
-  use num_types
+  use num_types, only : rp
   use math
   use utils
-  use space
-  use field
-  use coefs
+  use space, only : space_t, GL
+  use field, only : field_t
+  use coefs, only : coef_t
   use device_math
   use neko_config
   use operators
@@ -48,12 +48,13 @@ module advection
                                           c_associated
   implicit none
   private
-  
+
   !> Base abstract type for computing the advection operator
   type, public, abstract :: advection_t
    contains
      procedure(compute_adv), pass(this), deferred :: compute
      procedure(compute_scalar_adv), pass(this), deferred :: compute_scalar
+     procedure(advection_free), pass(this), deferred :: free
   end type advection_t
 
   !> Type encapsulating advection routines with no dealiasing applied
@@ -61,6 +62,10 @@ module advection
      real(kind=rp), allocatable :: temp(:)
      type(c_ptr) :: temp_d = C_NULL_PTR
    contains
+     !> Constructor
+     procedure, pass(this) :: init => init_no_dealias
+     !> Destructor
+     procedure, pass(this) :: free => free_no_dealias
      !> Add the advection term for the fluid, i.e. \f$u \cdot \nabla u \f$, to
      !! the RHS
      procedure, pass(this) :: compute => compute_advection_no_dealias
@@ -112,6 +117,8 @@ module advection
      procedure, pass(this) :: compute_scalar => compute_scalar_advection_dealias
      !> Constructor
      procedure, pass(this) :: init => init_dealias
+     !> Destructor
+     procedure, pass(this) :: free => free_dealias
   end type adv_dealias_t
 
   abstract interface
@@ -143,7 +150,7 @@ end subroutine compute_adv
 
   abstract interface
      !> Add advection operator to the right-hand-side for a scalar.
-     !! @param this The object. 
+     !! @param this The object.
      !! @param vx The x component of velocity.
      !! @param vy The y component of velocity.
      !! @param vz The z component of velocity.
@@ -168,63 +175,21 @@ subroutine compute_scalar_adv(this, vx, vy, vz, s, fs, Xh, coef, n)
      end subroutine compute_scalar_adv
   end interface
 
-  public :: advection_factory
+  abstract interface
+     !> Destructor
+     subroutine advection_free(this)
+       import :: advection_t
+       class(advection_t), intent(inout) :: this
+     end subroutine advection_free
+  end interface
 
 contains
-  
-  !> A factory for \ref advection_t decendants.
-  !! @param this Polymorphic object of class \ref advection_t.
-  !! @param coeff The coefficients of the (space, mesh) pair.
-  !! @param delias Whether to dealias or not.
-  !! @param lxd The polynomial order of the space used in the dealiasing.
-  !! Defaults to 3/2 of `coeff%Xh%lx`.
-  !! @note The factory both allocates and initializes `this`.
-  subroutine advection_factory(this, coef, dealias, lxd)
-    implicit none
-    class(advection_t), allocatable, intent(inout) :: this
-    type(coef_t), target :: coef
-    logical, intent(in) :: dealias
-    integer, intent(in) :: lxd
-
-    ! Free allocatables if necessary
-    if (allocated(this)) then
-       select type(adv => this)
-       type is(adv_no_dealias_t)
-          if (allocated(adv%temp)) then
-             deallocate(adv%temp)
-          end if
-          if (c_associated(adv%temp_d)) then
-             call device_free(adv%temp_d)
-          end if
-       end select
-       deallocate(this)
-    end if
-
-    if (dealias) then
-       allocate(adv_dealias_t::this)
-    else
-       allocate(adv_no_dealias_t::this)
-    end if
-    
-    select type(adv => this)
-    type is(adv_dealias_t)
-       if (lxd .gt. 0) then
-          call init_dealias(adv, lxd, coef) 
-       else
-          call init_dealias(adv, coef%Xh%lx * 3/2,  coef)
-       end if
-    type is(adv_no_dealias_t)
-       call init_no_dealias(adv, coef)
-    end select
-
-  end subroutine advection_factory
 
   !> Constructor
-  !! @param coeff The coefficients of the (space, mesh) pair.
+  !! @param coef The coefficients of the (space, mesh) pair.
   subroutine init_no_dealias(this, coef)
-    implicit none
-    class(adv_no_dealias_t) :: this
-    type(coef_t) :: coef
+    class(adv_no_dealias_t), intent(inout) :: this
+    type(coef_t), intent(in) :: coef
 
     allocate(this%temp(coef%dof%size()))
 
@@ -234,11 +199,22 @@ subroutine init_no_dealias(this, coef)
 
   end subroutine init_no_dealias
 
+  !> Destructor
+  subroutine free_no_dealias(this)
+    class(adv_no_dealias_t), intent(inout) :: this
+
+    if (allocated(this%temp)) then
+       deallocate(this%temp)
+    end if
+    if (c_associated(this%temp_d)) then
+       call device_free(this%temp_d)
+    end if
+  end subroutine free_no_dealias
+
   !> Constructor
   !! @param lxd The polynomial order of the space used in the dealiasing.
-  !! @param coeff The coefficients of the (space, mesh) pair.
+  !! @param coef The coefficients of the (space, mesh) pair.
   subroutine init_dealias(this, lxd, coef)
-    implicit none
     class(adv_dealias_t), target, intent(inout) :: this
     integer, intent(in) :: lxd
     type(coef_t), intent(inout), target :: coef
@@ -275,7 +251,7 @@ subroutine init_dealias(this, lxd, coef)
        allocate(this%vs(n_GL))
        allocate(this%vt(n_GL))
     end if
-    
+
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_map(this%temp, this%temp_d, n_GL)
        call device_map(this%tbf, this%tbf_d, n_GL)
@@ -288,7 +264,13 @@ subroutine init_dealias(this, lxd, coef)
     end if
 
   end subroutine init_dealias
-  
+
+  !> Destructor
+  subroutine free_dealias(this)
+    class(adv_dealias_t), intent(inout) :: this
+  end subroutine free_dealias
+
+
   !> Add the advection term for the fluid, i.e. \f$u \cdot \nabla u \f$, to
   !! the RHS.
   !! @param vx The x component of velocity.
@@ -301,7 +283,6 @@ end subroutine init_dealias
   !! @param coef The coefficients of the (Xh, mesh) pair.
   !! @param n Typically the size of the mesh.
   subroutine compute_advection_dealias(this, vx, vy, vz, fx, fy, fz, Xh, coef, n)
-    implicit none
     class(adv_dealias_t), intent(inout) :: this
     type(space_t), intent(inout) :: Xh
     type(coef_t), intent(inout) :: coef
@@ -309,7 +290,7 @@ subroutine compute_advection_dealias(this, vx, vy, vz, fx, fy, fz, Xh, coef, n)
     integer, intent(in) :: n
     real(kind=rp), intent(inout), dimension(n) :: fx, fy, fz
     real(kind=rp), dimension(this%Xh_GL%lxyz) :: tx, ty, tz
-    real(kind=rp), dimension(this%Xh_GL%lxyz) :: tfx, tfy, tfz 
+    real(kind=rp), dimension(this%Xh_GL%lxyz) :: tfx, tfy, tfz
     real(kind=rp), dimension(this%Xh_GL%lxyz) :: vr, vs, vt
     real(kind=rp), dimension(this%Xh_GLL%lxyz) :: tempx, tempy, tempz
     type(c_ptr) :: fx_d, fy_d, fz_d
@@ -318,90 +299,90 @@ subroutine compute_advection_dealias(this, vx, vy, vz, fx, fy, fz, Xh, coef, n)
     n_GL = nel * this%Xh_GL%lxyz
     !This is extremely primitive and unoptimized  on the device //Karp
     associate(c_GL => this%coef_GL)
-    if (NEKO_BCKND_DEVICE .eq. 1) then
-       fx_d = device_get_ptr(fx)
-       fy_d = device_get_ptr(fy)
-       fz_d = device_get_ptr(fz)
-       call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
-       call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
-       call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
-
-       call opgrad(this%vr, this%vs, this%vt, this%tx, c_GL)
-       call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
+      if (NEKO_BCKND_DEVICE .eq. 1) then
+         fx_d = device_get_ptr(fx)
+         fy_d = device_get_ptr(fy)
+         fz_d = device_get_ptr(fz)
+         call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
+         call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
+         call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
+
+         call opgrad(this%vr, this%vs, this%vt, this%tx, c_GL)
+         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
                          this%tx_d, this%ty_d, this%tz_d, n_GL)
-       call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
-       call device_sub2(fx_d, this%temp_d, n)
+         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
+         call device_sub2(fx_d, this%temp_d, n)
 
 
-       call opgrad(this%vr, this%vs, this%vt, this%ty, c_GL)
-       call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
+         call opgrad(this%vr, this%vs, this%vt, this%ty, c_GL)
+         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
                          this%tx_d, this%ty_d, this%tz_d, n_GL)
-       call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
-       call device_sub2(fy_d, this%temp_d, n)
+         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
+         call device_sub2(fy_d, this%temp_d, n)
 
-       call opgrad(this%vr, this%vs, this%vt, this%tz, c_GL)
-       call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
+         call opgrad(this%vr, this%vs, this%vt, this%tz, c_GL)
+         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
                          this%tx_d, this%ty_d, this%tz_d, n_GL)
-       call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
-       call device_sub2(fz_d, this%temp_d, n)
+         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
+         call device_sub2(fz_d, this%temp_d, n)
 
-    else if ((NEKO_BCKND_SX .eq. 1) .or. (NEKO_BCKND_XSMM .eq. 1)) then
+      else if ((NEKO_BCKND_SX .eq. 1) .or. (NEKO_BCKND_XSMM .eq. 1)) then
 
-       call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
-       call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
-       call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
+         call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
+         call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
+         call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
 
-       call opgrad(this%vr, this%vs, this%vt, this%tx, c_GL)
-       call vdot3(this%tbf, this%vr, this%vs, this%vt, &
+         call opgrad(this%vr, this%vs, this%vt, this%tx, c_GL)
+         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
                   this%tx, this%ty, this%tz, n_GL)
-       call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
-       call sub2(fx, this%temp, n)
+         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
+         call sub2(fx, this%temp, n)
 
 
-       call opgrad(this%vr, this%vs, this%vt, this%ty, c_GL)
-       call vdot3(this%tbf, this%vr, this%vs, this%vt, &
+         call opgrad(this%vr, this%vs, this%vt, this%ty, c_GL)
+         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
                   this%tx, this%ty, this%tz, n_GL)
-       call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
-       call sub2(fy, this%temp, n)
+         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
+         call sub2(fy, this%temp, n)
 
-       call opgrad(this%vr, this%vs, this%vt, this%tz, c_GL)
-       call vdot3(this%tbf, this%vr, this%vs, this%vt, &
+         call opgrad(this%vr, this%vs, this%vt, this%tz, c_GL)
+         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
                   this%tx, this%ty, this%tz, n_GL)
-       call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
-       call sub2(fz, this%temp, n)
-       
-    else
-
-       do e = 1, coef%msh%nelv
-          call this%GLL_to_GL%map(tx, vx%x(1,1,1,e), 1, this%Xh_GL)
-          call this%GLL_to_GL%map(ty, vy%x(1,1,1,e), 1, this%Xh_GL)
-          call this%GLL_to_GL%map(tz, vz%x(1,1,1,e), 1, this%Xh_GL)
-
-          call opgrad(vr, vs, vt, tx, c_GL, e, e)
-          do i = 1, this%Xh_GL%lxyz
-             tfx(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
-          end do
-
-          call opgrad(vr, vs, vt, ty, c_GL, e, e)
-          do i = 1, this%Xh_GL%lxyz
-             tfy(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
-          end do
-
-          call opgrad(vr, vs, vt, tz, c_GL, e, e)
-          do i = 1, this%Xh_GL%lxyz
-             tfz(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
-          end do
-
-          call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
-          call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
-          call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)
-
-          idx = (e-1)*this%Xh_GLL%lxyz+1
-          call sub2(fx(idx), tempx, this%Xh_GLL%lxyz)
-          call sub2(fy(idx), tempy, this%Xh_GLL%lxyz)
-          call sub2(fz(idx), tempz, this%Xh_GLL%lxyz)
-       end do
-    end if
+         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
+         call sub2(fz, this%temp, n)
+
+      else
+
+         do e = 1, coef%msh%nelv
+            call this%GLL_to_GL%map(tx, vx%x(1,1,1,e), 1, this%Xh_GL)
+            call this%GLL_to_GL%map(ty, vy%x(1,1,1,e), 1, this%Xh_GL)
+            call this%GLL_to_GL%map(tz, vz%x(1,1,1,e), 1, this%Xh_GL)
+
+            call opgrad(vr, vs, vt, tx, c_GL, e, e)
+            do i = 1, this%Xh_GL%lxyz
+               tfx(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
+            end do
+
+            call opgrad(vr, vs, vt, ty, c_GL, e, e)
+            do i = 1, this%Xh_GL%lxyz
+               tfy(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
+            end do
+
+            call opgrad(vr, vs, vt, tz, c_GL, e, e)
+            do i = 1, this%Xh_GL%lxyz
+               tfz(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i)
+            end do
+
+            call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL)
+            call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL)
+            call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL)
+
+            idx = (e-1)*this%Xh_GLL%lxyz+1
+            call sub2(fx(idx), tempx, this%Xh_GLL%lxyz)
+            call sub2(fy(idx), tempy, this%Xh_GLL%lxyz)
+            call sub2(fz(idx), tempz, this%Xh_GLL%lxyz)
+         end do
+      end if
     end associate
 
   end subroutine compute_advection_dealias
@@ -418,7 +399,6 @@ end subroutine compute_advection_dealias
   !! @param coef The coefficients of the (Xh, mesh) pair.
   !! @param n Typically the size of the mesh.
   subroutine compute_advection_no_dealias(this, vx, vy, vz, fx, fy, fz, Xh, coef, n)
-    implicit none
     class(adv_no_dealias_t), intent(inout) :: this
     type(space_t), intent(inout) :: Xh
     type(coef_t), intent(inout) :: coef
@@ -431,7 +411,7 @@ subroutine compute_advection_no_dealias(this, vx, vy, vz, fx, fy, fz, Xh, coef,
        fx_d = device_get_ptr(fx)
        fy_d = device_get_ptr(fy)
        fz_d = device_get_ptr(fz)
-       
+
        call conv1(this%temp, vx%x, vx%x, vy%x, vz%x, Xh, coef)
        call device_subcol3 (fx_d, coef%B_d, this%temp_d, n)
        call conv1(this%temp, vy%x, vx%x, vy%x, vz%x, Xh, coef)
@@ -459,7 +439,7 @@ end subroutine compute_advection_no_dealias
 
   !> Add the advection term for a scalar, i.e. \f$u \cdot \nabla s \f$, to the
   !! RHS.
-  !! @param this The object. 
+  !! @param this The object.
   !! @param vx The x component of velocity.
   !! @param vy The y component of velocity.
   !! @param vz The z component of velocity.
@@ -470,7 +450,6 @@ end subroutine compute_advection_no_dealias
   !! @param n Typically the size of the mesh.
   subroutine compute_scalar_advection_no_dealias(this, vx, vy, vz, s, fs, Xh, &
                                                coef, n)
-    implicit none
     class(adv_no_dealias_t), intent(inout) :: this
     type(field_t), intent(inout) :: vx, vy, vz
     type(field_t), intent(inout) :: s
@@ -482,7 +461,7 @@ subroutine compute_scalar_advection_no_dealias(this, vx, vy, vz, s, fs, Xh, &
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
        fs_d = device_get_ptr(fs)
-       
+
        call conv1(this%temp, s%x, vx%x, vy%x, vz%x, Xh, coef)
        call device_subcol3 (fs_d, coef%B_d, this%temp_d, n)
        if (coef%Xh%lz .eq. 1) then
@@ -503,7 +482,7 @@ end subroutine compute_scalar_advection_no_dealias
 
   !> Add the advection term for a scalar, i.e. \f$u \cdot \nabla s \f$, to the
   !! RHS.
-  !! @param this The object. 
+  !! @param this The object.
   !! @param vx The x component of velocity.
   !! @param vy The y component of velocity.
   !! @param vz The z component of velocity.
@@ -514,7 +493,6 @@ end subroutine compute_scalar_advection_no_dealias
   !! @param n Typically the size of the mesh.
   subroutine compute_scalar_advection_dealias(this, vx, vy, vz, s, fs, Xh, &
                                             coef, n)
-    implicit none
     class(adv_dealias_t), intent(inout) :: this
     type(field_t), intent(inout) :: vx, vy, vz
     type(field_t), intent(inout) :: s
@@ -533,80 +511,80 @@ subroutine compute_scalar_advection_dealias(this, vx, vy, vz, s, fs, Xh, &
     n_GL = nel * this%Xh_GL%lxyz
 
     associate(c_GL => this%coef_GL)
-    if (NEKO_BCKND_DEVICE .eq. 1) then
-       fs_d = device_get_ptr(fs)
+      if (NEKO_BCKND_DEVICE .eq. 1) then
+         fs_d = device_get_ptr(fs)
+
+         ! Map advecting velocity onto the higher-order space
+         call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
+         call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
+         call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
 
-       ! Map advecting velocity onto the higher-order space
-       call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
-       call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
-       call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
-
-       ! Map the scalar onto the high-order space
-       call this%GLL_to_GL%map(this%temp, s%x, nel, this%Xh_GL)
-       
-       ! Compute the scalar gradient in the high-order space
-       call opgrad(this%vr, this%vs, this%vt, this%temp, c_GL)
-       
-       ! Compute the convective term, i.e dot the velocity with the scalar grad
-       call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
+         ! Map the scalar onto the high-order space
+         call this%GLL_to_GL%map(this%temp, s%x, nel, this%Xh_GL)
+
+         ! Compute the scalar gradient in the high-order space
+         call opgrad(this%vr, this%vs, this%vt, this%temp, c_GL)
+
+         ! Compute the convective term, i.e dot the velocity with the scalar grad
+         call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, &
                          this%tx_d, this%ty_d, this%tz_d, n_GL)
 
-       ! Map back to the original space (we reuse this%temp)
-       call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
-       
-       ! Update the source term
-       call device_sub2(fs_d, this%temp_d, n)
-
-    else if ((NEKO_BCKND_SX .eq. 1) .or. (NEKO_BCKND_XSMM .eq. 1)) then
-
-       ! Map advecting velocity onto the higher-order space
-       call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
-       call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
-       call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
-
-       ! Map the scalar onto the high-order space
-       call this%GLL_to_GL%map(this%temp, s%x, nel, this%Xh_GL)
-       
-       ! Compute the scalar gradient in the high-order space
-       call opgrad(this%vr, this%vs, this%vt, this%temp, c_GL)
-       
-       ! Compute the convective term, i.e dot the velocity with the scalar grad
-       call vdot3(this%tbf, this%vr, this%vs, this%vt, &
+         ! Map back to the original space (we reuse this%temp)
+         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
+
+         ! Update the source term
+         call device_sub2(fs_d, this%temp_d, n)
+
+      else if ((NEKO_BCKND_SX .eq. 1) .or. (NEKO_BCKND_XSMM .eq. 1)) then
+
+         ! Map advecting velocity onto the higher-order space
+         call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL)
+         call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL)
+         call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL)
+
+         ! Map the scalar onto the high-order space
+         call this%GLL_to_GL%map(this%temp, s%x, nel, this%Xh_GL)
+
+         ! Compute the scalar gradient in the high-order space
+         call opgrad(this%vr, this%vs, this%vt, this%temp, c_GL)
+
+         ! Compute the convective term, i.e dot the velocity with the scalar grad
+         call vdot3(this%tbf, this%vr, this%vs, this%vt, &
                   this%tx, this%ty, this%tz, n_GL)
 
-       ! Map back to the original space (we reuse this%temp)
-       call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
-       
-       ! Update the source term
-       call sub2(fs, this%temp, n)
+         ! Map back to the original space (we reuse this%temp)
+         call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL)
 
-    else
-       do e = 1, coef%msh%nelv
-          ! Map advecting velocity onto the higher-order space
-          call this%GLL_to_GL%map(vx_GL, vx%x(1,1,1,e), 1, this%Xh_GL)
-          call this%GLL_to_GL%map(vy_GL, vy%x(1,1,1,e), 1, this%Xh_GL)
-          call this%GLL_to_GL%map(vz_GL, vz%x(1,1,1,e), 1, this%Xh_GL)
-
-          ! Map scalar onto the higher-order space
-          call this%GLL_to_GL%map(s_GL, s%x(1,1,1,e), 1, this%Xh_GL)
-
-          ! Gradient of s in the higher-order space
-          call opgrad(dsdx, dsdy, dsdz, s_GL, c_GL, e, e)
-          
-          ! vx * ds/dx + vy * ds/dy + vz * ds/dz for each point in the element
-          do i = 1, this%Xh_GL%lxyz
-             f_GL(i) = vx_GL(i)*dsdx(i) + vy_GL(i)*dsdy(i) + vz_GL(i)*dsdz(i)
-          end do
-
-          ! Map back the contructed operator to the original space
-          call this%GLL_to_GL%map(temp, f_GL, 1, this%Xh_GLL)
-
-          idx = (e-1)*this%Xh_GLL%lxyz + 1
-
-          call sub2(fs(idx), temp, this%Xh_GLL%lxyz)
-       end do
-   end if
-   end associate
+         ! Update the source term
+         call sub2(fs, this%temp, n)
+
+      else
+         do e = 1, coef%msh%nelv
+            ! Map advecting velocity onto the higher-order space
+            call this%GLL_to_GL%map(vx_GL, vx%x(1,1,1,e), 1, this%Xh_GL)
+            call this%GLL_to_GL%map(vy_GL, vy%x(1,1,1,e), 1, this%Xh_GL)
+            call this%GLL_to_GL%map(vz_GL, vz%x(1,1,1,e), 1, this%Xh_GL)
+
+            ! Map scalar onto the higher-order space
+            call this%GLL_to_GL%map(s_GL, s%x(1,1,1,e), 1, this%Xh_GL)
+
+            ! Gradient of s in the higher-order space
+            call opgrad(dsdx, dsdy, dsdz, s_GL, c_GL, e, e)
+
+            ! vx * ds/dx + vy * ds/dy + vz * ds/dz for each point in the element
+            do i = 1, this%Xh_GL%lxyz
+               f_GL(i) = vx_GL(i)*dsdx(i) + vy_GL(i)*dsdy(i) + vz_GL(i)*dsdz(i)
+            end do
+
+            ! Map back the contructed operator to the original space
+            call this%GLL_to_GL%map(temp, f_GL, 1, this%Xh_GLL)
+
+            idx = (e-1)*this%Xh_GLL%lxyz + 1
+
+            call sub2(fs(idx), temp, this%Xh_GLL%lxyz)
+         end do
+      end if
+    end associate
 
   end subroutine compute_scalar_advection_dealias
 
diff --git a/src/fluid/advection_fctry.f90 b/src/fluid/advection_fctry.f90
new file mode 100644
index 00000000000..7f84dad793e
--- /dev/null
+++ b/src/fluid/advection_fctry.f90
@@ -0,0 +1,96 @@
+! Copyright (c) 2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Contains the factory routine for `advection_t` children.
+module advection_fctry
+  use num_types, only : rp
+  use advection, only : advection_t, adv_no_dealias_t, adv_dealias_t
+  use coefs, only : coef_t
+  use device, only : device_free
+  use, intrinsic :: iso_c_binding, only : c_associated
+  use json_utils, only : json_get
+  use json_module, only : json_file
+  implicit none
+  private
+
+  public :: advection_factory
+
+contains
+
+  !> A factory for \ref advection_t decendants.
+  !! @param this Polymorphic object of class \ref advection_t.
+  !! @param json The parameter file.
+  !! @param coef The coefficients of the (space, mesh) pair.
+  !! @note The factory both allocates and initializes `this`.
+  subroutine advection_factory(this, json, coef)
+    implicit none
+    class(advection_t), allocatable, intent(inout) :: this
+    type(json_file), intent(inout) :: json
+    type(coef_t), target :: coef
+    logical :: dealias, found
+    integer :: lxd, order
+
+    call json_get(json, 'case.numerics.dealias', dealias)
+    call json%get('case.numerics.dealiased_polynomial_order', lxd, found)
+    if (.not. found) then
+       call json_get(json, 'case.numerics.polynomial_order', order)
+       ! Note, assumes odd polynomial order
+       lxd =  3.0_rp / 2.0_rp * (order + 1)
+    end if
+
+    ! Free allocatables if necessary
+    if (allocated(this)) then
+       call this%free
+       deallocate(this)
+    end if
+
+    if (dealias) then
+       allocate(adv_dealias_t::this)
+    else
+       allocate(adv_no_dealias_t::this)
+    end if
+
+    select type(adv => this)
+    type is(adv_dealias_t)
+       if (lxd .gt. 0) then
+          call adv%init(lxd, coef)
+       else
+          call adv%init(coef%Xh%lx * 3/2,  coef)
+       end if
+    type is(adv_no_dealias_t)
+       call adv%init(coef)
+    end select
+
+  end subroutine advection_factory
+
+
+end module advection_fctry
diff --git a/src/fluid/bcknd/cpu/pnpn_res_cpu.f90 b/src/fluid/bcknd/cpu/pnpn_res_cpu.f90
index dfe1fb6e3ea..87e2f8d5697 100644
--- a/src/fluid/bcknd/cpu/pnpn_res_cpu.f90
+++ b/src/fluid/bcknd/cpu/pnpn_res_cpu.f90
@@ -13,7 +13,7 @@ module pnpn_res_cpu
   use space, only : space_t
   implicit none
   private
-  
+
   type, public, extends(pnpn_prs_res_t) :: pnpn_prs_res_cpu_t
    contains
      procedure, nopass :: compute => pnpn_prs_res_cpu_compute
@@ -36,7 +36,7 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, &
     type(gs_t), intent(inout) :: gs_Xh
     type(facet_normal_t), intent(inout) :: bc_prs_surface
     type(facet_normal_t), intent(inout) :: bc_sym_surface
-    class(Ax_t), intent(inout) :: Ax
+    class(ax_t), intent(inout) :: Ax
     real(kind=rp), intent(inout) :: bd
     real(kind=rp), intent(in) :: dt
     real(kind=rp), intent(in) :: mu
@@ -63,11 +63,11 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, &
        c_Xh%h2(i,1,1,1) = 0.0_rp
     end do
     c_Xh%ifh2 = .false.
-    
+
     call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_Xh)
     call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_Xh)
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        ta1%x(i,1,1,1) = f_x%x(i,1,1,1) / rho &
             - ((wa1%x(i,1,1,1) * (mu / rho)) * c_Xh%B(i,1,1,1))
        ta2%x(i,1,1,1) = f_y%x(i,1,1,1) / rho &
@@ -75,12 +75,12 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, &
        ta3%x(i,1,1,1) = f_z%x(i,1,1,1) / rho &
             - ((wa3%x(i,1,1,1) * (mu / rho)) * c_Xh%B(i,1,1,1))
     end do
-     
-    call gs_Xh%op(ta1, GS_OP_ADD) 
-    call gs_Xh%op(ta2, GS_OP_ADD) 
-    call gs_Xh%op(ta3, GS_OP_ADD) 
 
-    do i = 1, n
+    call gs_Xh%op(ta1, GS_OP_ADD)
+    call gs_Xh%op(ta2, GS_OP_ADD)
+    call gs_Xh%op(ta3, GS_OP_ADD)
+
+    do concurrent (i = 1:n)
        ta1%x(i,1,1,1) = ta1%x(i,1,1,1) * c_Xh%Binv(i,1,1,1)
        ta2%x(i,1,1,1) = ta2%x(i,1,1,1) * c_Xh%Binv(i,1,1,1)
        ta3%x(i,1,1,1) = ta3%x(i,1,1,1) * c_Xh%Binv(i,1,1,1)
@@ -92,7 +92,7 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, &
 
     call Ax%compute(p_res%x,p%x,c_Xh,p%msh,p%Xh)
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        p_res%x(i,1,1,1) = (-p_res%x(i,1,1,1)) &
                         + wa1%x(i,1,1,1) + wa2%x(i,1,1,1) + wa3%x(i,1,1,1)
     end do
@@ -100,29 +100,29 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, &
     !
     ! Surface velocity terms
     !
-    do i = 1, n
+    do concurrent (i = 1:n)
        wa1%x(i,1,1,1) = 0.0_rp
        wa2%x(i,1,1,1) = 0.0_rp
        wa3%x(i,1,1,1) = 0.0_rp
     end do
-    
+
     call bc_sym_surface%apply_surfvec(wa1%x,wa2%x,wa3%x,ta1%x, ta2%x, ta3%x, n)
 
     dtbd = bd / dt
-    do i = 1, n
+    do concurrent (i = 1:n)
        ta1%x(i,1,1,1) = 0.0_rp
        ta2%x(i,1,1,1) = 0.0_rp
        ta3%x(i,1,1,1) = 0.0_rp
     end do
-    
+
     call bc_prs_surface%apply_surfvec(ta1%x, ta2%x, ta3%x, u%x, v%x, w%x, n)
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        p_res%x(i,1,1,1) = p_res%x(i,1,1,1) &
             - (dtbd * (ta1%x(i,1,1,1) + ta2%x(i,1,1,1) + ta3%x(i,1,1,1)))&
             - (wa1%x(i,1,1,1) + wa2%x(i,1,1,1) + wa3%x(i,1,1,1))
     end do
-    
+
     call neko_scratch_registry%relinquish_field(temp_indices)
 
   end subroutine pnpn_prs_res_cpu_compute
@@ -131,7 +131,7 @@ subroutine pnpn_vel_res_cpu_compute(Ax, u, v, w, u_res, v_res, w_res, &
        p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n)
     class(ax_t), intent(in) :: Ax
     type(mesh_t), intent(inout) :: msh
-    type(space_t), intent(inout) :: Xh    
+    type(space_t), intent(inout) :: Xh
     type(field_t), intent(inout) :: p, u, v, w
     type(field_t), intent(inout) :: u_res, v_res, w_res
     type(field_t), intent(inout) :: f_x, f_y, f_z
@@ -161,14 +161,14 @@ subroutine pnpn_vel_res_cpu_compute(Ax, u, v, w, u_res, v_res, w_res, &
 
     call opgrad(ta1%x, ta2%x, ta3%x, p%x, c_Xh)
 
-    do i = 1, n
+    do concurrent (i = 1:n)
        u_res%x(i,1,1,1) = (-u_res%x(i,1,1,1)) - ta1%x(i,1,1,1) + f_x%x(i,1,1,1)
        v_res%x(i,1,1,1) = (-v_res%x(i,1,1,1)) - ta2%x(i,1,1,1) + f_y%x(i,1,1,1)
        w_res%x(i,1,1,1) = (-w_res%x(i,1,1,1)) - ta3%x(i,1,1,1) + f_z%x(i,1,1,1)
     end do
-    
+
     call neko_scratch_registry%relinquish_field(temp_indices)
-  
-   end subroutine pnpn_vel_res_cpu_compute  
+
+  end subroutine pnpn_vel_res_cpu_compute
 
 end module pnpn_res_cpu
diff --git a/src/fluid/bcknd/device/cuda/prs_res_kernel.h b/src/fluid/bcknd/device/cuda/prs_res_kernel.h
index 17615bdbc54..894bc0d7d0b 100644
--- a/src/fluid/bcknd/device/cuda/prs_res_kernel.h
+++ b/src/fluid/bcknd/device/cuda/prs_res_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __FLUID_PRS_RES_KERNEL__
+#define __FLUID_PRS_RES_KERNEL__
+
 template< typename T >
 __global__ void prs_res_part1_kernel(T * __restrict__ ta1,
                                      T * __restrict__ ta2,
@@ -92,3 +95,5 @@ __global__ void prs_res_part3_kernel(T * __restrict__ p_res,
     p_res[i] = p_res[i] - (dtbd * (ta1[i] + ta2[i] + ta3[i]));
   }
 }
+
+#endif // __FLUID_PRS_RES_KERNEL__
\ No newline at end of file
diff --git a/src/fluid/bcknd/device/cuda/vel_res_update_kernel.h b/src/fluid/bcknd/device/cuda/vel_res_update_kernel.h
index 037d43f8931..0cbe3b13d76 100644
--- a/src/fluid/bcknd/device/cuda/vel_res_update_kernel.h
+++ b/src/fluid/bcknd/device/cuda/vel_res_update_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __FLUID_VEL_RES_UPDATE_KERNEL__
+#define __FLUID_VEL_RES_UPDATE_KERNEL__
+
 template< typename T >
 __global__ void vel_res_update_kernel(T * __restrict__ u_res,
                                       T * __restrict__ v_res,
@@ -55,3 +58,4 @@ __global__ void vel_res_update_kernel(T * __restrict__ u_res,
 
 }
 
+#endif // __FLUID_VEL_RES_UPDATE_KERNEL__
\ No newline at end of file
diff --git a/src/fluid/bcknd/device/fluid_abbdf_device.F90 b/src/fluid/bcknd/device/fluid_abbdf_device.F90
index 4cfe776dc84..79d6cf9b759 100644
--- a/src/fluid/bcknd/device/fluid_abbdf_device.F90
+++ b/src/fluid/bcknd/device/fluid_abbdf_device.F90
@@ -54,7 +54,7 @@ module rhs_maker_device
   end type rhs_maker_bdf_device_t
 
 #ifdef HAVE_HIP
-    interface
+  interface
      subroutine rhs_maker_sumab_hip(u_d, v_d, w_d, uu_d, vv_d, ww_d, &
           uulag1, uulag2, vvlag1, vvlag2, wwlag1, wwlag2, ab1, ab2, ab3, nab, n)&
           bind(c, name='rhs_maker_sumab_hip')
@@ -75,7 +75,7 @@ subroutine rhs_maker_ext_hip(abx1_d, aby1_d, abz1_d, &
                                   bind(c, name='rhs_maker_ext_hip')
        use, intrinsic :: iso_c_binding
        import c_rp
-       type(c_ptr), value :: abx1_d, aby1_d, abz1_d 
+       type(c_ptr), value :: abx1_d, aby1_d, abz1_d
        type(c_ptr), value :: abx2_d, aby2_d, abz2_d
        type(c_ptr), value :: bfx_d, bfy_d, bfz_d
        real(c_rp) :: rho, ab1, ab2, ab3
@@ -95,7 +95,7 @@ subroutine rhs_maker_bdf_hip(ulag1_d, ulag2_d, vlag1_d, vlag2_d, &
        reaL(c_rp) :: rho, dt, bd2, bd3, bd4
        integer(c_int) :: nbd, n
      end subroutine rhs_maker_bdf_hip
-  end interface 
+  end interface
 #elif HAVE_CUDA
   interface
      subroutine rhs_maker_sumab_cuda(u_d, v_d, w_d, uu_d, vv_d, ww_d, &
@@ -118,7 +118,7 @@ subroutine rhs_maker_ext_cuda(abx1_d, aby1_d, abz1_d, &
                                    bind(c, name='rhs_maker_ext_cuda')
        use, intrinsic :: iso_c_binding
        import c_rp
-       type(c_ptr), value :: abx1_d, aby1_d, abz1_d 
+       type(c_ptr), value :: abx1_d, aby1_d, abz1_d
        type(c_ptr), value :: abx2_d, aby2_d, abz2_d
        type(c_ptr), value :: bfx_d, bfy_d, bfz_d
        real(c_rp) :: rho, ab1, ab2, ab3
@@ -138,7 +138,7 @@ subroutine rhs_maker_bdf_cuda(ulag1_d, ulag2_d, vlag1_d, vlag2_d, &
        reaL(c_rp) :: rho, dt, bd2, bd3, bd4
        integer(c_int) :: nbd, n
      end subroutine rhs_maker_bdf_cuda
-  end interface  
+  end interface
 #elif HAVE_OPENCL
   interface
      subroutine rhs_maker_sumab_opencl(u_d, v_d, w_d, uu_d, vv_d, ww_d, &
@@ -161,7 +161,7 @@ subroutine rhs_maker_ext_opencl(abx1_d, aby1_d, abz1_d, &
                                      bind(c, name='rhs_maker_ext_opencl')
        use, intrinsic :: iso_c_binding
        import c_rp
-       type(c_ptr), value :: abx1_d, aby1_d, abz1_d 
+       type(c_ptr), value :: abx1_d, aby1_d, abz1_d
        type(c_ptr), value :: abx2_d, aby2_d, abz2_d
        type(c_ptr), value :: bfx_d, bfy_d, bfz_d
        real(c_rp) :: rho, ab1, ab2, ab3
@@ -181,12 +181,12 @@ subroutine rhs_maker_bdf_opencl(ulag1_d, ulag2_d, vlag1_d, vlag2_d, &
        reaL(c_rp) :: rho, dt, bd2, bd3, bd4
        integer(c_int) :: nbd, n
      end subroutine rhs_maker_bdf_opencl
-  end interface 
+  end interface
 #endif
 
 contains
 
-    subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab)
+  subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab)
     type(field_t), intent(inout) :: u,v, w
     type(field_t), intent(inout) :: uu, vv, ww
     type(field_series_t), intent(inout) :: uulag, vvlag, wwlag
@@ -198,7 +198,7 @@ subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab,
          uulag%lf(1)%x_d, uulag%lf(2)%x_d, vvlag%lf(1)%x_d, vvlag%lf(2)%x_d, &
          wwlag%lf(1)%x_d, wwlag%lf(2)%x_d, ab(1), ab(2), ab(3), nab, &
          uu%dof%size())
-#elif HAVE_CUDA    
+#elif HAVE_CUDA
     call rhs_maker_sumab_cuda(u%x_d, v%x_d, w%x_d, uu%x_d, vv%x_d, ww%x_d, &
          uulag%lf(1)%x_d, uulag%lf(2)%x_d, vvlag%lf(1)%x_d, vvlag%lf(2)%x_d, &
          wwlag%lf(1)%x_d, wwlag%lf(2)%x_d, ab(1), ab(2), ab(3), nab, &
@@ -209,7 +209,7 @@ subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab,
          wwlag%lf(1)%x_d, wwlag%lf(2)%x_d, ab(1), ab(2), ab(3), nab, &
          uu%dof%size())
 #endif
-    
+
   end subroutine rhs_maker_sumab_device
 
   subroutine rhs_maker_ext_device(temp1, temp2, temp3, fx_lag, fy_lag, fz_lag, &
@@ -243,17 +243,17 @@ subroutine rhs_maker_ext_device(temp1, temp2, temp3, fx_lag, fy_lag, fz_lag, &
                               fx_d, fy_d, fz_d, rho, &
                               ext_coeffs(1), ext_coeffs(2), ext_coeffs(3), n)
 #endif
-    
+
   end subroutine rhs_maker_ext_device
 
   subroutine rhs_maker_bdf_device(ta1, ta2, ta3, tb1, tb2, tb3, &
                                ulag, vlag, wlag, bfx, bfy, bfz, &
-                               u, v, w, B, rho, dt, bd, nbd, n)    
+                               u, v, w, B, rho, dt, bd, nbd, n)
     integer, intent(in) :: n, nbd
     type(field_t), intent(inout) :: ta1, ta2, ta3
     type(field_t), intent(in) :: u, v, w
     type(field_t), intent(inout) :: tb1, tb2, tb3
-    type(field_series_t), intent(in) :: ulag, vlag, wlag        
+    type(field_series_t), intent(in) :: ulag, vlag, wlag
     real(kind=rp), intent(inout) :: bfx(n), bfy(n), bfz(n)
     real(kind=rp), intent(in) :: B(n)
     real(kind=rp), intent(in) :: dt, rho, bd(10)
@@ -263,7 +263,7 @@ subroutine rhs_maker_bdf_device(ta1, ta2, ta3, tb1, tb2, tb3, &
     bfy_d = device_get_ptr(bfy)
     bfz_d = device_get_ptr(bfz)
     B_d = device_get_ptr(B)
-    
+
 #ifdef HAVE_HIP
     call rhs_maker_bdf_hip(ulag%lf(1)%x_d, ulag%lf(2)%x_d, &
                            vlag%lf(1)%x_d, vlag%lf(2)%x_d, &
@@ -285,5 +285,5 @@ subroutine rhs_maker_bdf_device(ta1, ta2, ta3, tb1, tb2, tb3, &
 #endif
 
   end subroutine rhs_maker_bdf_device
-  
+
 end module rhs_maker_device
diff --git a/src/fluid/bcknd/device/hip/prs_res_kernel.h b/src/fluid/bcknd/device/hip/prs_res_kernel.h
index 17615bdbc54..894bc0d7d0b 100644
--- a/src/fluid/bcknd/device/hip/prs_res_kernel.h
+++ b/src/fluid/bcknd/device/hip/prs_res_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __FLUID_PRS_RES_KERNEL__
+#define __FLUID_PRS_RES_KERNEL__
+
 template< typename T >
 __global__ void prs_res_part1_kernel(T * __restrict__ ta1,
                                      T * __restrict__ ta2,
@@ -92,3 +95,5 @@ __global__ void prs_res_part3_kernel(T * __restrict__ p_res,
     p_res[i] = p_res[i] - (dtbd * (ta1[i] + ta2[i] + ta3[i]));
   }
 }
+
+#endif // __FLUID_PRS_RES_KERNEL__
\ No newline at end of file
diff --git a/src/fluid/bcknd/device/hip/vel_res_update_kernel.h b/src/fluid/bcknd/device/hip/vel_res_update_kernel.h
index 037d43f8931..0cbe3b13d76 100644
--- a/src/fluid/bcknd/device/hip/vel_res_update_kernel.h
+++ b/src/fluid/bcknd/device/hip/vel_res_update_kernel.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __FLUID_VEL_RES_UPDATE_KERNEL__
+#define __FLUID_VEL_RES_UPDATE_KERNEL__
+
 template< typename T >
 __global__ void vel_res_update_kernel(T * __restrict__ u_res,
                                       T * __restrict__ v_res,
@@ -55,3 +58,4 @@ __global__ void vel_res_update_kernel(T * __restrict__ u_res,
 
 }
 
+#endif // __FLUID_VEL_RES_UPDATE_KERNEL__
\ No newline at end of file
diff --git a/src/fluid/bcknd/device/opencl/pnpn_res_kernel.cl b/src/fluid/bcknd/device/opencl/pnpn_res_kernel.cl
index c0a4cdb840a..ba18b53e5af 100644
--- a/src/fluid/bcknd/device/opencl/pnpn_res_kernel.cl
+++ b/src/fluid/bcknd/device/opencl/pnpn_res_kernel.cl
@@ -27,6 +27,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __FLUID_PNPN_RES_KERNEL__
+#define __FLUID_PNPN_RES_KERNEL__
+
 __kernel void prs_res_part1_kernel(__global real * __restrict__ ta1,
                                    __global real * __restrict__ ta2,
                                    __global real * __restrict__ ta3,
@@ -107,3 +110,5 @@ __kernel void vel_res_update_kernel(__global real * __restrict__ u_res,
   }
 
 }
+
+#endif // __FLUID_PNPN_RES_KERNEL__
\ No newline at end of file
diff --git a/src/fluid/bcknd/device/pnpn_res_device.F90 b/src/fluid/bcknd/device/pnpn_res_device.F90
index 8e676a2fa3d..2b7de4428af 100644
--- a/src/fluid/bcknd/device/pnpn_res_device.F90
+++ b/src/fluid/bcknd/device/pnpn_res_device.F90
@@ -47,7 +47,7 @@ module pnpn_res_device
   use scratch_registry, only : neko_scratch_registry
   implicit none
   private
- 
+
   type, public, extends(pnpn_prs_res_t) :: pnpn_prs_res_device_t
    contains
      procedure, nopass :: compute => pnpn_prs_res_device_compute
@@ -59,7 +59,7 @@ module pnpn_res_device
   end type pnpn_vel_res_device_t
 
 #ifdef HAVE_HIP
-    interface
+  interface
      subroutine pnpn_prs_res_part1_hip(ta1_d, ta2_d, ta3_d, &
           wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, &
           B_d, h1_d, mu, rho, n) &
@@ -97,7 +97,7 @@ subroutine pnpn_prs_res_part3_hip(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) &
        integer(c_int) :: n
      end subroutine pnpn_prs_res_part3_hip
   end interface
-  
+
   interface
      subroutine pnpn_vel_res_update_hip(u_res_d, v_res_d, w_res_d, &
           ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
@@ -149,7 +149,7 @@ subroutine pnpn_prs_res_part3_cuda(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) &
        integer(c_int) :: n
      end subroutine pnpn_prs_res_part3_cuda
   end interface
-  
+
   interface
      subroutine pnpn_vel_res_update_cuda(u_res_d, v_res_d, w_res_d, &
           ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
@@ -201,7 +201,7 @@ subroutine pnpn_prs_res_part3_opencl(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) &
        integer(c_int) :: n
      end subroutine pnpn_prs_res_part3_opencl
   end interface
-  
+
   interface
      subroutine pnpn_vel_res_update_opencl(u_res_d, v_res_d, w_res_d, &
           ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) &
@@ -216,7 +216,7 @@ end subroutine pnpn_vel_res_update_opencl
   end interface
 #endif
 
-  
+
 contains
 
   subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, &
@@ -251,7 +251,7 @@ subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, &
 
     n = u%dof%size()
     gdim = c_Xh%msh%gdim
-    
+
     call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_Xh)
     call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_Xh)
 
@@ -259,23 +259,23 @@ subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, &
 #ifdef HAVE_HIP
     call pnpn_prs_res_part1_hip(ta1%x_d, ta2%x_d, ta3%x_d, &
          wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
-         c_Xh%B_d, c_Xh%h1_d, mu, rho, n) 
+         c_Xh%B_d, c_Xh%h1_d, mu, rho, n)
 
 #elif HAVE_CUDA
     call pnpn_prs_res_part1_cuda(ta1%x_d, ta2%x_d, ta3%x_d, &
          wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, &
-         c_Xh%B_d, c_Xh%h1_d, mu, rho, n) 
+         c_Xh%B_d, c_Xh%h1_d, mu, rho, n)
 #elif HAVE_OPENCL
     call pnpn_prs_res_part1_opencl(ta1%x_d, ta2%x_d, ta3%x_d, &
          wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_z%x_d, f_z%x_d, &
-         c_Xh%B_d, c_Xh%h1_d, mu, rho, n) 
+         c_Xh%B_d, c_Xh%h1_d, mu, rho, n)
 #endif
-     c_Xh%ifh2 = .false.
+    c_Xh%ifh2 = .false.
     call device_cfill(c_Xh%h1_d,1.0_rp / rho,n)
-         
-    call gs_Xh%op(ta1, GS_OP_ADD) 
-    call gs_Xh%op(ta2, GS_OP_ADD) 
-    call gs_Xh%op(ta3, GS_OP_ADD) 
+
+    call gs_Xh%op(ta1, GS_OP_ADD)
+    call gs_Xh%op(ta2, GS_OP_ADD)
+    call gs_Xh%op(ta3, GS_OP_ADD)
 
     call device_opcolv(ta1%x_d, ta2%x_d, ta3%x_d, c_Xh%Binv_d, gdim, n)
 
@@ -286,11 +286,11 @@ subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, &
     call Ax%compute(p_res%x,p%x,c_Xh,p%msh,p%Xh)
 
 #ifdef HAVE_HIP
-    call pnpn_prs_res_part2_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n);
+    call pnpn_prs_res_part2_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
 #elif HAVE_CUDA
-    call pnpn_prs_res_part2_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n);
+    call pnpn_prs_res_part2_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
 #elif HAVE_OPENCL
-    call pnpn_prs_res_part2_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n);
+    call pnpn_prs_res_part2_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n)
 #endif
 
     !
@@ -303,13 +303,13 @@ subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, &
     call bc_sym_surface%apply_surfvec_dev(wa1%x_d, wa2%x_d, wa3%x_d, ta1%x_d, ta2%x_d, ta3%x_d)
 
 #ifdef HAVE_HIP
-    call pnpn_prs_res_part3_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n);
+    call pnpn_prs_res_part3_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
 #elif HAVE_CUDA
-    call pnpn_prs_res_part3_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n);
+    call pnpn_prs_res_part3_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
 #elif HAVE_OPENCL
-    call pnpn_prs_res_part3_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n);
+    call pnpn_prs_res_part3_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n)
 #endif
-   !
+    !
     dtbd = bd / dt
 
     call device_rzero(ta1%x_d, n)
@@ -320,14 +320,14 @@ subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, &
          u%x_d, v%x_d, w%x_d)
 
 #ifdef HAVE_HIP
-    call pnpn_prs_res_part3_hip(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n);
+    call pnpn_prs_res_part3_hip(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n)
 #elif HAVE_CUDA
-    call pnpn_prs_res_part3_cuda(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n);
+    call pnpn_prs_res_part3_cuda(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n)
 #elif HAVE_OPENCL
-    call pnpn_prs_res_part3_opencl(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n);
+    call pnpn_prs_res_part3_opencl(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n)
 #endif
-        
-  call neko_scratch_registry%relinquish_field(temp_indices)
+
+    call neko_scratch_registry%relinquish_field(temp_indices)
 
   end subroutine pnpn_prs_res_device_compute
 
@@ -335,12 +335,12 @@ subroutine pnpn_vel_res_device_compute(Ax, u, v, w, u_res, v_res, w_res, &
        p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n)
     class(ax_t), intent(in) :: Ax
     type(mesh_t), intent(inout) :: msh
-    type(space_t), intent(inout) :: Xh    
+    type(space_t), intent(inout) :: Xh
     type(field_t), intent(inout) :: p, u, v, w
     type(field_t), intent(inout) :: u_res, v_res, w_res
     type(field_t), intent(inout) :: f_x, f_y, f_z
     type(coef_t), intent(inout) :: c_Xh
-    real(kind=rp), intent(in) :: mu 
+    real(kind=rp), intent(in) :: mu
     real(kind=rp), intent(in) :: rho
     real(kind=rp), intent(in) :: bd
     real(kind=rp), intent(in) :: dt
@@ -351,7 +351,7 @@ subroutine pnpn_vel_res_device_compute(Ax, u, v, w, u_res, v_res, w_res, &
     call device_cfill(c_Xh%h1_d, mu, n)
     call device_cfill(c_Xh%h2_d, rho * (bd / dt), n)
     c_Xh%ifh2 = .true.
-    
+
     call Ax%compute(u_res%x, u%x, c_Xh, msh, Xh)
     call Ax%compute(v_res%x, v%x, c_Xh, msh, Xh)
     call Ax%compute(w_res%x, w%x, c_Xh, msh, Xh)
@@ -372,7 +372,7 @@ subroutine pnpn_vel_res_device_compute(Ax, u, v, w, u_res, v_res, w_res, &
     call pnpn_vel_res_update_opencl(u_res%x_d, v_res%x_d, w_res%x_d, &
          ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n)
 #endif
-    
+
     call neko_scratch_registry%relinquish_field(temp_indices)
   end subroutine pnpn_vel_res_device_compute
 
diff --git a/src/fluid/bcknd/sx/pnpn_res_sx.f90 b/src/fluid/bcknd/sx/pnpn_res_sx.f90
index 477d2cb7531..2c849077c36 100644
--- a/src/fluid/bcknd/sx/pnpn_res_sx.f90
+++ b/src/fluid/bcknd/sx/pnpn_res_sx.f90
@@ -6,14 +6,14 @@ module pnpn_res_sx
   use ax_product, only : ax_t
   use coefs, only : coef_t
   use facet_normal, only : facet_normal_t
-  use pnpn_residual, only : pnpn_prs_res_t, pnpn_vel_res_t    
+  use pnpn_residual, only : pnpn_prs_res_t, pnpn_vel_res_t
   use scratch_registry, only: neko_scratch_registry
   use mesh, only : mesh_t
   use num_types, only : rp
   use space, only : space_t
   implicit none
   private
-  
+
   type, public, extends(pnpn_prs_res_t) :: pnpn_prs_res_sx_t
    contains
      procedure, nopass :: compute => pnpn_prs_res_sx_compute
@@ -63,7 +63,7 @@ subroutine pnpn_prs_res_sx_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, &
        c_Xh%h2(i,1,1,1) = 0.0_rp
     end do
     c_Xh%ifh2 = .false.
-    
+
     call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_Xh)
     call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_Xh)
 
@@ -78,17 +78,17 @@ subroutine pnpn_prs_res_sx_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, &
        ta2%x(i,1,1,1) = f_y%x(i,1,1,1) / rho - wa2%x(i,1,1,1)
        ta3%x(i,1,1,1) = f_z%x(i,1,1,1) / rho - wa3%x(i,1,1,1)
     end do
-     
-    call gs_Xh%op(ta1, GS_OP_ADD) 
-    call gs_Xh%op(ta2, GS_OP_ADD) 
-    call gs_Xh%op(ta3, GS_OP_ADD) 
+
+    call gs_Xh%op(ta1, GS_OP_ADD)
+    call gs_Xh%op(ta2, GS_OP_ADD)
+    call gs_Xh%op(ta3, GS_OP_ADD)
 
     do i = 1, n
        ta1%x(i,1,1,1) = ta1%x(i,1,1,1) * c_Xh%Binv(i,1,1,1)
        ta2%x(i,1,1,1) = ta2%x(i,1,1,1) * c_Xh%Binv(i,1,1,1)
        ta3%x(i,1,1,1) = ta3%x(i,1,1,1) * c_Xh%Binv(i,1,1,1)
     end do
-    
+
     call Ax%compute(p_res%x,p%x,c_Xh,p%msh,p%Xh)
 
     call cdtp(wa1%x, ta1%x, c_Xh%drdx, c_Xh%dsdx, c_Xh%dtdx, c_Xh)
@@ -107,16 +107,16 @@ subroutine pnpn_prs_res_sx_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, &
        wa2%x(i,1,1,1) = 0.0_rp
        wa3%x(i,1,1,1) = 0.0_rp
     end do
-    
+
     call bc_sym_surface%apply_surfvec(wa1%x,wa2%x,wa3%x,ta1%x, ta2%x, ta3%x, n)
-    
+
     dtbd = bd / dt
     do i = 1, n
        ta1%x(i,1,1,1) = 0.0_rp
        ta2%x(i,1,1,1) = 0.0_rp
        ta3%x(i,1,1,1) = 0.0_rp
     end do
-    
+
     call bc_prs_surface%apply_surfvec(ta1%x, ta2%x, ta3%x, u%x, v%x, w%x, n)
 
     do i = 1, n
@@ -132,7 +132,7 @@ subroutine pnpn_vel_res_sx_compute(Ax, u, v, w, u_res, v_res, w_res, &
        p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n)
     class(ax_t), intent(in) :: Ax
     type(mesh_t), intent(inout) :: msh
-    type(space_t), intent(inout) :: Xh    
+    type(space_t), intent(inout) :: Xh
     type(field_t), intent(inout) :: p, u, v, w
     type(field_t), intent(inout) :: u_res, v_res, w_res
     type(field_t), intent(inout) :: f_x, f_y, f_z
@@ -167,8 +167,8 @@ subroutine pnpn_vel_res_sx_compute(Ax, u, v, w, u_res, v_res, w_res, &
        v_res%x(i,1,1,1) = (-v_res%x(i,1,1,1)) - ta2%x(i,1,1,1) + f_y%x(i,1,1,1)
        w_res%x(i,1,1,1) = (-w_res%x(i,1,1,1)) - ta3%x(i,1,1,1) + f_z%x(i,1,1,1)
     end do
-    
+
     call neko_scratch_registry%relinquish_field(temp_indices)
-  end subroutine pnpn_vel_res_sx_compute  
-     
+  end subroutine pnpn_vel_res_sx_compute
+
 end module pnpn_res_sx
diff --git a/src/fluid/flow_ic.f90 b/src/fluid/flow_ic.f90
index 3e0d9e1587c..194eeec267d 100644
--- a/src/fluid/flow_ic.f90
+++ b/src/fluid/flow_ic.f90
@@ -43,16 +43,16 @@ module flow_ic
   use math
   use user_intf, only : useric
   use json_module, only : json_file
-  use json_utils, only: json_get, json_get_or_default
+  use json_utils, only: json_get
   implicit none
   private
 
   interface set_flow_ic
      module procedure set_flow_ic_int, set_flow_ic_usr
-  end interface
+  end interface set_flow_ic
 
   public :: set_flow_ic
-  
+
 contains
 
   !> Set initial flow condition (builtin)
@@ -71,7 +71,7 @@ subroutine set_flow_ic_int(u, v, w, p, coef, gs, type, params)
     real(kind=rp), allocatable :: uinf(:)
     character(len=:), allocatable :: blasius_approximation
 
-    if (trim(type) .eq. 'uniform') then       
+    if (trim(type) .eq. 'uniform') then
        call json_get(params, 'case.fluid.initial_condition.value', uinf)
        call set_flow_ic_uniform(u, v, w, uinf)
     else if (trim(type) .eq. 'blasius') then
@@ -83,9 +83,9 @@ subroutine set_flow_ic_int(u, v, w, p, coef, gs, type, params)
     else
        call neko_error('Invalid initial condition')
     end if
-    
+
     call set_flow_ic_common(u, v, w, p, coef, gs)
-    
+
   end subroutine set_flow_ic_int
 
   !> Set intial flow condition (user defined)
@@ -100,9 +100,9 @@ subroutine set_flow_ic_usr(u, v, w, p, coef, gs, usr_ic, params)
     type(json_file), intent(inout) :: params
 
     call usr_ic(u, v, w, p, params)
-    
+
     call set_flow_ic_common(u, v, w, p, coef, gs)
-    
+
   end subroutine set_flow_ic_usr
 
   subroutine set_flow_ic_common(u, v, w, p, coef, gs)
@@ -112,17 +112,20 @@ subroutine set_flow_ic_common(u, v, w, p, coef, gs)
     type(field_t), intent(inout) :: p
     type(coef_t), intent(in) :: coef
     type(gs_t), intent(inout) :: gs
-    
-    if (NEKO_BCKND_DEVICE .eq. 1) then 
-       call device_memcpy(u%x, u%x_d, u%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(v%x, v%x_d, v%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(w%x, w%x_d, w%dof%size(), HOST_TO_DEVICE)
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_memcpy(u%x, u%x_d, u%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(v%x, v%x_d, v%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(w%x, w%x_d, w%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
     end if
-    
+
     ! Ensure continuity across elements for initial conditions
-    call gs%op(u%x, u%dof%size(), GS_OP_ADD) 
-    call gs%op(v%x, v%dof%size(), GS_OP_ADD) 
-    call gs%op(w%x, w%dof%size(), GS_OP_ADD) 
+    call gs%op(u%x, u%dof%size(), GS_OP_ADD)
+    call gs%op(v%x, v%dof%size(), GS_OP_ADD)
+    call gs%op(w%x, w%dof%size(), GS_OP_ADD)
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_col2(u%x_d, coef%mult_d, u%dof%size())
@@ -133,7 +136,7 @@ subroutine set_flow_ic_common(u, v, w, p, coef, gs)
        call col2(v%x, coef%mult, v%dof%size())
        call col2(w%x, coef%mult, w%dof%size())
     end if
-    
+
   end subroutine set_flow_ic_common
 
   !> Uniform initial condition
@@ -181,7 +184,7 @@ subroutine set_flow_ic_blasius(u, v, w, delta, uinf, type)
     case default
        call neko_error('Invalid Blasius approximation')
     end select
-    
+
     if ((uinf(1) .gt. 0.0_rp) .and. (uinf(2) .eq. 0.0_rp) &
          .and. (uinf(3) .eq. 0.0_rp)) then
        do i = 1, u%dof%size()
@@ -202,9 +205,9 @@ subroutine set_flow_ic_blasius(u, v, w, delta, uinf, type)
           u%x(i,1,1,1) = 0.0_rp
           v%x(i,1,1,1) = 0.0_rp
           w%x(i,1,1,1) = bla(u%dof%y(i,1,1,1), delta, uinf(3))
-       end do       
+       end do
     end if
-    
+
   end subroutine set_flow_ic_blasius
-  
+
 end module flow_ic
diff --git a/src/fluid/flow_profile.f90 b/src/fluid/flow_profile.f90
index ec380c511d4..32e5ef29506 100644
--- a/src/fluid/flow_profile.f90
+++ b/src/fluid/flow_profile.f90
@@ -30,7 +30,7 @@
 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 ! POSSIBILITY OF SUCH DAMAGE.
 !
-!> Defines a flow profile 
+!> Defines a flow profile
 module flow_profile
   use num_types
   implicit none
@@ -59,7 +59,7 @@ function blasius_linear(y, delta, u)
     else
        blasius_linear = u * (y / delta)
     end if
-    
+
   end function blasius_linear
 
   !> Quadratic approximate Blasius Profile
@@ -76,7 +76,7 @@ function blasius_quadratic(y, delta, u)
     else
        blasius_quadratic = u * arg
     end if
-    
+
   end function blasius_quadratic
 
   !> Cubic approximate Blasius Profile
@@ -93,7 +93,7 @@ function blasius_cubic(y, delta, u)
     else
        blasius_cubic = u * arg
     end if
-    
+
   end function blasius_cubic
 
   !> Quartic approximate Blasius Profile
@@ -111,7 +111,7 @@ function blasius_quartic(y, delta, u)
     else
        blasius_quartic = u * arg
     end if
-    
+
   end function blasius_quartic
 
   !> Sinusoidal approximate Blasius Profile
@@ -129,7 +129,7 @@ function blasius_sin(y, delta, u)
     else
        blasius_sin = u * sin(arg)
     end if
-    
+
   end function blasius_sin
-    
+
 end module flow_profile
diff --git a/src/fluid/fluid_aux.f90 b/src/fluid/fluid_aux.f90
index 5641e694cb1..5f8bed604ae 100644
--- a/src/fluid/fluid_aux.f90
+++ b/src/fluid/fluid_aux.f90
@@ -5,11 +5,14 @@ module fluid_aux
   use krylov, only : ksp_monitor_t
   use, intrinsic :: ieee_arithmetic, only: ieee_is_nan
   implicit none
+  private
+
+  public :: fluid_step_info
 
 contains
 
   !> Prints for prs, velx, vely, velz the following:
-  !! Number of iterations, start residual, end residual 
+  !! Number of iterations, start residual, end residual
   subroutine fluid_step_info(step, t, dt, ksp_results)
     type(ksp_monitor_t), intent(in) :: ksp_results(4)
     integer, intent(in) :: step
@@ -58,7 +61,7 @@ subroutine fluid_step_info(step, t, dt, ksp_results)
           stop
        end if
     end do
-    
+
   end subroutine fluid_step_info
 
 
diff --git a/src/fluid/fluid_fctry.f90 b/src/fluid/fluid_fctry.f90
index 5773c081ba4..75e11c9254a 100644
--- a/src/fluid/fluid_fctry.f90
+++ b/src/fluid/fluid_fctry.f90
@@ -34,7 +34,7 @@
 module fluid_fctry
   use fluid_scheme, only : fluid_scheme_t
 !  use fluid_plan1, only : fluid_plan1_t
-  use fluid_pnpn, only : fluid_pnpn_t    
+  use fluid_pnpn, only : fluid_pnpn_t
   use utils, only : neko_error
   use neko_config
   implicit none
@@ -53,7 +53,7 @@ subroutine fluid_scheme_factory(fluid, fluid_scheme)
     else
        call neko_error('Invalid fluid scheme')
     end if
-    
+
   end subroutine fluid_scheme_factory
 
 end module fluid_fctry
diff --git a/src/fluid/fluid_plan1.f90 b/src/fluid/fluid_plan1.f90
index cc7096544e7..3b1b7deff2e 100644
--- a/src/fluid/fluid_plan1.f90
+++ b/src/fluid/fluid_plan1.f90
@@ -22,17 +22,17 @@ subroutine fluid_plan1_init(this, msh, lx, param)
     class(fluid_plan1_t), target, intent(inout) :: this
     type(mesh_t), target, intent(inout) :: msh
     integer, intent(inout) :: lx
-    type(param_t), target, intent(inout) :: param        
+    type(param_t), target, intent(inout) :: param
     character(len=15), parameter :: scheme = 'plan1 (Pn/Pn-2)'
     integer :: lx2
 
     call this%free()
-    
+
     !> Setup velocity fields on the space \f$ Xh \f$
     call this%scheme_init(msh, lx, param, kspv_init=.true., scheme=scheme)
 
     !> Setup pressure field and related space \f$ Yh \f$
-    lx2 = lx - 2        
+    lx2 = lx - 2
     if (msh%gdim .eq. 2) then
        call this%Yh%init(GLL, lx2, lx2)
     else
@@ -40,19 +40,19 @@ subroutine fluid_plan1_init(this, msh, lx, param)
     end if
 
     this%dm_Yh = dofmap_t(msh, this%Yh)
-        
+
     call this%p%init(this%dm_Yh)
 
     call gs_init(this%gs_Yh, this%dm_Yh)
 
     call this%c_Yh%init(this%gs_Yh)
-    
+
     call fluid_scheme_solver_factory(this%ksp_prs, this%dm_Yh%size(), &
          param%ksp_prs, param%abstol_prs)
     call fluid_scheme_precon_factory(this%pc_prs, this%ksp_prs, &
          this%c_Yh, this%dm_Yh, this%gs_Yh, this%bclst_prs, param%pc_prs)
-    
-    
+
+
   end subroutine fluid_plan1_init
 
   subroutine fluid_plan1_free(this)
@@ -67,17 +67,18 @@ subroutine fluid_plan1_free(this)
     call gs_free(this%gs_Yh)
 
     call this%c_Yh%free()
-    
+
   end subroutine fluid_plan1_free
 
-  subroutine fluid_plan1_step(this, t, tstep, ext_bdf)
-    class(fluid_plan1_t), intent(inout) :: this
+  subroutine fluid_plan1_step(this, t, tstep, ext_bdf, dt_controller)
+    class(fluid_plan1_t), target, intent(inout) :: this
     real(kind=rp), intent(inout) :: t
     integer, intent(inout) :: tstep
     type(time_scheme_controller_t), intent(inout) :: ext_bdf
+    type(time_step_controller_t), intent(in) :: dt_controller
 
     if (this%freeze) return
 
   end subroutine fluid_plan1_step
-  
+
 end module fluid_plan1
diff --git a/src/fluid/fluid_pnpn.f90 b/src/fluid/fluid_pnpn.f90
index bd36238cb89..480c661c420 100644
--- a/src/fluid/fluid_pnpn.f90
+++ b/src/fluid/fluid_pnpn.f90
@@ -32,38 +32,56 @@
 !
 !> Modular version of the Classic Nek5000 Pn/Pn formulation for fluids
 module fluid_pnpn
-  use pnpn_res_fctry
-  use ax_helm_fctry
-  use rhs_maker_fctry
-  use fluid_volflow
-  use fluid_scheme
-  use field_series  
-  use facet_normal
-  use device_math
-  use device_mathops
-  use fluid_aux    
-  use time_scheme_controller
-  use projection
-  use device
-  use logger
-  use advection
-  use profiler
-  use json_utils, only : json_get, json_get_or_default
+  use num_types, only : rp
+  use krylov, only : ksp_monitor_t
+  use pnpn_res_fctry, only : pnpn_prs_res_factory, pnpn_vel_res_factory
+  use pnpn_residual, only : pnpn_prs_res_t, pnpn_vel_res_t
+  use ax_helm_fctry, only : ax_helm_factory
+  use rhs_maker_fctry, only : rhs_maker_sumab_fctry, rhs_maker_bdf_fctry, &
+                              rhs_maker_ext_fctry
+  use rhs_maker, only : rhs_maker_sumab_t, rhs_maker_bdf_t, rhs_maker_ext_t
+  use fluid_volflow, only : fluid_volflow_t
+  use fluid_scheme, only : fluid_scheme_t
+  use field_series, only : field_series_t
+  use device_math, only : device_add2, device_col2
+  use device_mathops, only : device_opcolv, device_opadd2cm
+  use fluid_aux, only : fluid_step_info
+  use time_scheme_controller, only : time_scheme_controller_t
+  use projection, only : projection_t
+  use device, only : device_memcpy, HOST_TO_DEVICE
+  use logger, only : neko_log
+  use advection, only : advection_t
+  use profiler, only : profiler_start_region, profiler_end_region
+  use json_utils, only : json_get
   use json_module, only : json_file
   use material_properties, only : material_properties_t
+  use advection_fctry, only : advection_factory
+  use ax_product, only : ax_t
+  use field, only : field_t
+  use dirichlet, only : dirichlet_t
+  use facet_normal, only : facet_normal_t
+  use non_normal, only : non_normal_t
+  use mesh, only : mesh_t
+  use user_intf, only : user_t
+  use coefs, only : coef_t
+  use time_step_controller, only : time_step_controller_t
+  use gather_scatter, only : gs_t, GS_OP_ADD
+  use neko_config, only : NEKO_BCKND_DEVICE
+  use math, only : col2, add2
+  use mathops, only : opadd2cm, opcolv
+  use bc, only: bc_list_t, bc_list_init, bc_list_add, bc_list_free, &
+                bc_list_apply_scalar, bc_list_apply_vector
   implicit none
   private
 
-  
+
   type, public, extends(fluid_scheme_t) :: fluid_pnpn_t
      type(field_t) :: p_res, u_res, v_res, w_res
 
-     type(field_series_t) :: ulag, vlag, wlag
-
      type(field_t) :: dp, du, dv, dw
 
      class(ax_t), allocatable :: Ax
-     
+
      type(projection_t) :: proj_prs
      type(projection_t) :: proj_u
      type(projection_t) :: proj_v
@@ -72,20 +90,23 @@ module fluid_pnpn
      type(facet_normal_t) :: bc_prs_surface !< Surface term in pressure rhs
      type(facet_normal_t) :: bc_sym_surface !< Surface term in pressure rhs
      type(dirichlet_t) :: bc_vel_res   !< Dirichlet condition vel. res.
-     type(dirichlet_t) :: bc_dp   !< Dirichlet condition vel. res.
+     type(dirichlet_t) :: bc_field_dirichlet_p   !< Dirichlet condition vel. res.
+     type(dirichlet_t) :: bc_field_dirichlet_u   !< Dirichlet condition vel. res.
+     type(dirichlet_t) :: bc_field_dirichlet_v   !< Dirichlet condition vel. res.
+     type(dirichlet_t) :: bc_field_dirichlet_w   !< Dirichlet condition vel. res.
      type(non_normal_t) :: bc_vel_res_non_normal   !< Dirichlet condition vel. res.
-     type(bc_list_t) :: bclst_vel_res  
+     type(bc_list_t) :: bclst_vel_res
      type(bc_list_t) :: bclst_du
      type(bc_list_t) :: bclst_dv
      type(bc_list_t) :: bclst_dw
-     type(bc_list_t) :: bclst_dp  
+     type(bc_list_t) :: bclst_dp
 
-     class(advection_t), allocatable :: adv 
+     class(advection_t), allocatable :: adv
 
      ! Time variables
      type(field_t) :: abx1, aby1, abz1
      type(field_t) :: abx2, aby2, abz2
-     
+
      !> Pressure residual
      class(pnpn_prs_res_t), allocatable :: prs_res
 
@@ -103,7 +124,7 @@ module fluid_pnpn
 
      !> Adjust flow volume
      type(fluid_volflow_t) :: vol_flow
-     
+
    contains
      procedure, pass(this) :: init => fluid_pnpn_init
      procedure, pass(this) :: free => fluid_pnpn_free
@@ -112,21 +133,21 @@ module fluid_pnpn
   end type fluid_pnpn_t
 
 contains
-  
+
   subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties)
     class(fluid_pnpn_t), target, intent(inout) :: this
     type(mesh_t), target, intent(inout) :: msh
     integer, intent(inout) :: lx
     type(json_file), target, intent(inout) :: params
     type(user_t), intent(in) :: user
-    type(material_properties_t), intent(inout) :: material_properties
+    type(material_properties_t), target, intent(inout) :: material_properties
     character(len=15), parameter :: scheme = 'Modular (Pn/Pn)'
     logical :: found, logical_val
     integer :: integer_val
     real(kind=rp) :: real_val
 
     call this%free()
-    
+
     ! Initialize base class
     call this%scheme_init(msh, lx, params, .true., .true., scheme, user, &
                           material_properties)
@@ -148,7 +169,7 @@ subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties)
 
     ! Setup backend depenent contributions to F from lagged BD terms
     call rhs_maker_bdf_fctry(this%makebdf)
-    
+
     ! Initialize variables specific to this plan
     associate(Xh_lx => this%Xh%lx, Xh_ly => this%Xh%ly, Xh_lz => this%Xh%lz, &
          dm_Xh => this%dm_Xh, nelv => this%msh%nelv)
@@ -156,7 +177,7 @@ subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties)
       call this%p_res%init(dm_Xh, "p_res")
       call this%u_res%init(dm_Xh, "u_res")
       call this%v_res%init(dm_Xh, "v_res")
-      call this%w_res%init(dm_Xh, "w_res")            
+      call this%w_res%init(dm_Xh, "w_res")
       call this%abx1%init(dm_Xh, "abx1")
       call this%aby1%init(dm_Xh, "aby1")
       call this%abz1%init(dm_Xh, "abz1")
@@ -169,34 +190,36 @@ subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties)
       this%abx2 = 0.0_rp
       this%aby2 = 0.0_rp
       this%abz2 = 0.0_rp
-                  
+
       call this%du%init(dm_Xh, 'du')
       call this%dv%init(dm_Xh, 'dv')
       call this%dw%init(dm_Xh, 'dw')
       call this%dp%init(dm_Xh, 'dp')
 
-      call this%ulag%init(this%u, 2)
-      call this%vlag%init(this%v, 2)
-      call this%wlag%init(this%w, 2)
-      
     end associate
-    
+
     ! Initialize velocity surface terms in pressure rhs
-    call this%bc_prs_surface%init(this%dm_Xh)
+    call this%bc_prs_surface%init(this%c_Xh)
     call this%bc_prs_surface%mark_zone(msh%inlet)
     call this%bc_prs_surface%mark_zones_from_list(msh%labeled_zones,&
                                                  'v', this%bc_labels)
+    !This impacts the rhs of the pressure, need to check what is correct to add here
+    call this%bc_prs_surface%mark_zones_from_list(msh%labeled_zones,&
+                                                 'd_vel_u', this%bc_labels)
+    call this%bc_prs_surface%mark_zones_from_list(msh%labeled_zones,&
+                                                 'd_vel_v', this%bc_labels)
+    call this%bc_prs_surface%mark_zones_from_list(msh%labeled_zones,&
+                                                 'd_vel_w', this%bc_labels)
     call this%bc_prs_surface%finalize()
-    call this%bc_prs_surface%set_coef(this%c_Xh)
     ! Initialize symmetry surface terms in pressure rhs
-    call this%bc_sym_surface%init(this%dm_Xh)
+    call this%bc_sym_surface%init(this%c_Xh)
     call this%bc_sym_surface%mark_zone(msh%sympln)
     call this%bc_sym_surface%mark_zones_from_list(msh%labeled_zones,&
                                                  'sym', this%bc_labels)
+    ! Same here, should du, dv, dw be marked here?
     call this%bc_sym_surface%finalize()
-    call this%bc_sym_surface%set_coef(this%c_Xh)
     ! Initialize dirichlet bcs for velocity residual
-    call this%bc_vel_res_non_normal%init(this%dm_Xh)
+    call this%bc_vel_res_non_normal%init(this%c_Xh)
     call this%bc_vel_res_non_normal%mark_zone(msh%outlet_normal)
     call this%bc_vel_res_non_normal%mark_zones_from_list(msh%labeled_zones,&
                                                          'on', this%bc_labels)
@@ -204,21 +227,41 @@ subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties)
                                                          'on+dong', &
                                                          this%bc_labels)
     call this%bc_vel_res_non_normal%finalize()
-    call this%bc_vel_res_non_normal%init_msk(this%c_Xh)    
+    call this%bc_vel_res_non_normal%init_msk()
 
-    call this%bc_dp%init(this%dm_Xh)
-    call this%bc_dp%mark_zones_from_list(msh%labeled_zones, 'on+dong', &
+    call this%bc_field_dirichlet_p%init(this%c_Xh)
+    call this%bc_field_dirichlet_p%mark_zones_from_list(msh%labeled_zones, 'on+dong', &
                                          this%bc_labels)
-    call this%bc_dp%mark_zones_from_list(msh%labeled_zones, &
+    call this%bc_field_dirichlet_p%mark_zones_from_list(msh%labeled_zones, &
                                          'o+dong', this%bc_labels)
-    call this%bc_dp%finalize()
-    call this%bc_dp%set_g(0.0_rp)
+    call this%bc_field_dirichlet_p%mark_zones_from_list(msh%labeled_zones, 'd_pres', &
+                                         this%bc_labels)
+    call this%bc_field_dirichlet_p%finalize()
+    call this%bc_field_dirichlet_p%set_g(0.0_rp)
     call bc_list_init(this%bclst_dp)
-    call bc_list_add(this%bclst_dp, this%bc_dp)
+    call bc_list_add(this%bclst_dp, this%bc_field_dirichlet_p)
     !Add 0 prs bcs
     call bc_list_add(this%bclst_dp, this%bc_prs)
 
-    call this%bc_vel_res%init(this%dm_Xh)
+    call this%bc_field_dirichlet_u%init(this%c_Xh)
+    call this%bc_field_dirichlet_u%mark_zones_from_list(msh%labeled_zones, 'd_vel_u', &
+                                         this%bc_labels)
+    call this%bc_field_dirichlet_u%finalize()
+    call this%bc_field_dirichlet_u%set_g(0.0_rp)
+
+    call this%bc_field_dirichlet_v%init(this%c_Xh)
+    call this%bc_field_dirichlet_v%mark_zones_from_list(msh%labeled_zones, 'd_vel_v', &
+                                         this%bc_labels)
+    call this%bc_field_dirichlet_v%finalize()
+    call this%bc_field_dirichlet_v%set_g(0.0_rp)
+
+    call this%bc_field_dirichlet_w%init(this%c_Xh)
+    call this%bc_field_dirichlet_w%mark_zones_from_list(msh%labeled_zones, 'd_vel_w', &
+                                         this%bc_labels)
+    call this%bc_field_dirichlet_w%finalize()
+    call this%bc_field_dirichlet_w%set_g(0.0_rp)
+
+    call this%bc_vel_res%init(this%c_Xh)
     call this%bc_vel_res%mark_zone(msh%inlet)
     call this%bc_vel_res%mark_zone(msh%wall)
     call this%bc_vel_res%mark_zones_from_list(msh%labeled_zones, &
@@ -237,45 +280,41 @@ subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties)
     call bc_list_add(this%bclst_du,this%bc_sym%bc_x)
     call bc_list_add(this%bclst_du,this%bc_vel_res_non_normal%bc_x)
     call bc_list_add(this%bclst_du, this%bc_vel_res)
+    call bc_list_add(this%bclst_du, this%bc_field_dirichlet_u)
 
     call bc_list_init(this%bclst_dv)
     call bc_list_add(this%bclst_dv,this%bc_sym%bc_y)
     call bc_list_add(this%bclst_dv,this%bc_vel_res_non_normal%bc_y)
     call bc_list_add(this%bclst_dv, this%bc_vel_res)
+    call bc_list_add(this%bclst_dv, this%bc_field_dirichlet_v)
 
     call bc_list_init(this%bclst_dw)
     call bc_list_add(this%bclst_dw,this%bc_sym%bc_z)
     call bc_list_add(this%bclst_dw,this%bc_vel_res_non_normal%bc_z)
     call bc_list_add(this%bclst_dw, this%bc_vel_res)
+    call bc_list_add(this%bclst_dw, this%bc_field_dirichlet_w)
 
     !Intialize projection space thingy
-    if (this%pr_projection_dim .gt. 0) then
-       call this%proj_prs%init(this%dm_Xh%size(), this%pr_projection_dim)
-    end if
-    
-    if (this%vel_projection_dim .gt. 0) then
-       call this%proj_u%init(this%dm_Xh%size(), this%vel_projection_dim)
-       call this%proj_v%init(this%dm_Xh%size(), this%vel_projection_dim)
-       call this%proj_w%init(this%dm_Xh%size(), this%vel_projection_dim)
-    end if
+    call this%proj_prs%init(this%dm_Xh%size(), this%pr_projection_dim, &
+                              this%pr_projection_activ_step)
+
+    call this%proj_u%init(this%dm_Xh%size(), this%vel_projection_dim, &
+                              this%vel_projection_activ_step)
+    call this%proj_v%init(this%dm_Xh%size(), this%vel_projection_dim, &
+                              this%vel_projection_activ_step)
+    call this%proj_w%init(this%dm_Xh%size(), this%vel_projection_dim, &
+                              this%vel_projection_activ_step)
+
 
     ! Add lagged term to checkpoint
-    call this%chkp%add_lag(this%ulag, this%vlag, this%wlag)    
-
-    call json_get(params, 'case.numerics.dealias', logical_val)
-    call params%get('case.numerics.dealiased_polynomial_order', integer_val, &
-                    found)
-    if (.not. found) then
-       call json_get(params, 'case.numerics.polynomial_order', integer_val)
-       integer_val =  3.0_rp / 2.0_rp * (integer_val + 1) - 1
-    end if
-    ! an extra +1 below to go from poly order to space size
-    call advection_factory(this%adv, this%c_Xh, logical_val, integer_val + 1)
+    call this%chkp%add_lag(this%ulag, this%vlag, this%wlag)
+
+    call advection_factory(this%adv, params, this%c_Xh)
 
     if (params%valid_path('case.fluid.flow_rate_force')) then
        call this%vol_flow%init(this%dm_Xh, params)
     end if
-    
+
   end subroutine fluid_pnpn_init
 
   subroutine fluid_pnpn_restart(this,dtlag, tlag)
@@ -285,7 +324,9 @@ subroutine fluid_pnpn_restart(this,dtlag, tlag)
     integer :: i, n
 
     n = this%u%dof%size()
-    ! Make sure that continuity is maintained (important for interpolation) 
+    ! Make sure that continuity is maintained (important for interpolation)
+    ! Do not do this for lagged rhs
+    ! (derivatives are not necessairly coninous across elements)
     call col2(this%u%x,this%c_Xh%mult,this%u%dof%size())
     call col2(this%v%x,this%c_Xh%mult,this%u%dof%size())
     call col2(this%w%x,this%c_Xh%mult,this%u%dof%size())
@@ -295,44 +336,48 @@ subroutine fluid_pnpn_restart(this,dtlag, tlag)
        call col2(this%vlag%lf(i)%x,this%c_Xh%mult,this%u%dof%size())
        call col2(this%wlag%lf(i)%x,this%c_Xh%mult,this%u%dof%size())
     end do
-    
+
+
     if (NEKO_BCKND_DEVICE .eq. 1) then
        associate(u=>this%u, v=>this%v, w=>this%w, &
             ulag=>this%ulag, vlag=>this%vlag, wlag=>this%wlag,&
             p=>this%p)
-       call device_memcpy(u%x, u%x_d, u%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(v%x, v%x_d, v%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(w%x, w%x_d, w%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(p%x, p%x_d, p%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(ulag%lf(1)%x, ulag%lf(1)%x_d, &
-                          u%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(ulag%lf(2)%x, ulag%lf(2)%x_d, &
-                          u%dof%size(), HOST_TO_DEVICE)
-  
-       call device_memcpy(vlag%lf(1)%x, vlag%lf(1)%x_d, &
-                          v%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(vlag%lf(2)%x, vlag%lf(2)%x_d, &
-                          v%dof%size(), HOST_TO_DEVICE)
-    
-       call device_memcpy(wlag%lf(1)%x, wlag%lf(1)%x_d, &
-                          w%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(wlag%lf(2)%x, wlag%lf(2)%x_d, &
-                          w%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(this%abx1%x, this%abx1%x_d, &
-                          w%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(this%abx2%x, this%abx2%x_d, &
-                          w%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(this%aby1%x, this%aby1%x_d, &
-                          w%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(this%aby2%x, this%aby2%x_d, &
-                          w%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(this%abz1%x, this%abz1%x_d, &
-                          w%dof%size(), HOST_TO_DEVICE)
-       call device_memcpy(this%abz2%x, this%abz2%x_d, &
-                          w%dof%size(), HOST_TO_DEVICE)
+         call device_memcpy(u%x, u%x_d, u%dof%size(), &
+                            HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(v%x, v%x_d, v%dof%size(), &
+                            HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(w%x, w%x_d, w%dof%size(), &
+                            HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(p%x, p%x_d, p%dof%size(), &
+                            HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(ulag%lf(1)%x, ulag%lf(1)%x_d, &
+                            u%dof%size(), HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(ulag%lf(2)%x, ulag%lf(2)%x_d, &
+                            u%dof%size(), HOST_TO_DEVICE, sync=.false.)
+
+         call device_memcpy(vlag%lf(1)%x, vlag%lf(1)%x_d, &
+                            v%dof%size(), HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(vlag%lf(2)%x, vlag%lf(2)%x_d, &
+                            v%dof%size(), HOST_TO_DEVICE, sync=.false.)
+
+         call device_memcpy(wlag%lf(1)%x, wlag%lf(1)%x_d, &
+                            w%dof%size(), HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(wlag%lf(2)%x, wlag%lf(2)%x_d, &
+                            w%dof%size(), HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(this%abx1%x, this%abx1%x_d, &
+                            w%dof%size(), HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(this%abx2%x, this%abx2%x_d, &
+                            w%dof%size(), HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(this%aby1%x, this%aby1%x_d, &
+                            w%dof%size(), HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(this%aby2%x, this%aby2%x_d, &
+                            w%dof%size(), HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(this%abz1%x, this%abz1%x_d, &
+                            w%dof%size(), HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(this%abz2%x, this%abz2%x_d, &
+                            w%dof%size(), HOST_TO_DEVICE, sync=.false.)
        end associate
-   end if
-
+    end if
 
 
     call this%gs_Xh%op(this%u,GS_OP_ADD)
@@ -346,33 +391,83 @@ subroutine fluid_pnpn_restart(this,dtlag, tlag)
        call this%gs_Xh%op(this%wlag%lf(i),GS_OP_ADD)
     end do
 
+
+    !! If we would decide to only restart from lagged fields instead of asving abx1, aby1 etc.
+    !! Observe that one also needs to recompute the focing at the old time steps
+    !u_temp = this%ulag%lf(2)
+    !v_temp = this%vlag%lf(2)
+    !w_temp = this%wlag%lf(2)
+    !! Compute the source terms
+    !call this%source_term%compute(tlag(2), -1)
+    !
+    !! Pre-multiply the source terms with the mass matrix.
+    !if (NEKO_BCKND_DEVICE .eq. 1) then
+    !   call device_opcolv(this%f_x%x_d, this%f_y%x_d, this%f_z%x_d, this%c_Xh%B_d, this%msh%gdim, n)
+    !else
+    !   call opcolv(this%f_x%x, this%f_y%x, this%f_z%x, this%c_Xh%B, this%msh%gdim, n)
+    !end if
+
+    !! Add the advection operators to the right-hand-side.
+    !call this%adv%compute(u_temp, v_temp, w_temp, &
+    !                      this%f_x%x, this%f_y%x, this%f_z%x, &
+    !                      this%Xh, this%c_Xh, this%dm_Xh%size())
+    !this%abx2 = this%f_x
+    !this%aby2 = this%f_y
+    !this%abz2 = this%f_z
+    !
+    !u_temp = this%ulag%lf(1)
+    !v_temp = this%vlag%lf(1)
+    !w_temp = this%wlag%lf(1)
+    !call this%source_term%compute(tlag(1), 0)
+
+    !! Pre-multiply the source terms with the mass matrix.
+    !if (NEKO_BCKND_DEVICE .eq. 1) then
+    !   call device_opcolv(this%f_x%x_d, this%f_y%x_d, this%f_z%x_d, this%c_Xh%B_d, this%msh%gdim, n)
+    !else
+    !   call opcolv(this%f_x%x, this%f_y%x, this%f_z%x, this%c_Xh%B, this%msh%gdim, n)
+    !end if
+
+    !! Pre-multiply the source terms with the mass matrix.
+    !if (NEKO_BCKND_DEVICE .eq. 1) then
+    !   call device_opcolv(this%f_x%x_d, this%f_y%x_d, this%f_z%x_d, this%c_Xh%B_d, this%msh%gdim, n)
+    !else
+    !   call opcolv(this%f_x%x, this%f_y%x, this%f_z%x, this%c_Xh%B, this%msh%gdim, n)
+    !end if
+
+    !call this%adv%compute(u_temp, v_temp, w_temp, &
+    !                      this%f_x%x, this%f_y%x, this%f_z%x, &
+    !                      this%Xh, this%c_Xh, this%dm_Xh%size())
+    !this%abx1 = this%f_x
+    !this%aby1 = this%f_y
+    !this%abz1 = this%f_z
+
   end subroutine fluid_pnpn_restart
-  
+
   subroutine fluid_pnpn_free(this)
     class(fluid_pnpn_t), intent(inout) :: this
 
     !Deallocate velocity and pressure fields
     call this%scheme_free()
 
-    call this%bc_prs_surface%free() 
-    call this%bc_sym_surface%free()  
+    call this%bc_prs_surface%free()
+    call this%bc_sym_surface%free()
     call bc_list_free(this%bclst_vel_res)
     call bc_list_free(this%bclst_dp)
     call this%proj_prs%free()
     call this%proj_u%free()
     call this%proj_v%free()
     call this%proj_w%free()
-   
+
     call this%p_res%free()
     call this%u_res%free()
     call this%v_res%free()
     call this%w_res%free()
-    
+
     call this%du%free()
     call this%dv%free()
     call this%dw%free()
     call this%dp%free()
-    
+
     call this%abx1%free()
     call this%aby1%free()
     call this%abz1%free()
@@ -380,7 +475,7 @@ subroutine fluid_pnpn_free(this)
     call this%abx2%free()
     call this%aby2%free()
     call this%abz2%free()
-    
+
     if (allocated(this%Ax)) then
        deallocate(this%Ax)
     end if
@@ -388,7 +483,7 @@ subroutine fluid_pnpn_free(this)
     if (allocated(this%prs_res)) then
        deallocate(this%prs_res)
     end if
-    
+
     if (allocated(this%vel_res)) then
        deallocate(this%vel_res)
     end if
@@ -406,11 +501,7 @@ subroutine fluid_pnpn_free(this)
     end if
 
     call this%vol_flow%free()
-    
-    call this%ulag%free()
-    call this%vlag%free()
-    call this%wlag%free()
-    
+
   end subroutine fluid_pnpn_free
 
   !> Advance fluid simulation in time.
@@ -418,19 +509,21 @@ end subroutine fluid_pnpn_free
   !! @param tstep The current interation.
   !! @param dt The timestep
   !! @param ext_bdf Time integration logic.
-  subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf)
-    class(fluid_pnpn_t), intent(inout) :: this
+  !! @param dt_controller timestep controller
+  subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf, dt_controller)
+    class(fluid_pnpn_t), target, intent(inout) :: this
     real(kind=rp), intent(inout) :: t
     integer, intent(inout) :: tstep
     real(kind=rp), intent(in) :: dt
     type(time_scheme_controller_t), intent(inout) :: ext_bdf
+    type(time_step_controller_t), intent(in) :: dt_controller
     ! number of degrees of freedom
     integer :: n
     ! Solver results monitors (pressure + 3 velocity)
     type(ksp_monitor_t) :: ksp_results(4)
     ! Extrapolated velocity for the pressure residual
     type(field_t), pointer :: u_e, v_e, w_e
-    ! Indices for tracking temporary fields 
+    ! Indices for tracking temporary fields
     integer :: temp_indices(3)
     ! Counter
     integer :: i
@@ -439,7 +532,7 @@ subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf)
 
     n = this%dm_Xh%size()
 
-    call profiler_start_region('Fluid')
+    call profiler_start_region('Fluid', 1)
     associate(u => this%u, v => this%v, w => this%w, p => this%p, &
          du => this%du, dv => this%dv, dw => this%dw, dp => this%dp, &
          u_res =>this%u_res, v_res => this%v_res, w_res => this%w_res, &
@@ -452,21 +545,21 @@ subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf)
          makeabf => this%makeabf, makebdf => this%makebdf, &
          vel_projection_dim => this%vel_projection_dim, &
          pr_projection_dim => this%pr_projection_dim, &
-         ksp_vel_maxiter => this%ksp_vel_maxiter, &
-         ksp_pr_maxiter => this%ksp_pr_maxiter, &
          rho => this%rho, mu => this%mu, &
-         f_x => this%f_x, f_y => this%f_y, f_z => this%f_z)
-      
+         f_x => this%f_x, f_y => this%f_y, f_z => this%f_z, &
+         if_variable_dt => dt_controller%if_variable_dt, &
+         dt_last_change => dt_controller%dt_last_change)
+
       ! Get temporary arrays
       call this%scratch%request_field(u_e, temp_indices(1))
       call this%scratch%request_field(v_e, temp_indices(2))
       call this%scratch%request_field(w_e, temp_indices(3))
       call sumab%compute_fluid(u_e, v_e, w_e, u, v, w, &
            ulag, vlag, wlag, ext_bdf%advection_coeffs, ext_bdf%nadv)
-      
+
       ! Compute the source terms
       call this%source_term%compute(t, tstep)
-        
+
       ! Pre-multiply the source terms with the mass matrix.
       if (NEKO_BCKND_DEVICE .eq. 1) then
          call device_opcolv(f_x%x_d, f_y%x_d, f_z%x_d, c_Xh%B_d, msh%gdim, n)
@@ -482,7 +575,7 @@ subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf)
       ! At this point the RHS contains the sum of the advection operator and
       ! additional source terms, evaluated using the velocity field from the
       ! previous time-step. Now, this value is used in the explicit time
-      ! scheme to advance both terms in time. 
+      ! scheme to advance both terms in time.
       call makeabf%compute_fluid(this%abx1, this%aby1, this%abz1,&
                            this%abx2, this%aby2, this%abz2, &
                            f_x%x, f_y%x, f_z%x, &
@@ -497,49 +590,47 @@ subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf)
       call vlag%update()
       call wlag%update()
 
-      !> We assume that no change of boundary conditions 
+      !> We assume that no change of boundary conditions
       !! occurs between elements. I.e. we do not apply gsop here like in Nek5000
       !> Apply dirichlet
+      call this%dirichlet_update_(this%field_dirichlet_fields, &
+           this%field_dirichlet_bcs, this%c_Xh, t, tstep, "fluid")
+
       call this%bc_apply_vel(t, tstep)
       call this%bc_apply_prs(t, tstep)
 
       ! Compute pressure.
-      call profiler_start_region('Pressure residual')
+      call profiler_start_region('Pressure residual', 18)
       call prs_res%compute(p, p_res, u, v, w, u_e, v_e, w_e, &
                            f_x, f_y, f_z, c_Xh, gs_Xh, this%bc_prs_surface, &
                            this%bc_sym_surface, Ax, ext_bdf%diffusion_coeffs(1), &
                            dt, mu, rho)
-      
-      call gs_Xh%op(p_res, GS_OP_ADD) 
+
+      call gs_Xh%op(p_res, GS_OP_ADD)
       call bc_list_apply_scalar(this%bclst_dp, p_res%x, p%dof%size(), t, tstep)
       call profiler_end_region
 
-      if( tstep .gt. 5 .and. pr_projection_dim .gt. 0) then
-         call this%proj_prs%project_on(p_res%x, c_Xh, n)
-         call this%proj_prs%log_info('Pressure')
-      end if
-      
+      call this%proj_prs%pre_solving(p_res%x, tstep, c_Xh, n, dt_controller, 'Pressure')
+
       call this%pc_prs%update()
-      call profiler_start_region('Pressure solve')
+      call profiler_start_region('Pressure solve', 3)
       ksp_results(1) = &
-         this%ksp_prs%solve(Ax, dp, p_res%x, n, c_Xh,  this%bclst_dp, gs_Xh, &
-                            ksp_pr_maxiter)
+         this%ksp_prs%solve(Ax, dp, p_res%x, n, c_Xh,  this%bclst_dp, gs_Xh)
+
       call profiler_end_region
 
-      if( tstep .gt. 5 .and. pr_projection_dim .gt. 0) then
-         call this%proj_prs%project_back(dp%x, Ax, c_Xh, &
-                                         this%bclst_dp, gs_Xh, n)
-      end if
+      call this%proj_prs%post_solving(dp%x, Ax, c_Xh, &
+                                 this%bclst_dp, gs_Xh, n, tstep, dt_controller)
 
       if (NEKO_BCKND_DEVICE .eq. 1) then
          call device_add2(p%x_d, dp%x_d,n)
       else
          call add2(p%x, dp%x,n)
       end if
-      
+
 
       ! Compute velocity.
-      call profiler_start_region('Velocity residual')
+      call profiler_start_region('Velocity residual', 19)
       call vel_res%compute(Ax, u, v, w, &
                            u_res, v_res, w_res, &
                            p, &
@@ -547,43 +638,50 @@ subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf)
                            c_Xh, msh, Xh, &
                            mu, rho, ext_bdf%diffusion_coeffs(1), &
                            dt, dm_Xh%size())
-      
-      call gs_Xh%op(u_res, GS_OP_ADD) 
-      call gs_Xh%op(v_res, GS_OP_ADD) 
-      call gs_Xh%op(w_res, GS_OP_ADD) 
+
+      call gs_Xh%op(u_res, GS_OP_ADD)
+      call gs_Xh%op(v_res, GS_OP_ADD)
+      call gs_Xh%op(w_res, GS_OP_ADD)
 
       call bc_list_apply_vector(this%bclst_vel_res,&
                                 u_res%x, v_res%x, w_res%x, dm_Xh%size(),&
                                 t, tstep)
-      
-      call profiler_end_region
-      
-      if (tstep .gt. 5 .and. vel_projection_dim .gt. 0) then 
-         call this%proj_u%project_on(u_res%x, c_Xh, n)
-         call this%proj_v%project_on(v_res%x, c_Xh, n)
-         call this%proj_w%project_on(w_res%x, c_Xh, n)
+
+      !We should implement a bc that takes three field_bcs and implements vector_apply
+      if (NEKO_BCKND_DEVICE .eq. 1) then
+         call this%bc_field_dirichlet_u%apply_scalar_dev(u_res%x_d, t, tstep)
+         call this%bc_field_dirichlet_v%apply_scalar_dev(v_res%x_d, t, tstep)
+         call this%bc_field_dirichlet_w%apply_scalar_dev(w_res%x_d, t, tstep)
+      else
+         call this%bc_field_dirichlet_u%apply_scalar(u_res%x, this%dm_Xh%size(), t, tstep)
+         call this%bc_field_dirichlet_v%apply_scalar(v_res%x, this%dm_Xh%size(), t, tstep)
+         call this%bc_field_dirichlet_w%apply_scalar(w_res%x, this%dm_Xh%size(), t, tstep)
       end if
 
+      call profiler_end_region
+
+      call this%proj_u%pre_solving(u_res%x, tstep, c_Xh, n, dt_controller)
+      call this%proj_v%pre_solving(v_res%x, tstep, c_Xh, n, dt_controller)
+      call this%proj_w%pre_solving(w_res%x, tstep, c_Xh, n, dt_controller)
+
       call this%pc_vel%update()
 
-      call profiler_start_region("Velocity solve")
+      call profiler_start_region("Velocity solve", 4)
       ksp_results(2) = this%ksp_vel%solve(Ax, du, u_res%x, n, &
-           c_Xh, this%bclst_du, gs_Xh, ksp_vel_maxiter)
+           c_Xh, this%bclst_du, gs_Xh)
       ksp_results(3) = this%ksp_vel%solve(Ax, dv, v_res%x, n, &
-           c_Xh, this%bclst_dv, gs_Xh, ksp_vel_maxiter)
+           c_Xh, this%bclst_dv, gs_Xh)
       ksp_results(4) = this%ksp_vel%solve(Ax, dw, w_res%x, n, &
-           c_Xh, this%bclst_dw, gs_Xh, ksp_vel_maxiter)
+           c_Xh, this%bclst_dw, gs_Xh)
       call profiler_end_region
 
-      if (tstep .gt. 5 .and. vel_projection_dim .gt. 0) then 
-         call this%proj_u%project_back(du%x, Ax, c_Xh, &
-                                  this%bclst_du, gs_Xh, n)
-         call this%proj_v%project_back(dv%x, Ax, c_Xh, &
-                                  this%bclst_dv, gs_Xh, n)
-         call this%proj_w%project_back(dw%x, Ax, c_Xh, &
-                                  this%bclst_dw, gs_Xh, n)
-      end if
-      
+      call this%proj_u%post_solving(du%x, Ax, c_Xh, &
+                                 this%bclst_du, gs_Xh, n, tstep, dt_controller)
+      call this%proj_v%post_solving(dv%x, Ax, c_Xh, &
+                                 this%bclst_dv, gs_Xh, n, tstep, dt_controller)
+      call this%proj_w%post_solving(dw%x, Ax, c_Xh, &
+                                 this%bclst_dw, gs_Xh, n, tstep, dt_controller)
+
       if (NEKO_BCKND_DEVICE .eq. 1) then
          call device_opadd2cm(u%x_d, v%x_d, w%x_d, &
               du%x_d, dv%x_d, dw%x_d, 1.0_rp, n, msh%gdim)
@@ -596,16 +694,18 @@ subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf)
               c_Xh, gs_Xh, ext_bdf, rho, mu,&
               dt, this%bclst_dp, this%bclst_du, this%bclst_dv, &
               this%bclst_dw, this%bclst_vel_res, Ax, this%ksp_prs, &
-              this%ksp_vel, this%pc_prs, this%pc_vel, ksp_pr_maxiter, &
-              ksp_vel_maxiter)
+              this%ksp_vel, this%pc_prs, this%pc_vel, this%ksp_prs%max_iter, &
+              this%ksp_vel%max_iter)
       end if
-      
+
       call fluid_step_info(tstep, t, dt, ksp_results)
-      
+
       call this%scratch%relinquish_field(temp_indices)
-      
+
     end associate
     call profiler_end_region
   end subroutine fluid_pnpn_step
-  
+
+
+
 end module fluid_pnpn
diff --git a/src/fluid/fluid_scheme.f90 b/src/fluid/fluid_scheme.f90
index 9a098237b2a..77bfaa50360 100644
--- a/src/fluid/fluid_scheme.f90
+++ b/src/fluid/fluid_scheme.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2020-2022, The Neko Authors
+! Copyright (c) 2020-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -38,6 +38,7 @@ module fluid_scheme
   use checkpoint, only : chkp_t
   use mean_flow, only : mean_flow_t
   use num_types
+  use comm
   use fluid_user_source_term, only: fluid_user_source_term_t
   use fluid_source_term, only: fluid_source_term_t
   use field_list, only : field_list_t
@@ -54,6 +55,8 @@ module fluid_scheme
   use dong_outflow, only : dong_outflow_t
   use symmetry, only : symmetry_t
   use non_normal, only : non_normal_t
+  use field_dirichlet, only : field_dirichlet_t, field_dirichlet_update
+  use field_dirichlet_vector, only: field_dirichlet_vector_t
   use krylov_fctry
   use precon_fctry
   use fluid_stats, only : fluid_stats_t
@@ -68,20 +71,20 @@ module fluid_scheme
   use json_utils, only : json_get, json_get_or_default
   use json_module, only : json_file, json_core, json_value
   use scratch_registry, only : scratch_registry_t
-  use source_term, only : source_term_wrapper_t
-  use source_term_fctry, only : source_term_factory
-  use const_source_term, only : const_source_term_t
   use user_intf, only : user_t
   use utils, only : neko_warning, neko_error
   use material_properties, only : material_properties_t
+  use field_series
+  use time_step_controller
   implicit none
-  
+
   !> Base type of all fluid formulations
   type, abstract :: fluid_scheme_t
      type(field_t), pointer :: u => null() !< x-component of Velocity
      type(field_t), pointer :: v => null() !< y-component of Velocity
      type(field_t), pointer :: w => null() !< z-component of Velocity
      type(field_t), pointer :: p => null() !< Pressure
+     type(field_series_t) :: ulag, vlag, wlag !< fluid field (lag)
      type(space_t) :: Xh        !< Function space \f$ X_h \f$
      type(dofmap_t) :: dm_Xh    !< Dofmap associated with \f$ X_h \f$
      type(gs_t) :: gs_Xh        !< Gather-scatter associated with \f$ X_h \f$
@@ -98,19 +101,28 @@ module fluid_scheme
      class(ksp_t), allocatable  :: ksp_prs     !< Krylov solver for pressure
      class(pc_t), allocatable :: pc_vel        !< Velocity Preconditioner
      class(pc_t), allocatable :: pc_prs        !< Velocity Preconditioner
-     integer :: ksp_vel_maxiter                !< Max iterations in ksp_vel
-     integer :: ksp_pr_maxiter                 !< Max iterattions in ksp_pr
      integer :: vel_projection_dim         !< Size of the projection space for ksp_vel
      integer :: pr_projection_dim          !< Size of the projection space for ksp_pr
+     integer :: vel_projection_activ_step  !< Steps to activate projection for ksp_vel
+     integer :: pr_projection_activ_step   !< Steps to activate projection for ksp_pr
      type(no_slip_wall_t) :: bc_wall           !< No-slip wall for velocity
      class(inflow_t), allocatable :: bc_inflow !< Dirichlet inflow for velocity
+
+     ! Attributes for field dirichlet BCs
+     type(field_dirichlet_vector_t) :: bc_field_vel   !< Field Dirichlet velocity condition
+     type(field_dirichlet_t) :: bc_field_prs   !< Field Dirichlet pressure condition
+     procedure(field_dirichlet_update), nopass, pointer :: dirichlet_update_ &
+          => null() !< Pointer to user_dirichlet_update to be called in fluid_scheme_step
+     type(bc_list_t) :: field_dirichlet_bcs       !< List of BC objects to pass to user_dirichlet_update
+     type(field_list_t) :: field_dirichlet_fields !< List of fields to pass to user_dirichlet_update
+
      type(dirichlet_t) :: bc_prs               !< Dirichlet pressure condition
      type(dong_outflow_t) :: bc_dong           !< Dong outflow condition
      type(symmetry_t) :: bc_sym                !< Symmetry plane for velocity
      type(bc_list_t) :: bclst_vel              !< List of velocity conditions
      type(bc_list_t) :: bclst_prs              !< List of pressure conditions
-     type(field_t) :: bdry                     !< Boundary markings     
-     type(json_file), pointer :: params        !< Parameters          
+     type(field_t) :: bdry                     !< Boundary markings
+     type(json_file), pointer :: params        !< Parameters
      type(mesh_t), pointer :: msh => null()    !< Mesh
      type(chkp_t) :: chkp                      !< Checkpoint
      type(mean_flow_t) :: mean                 !< Mean flow field
@@ -119,12 +131,12 @@ module fluid_scheme
      logical :: forced_flow_rate = .false.     !< Is the flow rate forced?
      logical :: freeze = .false.               !< Freeze velocity at initial condition?
      !> Dynamic viscosity
-     real(kind=rp), pointer :: mu
+     real(kind=rp), pointer :: mu => null()
      !> Density
-     real(kind=rp), pointer :: rho
+     real(kind=rp), pointer :: rho => null()
      type(scratch_registry_t) :: scratch       !< Manager for temporary fields
      !> Boundary condition labels (if any)
-     character(len=20), allocatable :: bc_labels(:)
+     character(len=NEKO_MSH_MAX_ZLBL_LEN), allocatable :: bc_labels(:)
    contains
      procedure, pass(this) :: fluid_scheme_init_all
      procedure, pass(this) :: fluid_scheme_init_uvw
@@ -155,7 +167,7 @@ subroutine fluid_scheme_init_intrf(this, msh, lx, params, user, &
        integer, intent(inout) :: lx
        type(json_file), target, intent(inout) :: params
        type(user_t), intent(in) :: user
-       type(material_properties_t), intent(inout) :: material_properties
+       type(material_properties_t), target, intent(inout) :: material_properties
      end subroutine fluid_scheme_init_intrf
   end interface
 
@@ -166,18 +178,20 @@ subroutine fluid_scheme_free_intrf(this)
        class(fluid_scheme_t), intent(inout) :: this
      end subroutine fluid_scheme_free_intrf
   end interface
-  
+
   !> Abstract interface to compute a time-step
   abstract interface
-     subroutine fluid_scheme_step_intrf(this, t, tstep, dt, ext_bdf)
+     subroutine fluid_scheme_step_intrf(this, t, tstep, dt, ext_bdf, dt_controller)
        import fluid_scheme_t
        import time_scheme_controller_t
+       import time_step_controller_t
        import rp
-       class(fluid_scheme_t), intent(inout) :: this
+       class(fluid_scheme_t), target, intent(inout) :: this
        real(kind=rp), intent(inout) :: t
        integer, intent(inout) :: tstep
        real(kind=rp), intent(in) :: dt
        type(time_scheme_controller_t), intent(inout) :: ext_bdf
+       type(time_step_controller_t), intent(in) :: dt_controller
      end subroutine fluid_scheme_step_intrf
   end interface
 
@@ -192,7 +206,6 @@ subroutine fluid_scheme_restart_intrf(this, dtlag, tlag)
      end subroutine fluid_scheme_restart_intrf
   end interface
 
-
 contains
 
   !> Initialize common data for the current scheme
@@ -211,11 +224,11 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, &
     real(kind=rp), allocatable :: real_vec(:)
     real(kind=rp) :: real_val
     logical :: logical_val
-    integer :: integer_val
+    integer :: integer_val, ierr
     character(len=:), allocatable :: string_val1, string_val2
     ! A local pointer that is needed to make Intel happy
 
-    
+
     call neko_log%section('Fluid')
     write(log_buf, '(A, A)') 'Type       : ', trim(scheme)
     call neko_log%message(log_buf)
@@ -270,12 +283,6 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, &
     write(log_buf, '(A, L1)') 'Save bdry  : ',  logical_val
     call neko_log%message(log_buf)
 
-    call json_get_or_default(params, &
-                            'case.fluid.velocity_solver.max_iterations', &
-                            this%ksp_vel_maxiter, 800)
-    call json_get_or_default(params, &
-                            'case.fluid.pressure_solver.max_iterations', &
-                            this%ksp_pr_maxiter, 800)
 
     call json_get_or_default(params, &
                             'case.fluid.velocity_solver.projection_space_size',&
@@ -283,12 +290,19 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, &
     call json_get_or_default(params, &
                             'case.fluid.pressure_solver.projection_space_size',&
                             this%pr_projection_dim, 20)
+    call json_get_or_default(params, &
+                            'case.fluid.velocity_solver.projection_hold_steps',&
+                            this%vel_projection_activ_step, 5)
+    call json_get_or_default(params, &
+                            'case.fluid.pressure_solver.projection_hold_steps',&
+                            this%pr_projection_activ_step, 5)
+
 
     call json_get_or_default(params, 'case.fluid.freeze', this%freeze, .false.)
 
-   if (params%valid_path("case.fluid.flow_rate_force")) then
-      this%forced_flow_rate = .true.
-   end if
+    if (params%valid_path("case.fluid.flow_rate_force")) then
+       this%forced_flow_rate = .true.
+    end if
 
     if (msh%gdim .eq. 2) then
        call this%Xh%init(GLL, lx, lx)
@@ -306,12 +320,12 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, &
 
     call this%c_Xh%init(this%gs_Xh)
 
-    
+
     this%scratch = scratch_registry_t(this%dm_Xh, 10, 2)
 
     allocate(this%bc_labels(NEKO_MSH_MAX_ZLBLS))
     this%bc_labels = "not"
-    
+
     !
     ! Setup velocity boundary conditions
     !
@@ -320,19 +334,19 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, &
                                   'case.fluid.boundary_types', &
                                   this%bc_labels)
     end if
-    
+
     call bc_list_init(this%bclst_vel)
 
-    call this%bc_sym%init(this%dm_Xh)
+    call this%bc_sym%init(this%c_Xh)
     call this%bc_sym%mark_zone(msh%sympln)
     call this%bc_sym%mark_zones_from_list(msh%labeled_zones,&
                         'sym', this%bc_labels)
     call this%bc_sym%finalize()
-    call this%bc_sym%init_msk(this%c_Xh)    
+    call this%bc_sym%init_msk()
     call bc_list_add(this%bclst_vel, this%bc_sym)
 
     !
-    ! Inflow 
+    ! Inflow
     !
     if (params%valid_path('case.fluid.inflow_condition')) then
        call json_get(params, 'case.fluid.inflow_condition.type', string_val1)
@@ -346,7 +360,7 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, &
           call neko_error('Invalid inflow condition '//string_val1)
        end if
 
-       call this%bc_inflow%init(this%dm_Xh)
+       call this%bc_inflow%init(this%c_Xh)
        call this%bc_inflow%mark_zone(msh%inlet)
        call this%bc_inflow%mark_zones_from_list(msh%labeled_zones,&
                         'v', this%bc_labels)
@@ -354,8 +368,8 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, &
        call bc_list_add(this%bclst_vel, this%bc_inflow)
 
        if (trim(string_val1) .eq. "uniform") then
-           call json_get(params, 'case.fluid.inflow_condition.value', real_vec)
-           call this%bc_inflow%set_inflow(real_vec)
+          call json_get(params, 'case.fluid.inflow_condition.value', real_vec)
+          call this%bc_inflow%set_inflow(real_vec)
        else if (trim(string_val1) .eq. "blasius") then
           select type(bc_if => this%bc_inflow)
           type is(blasius_t)
@@ -372,26 +386,97 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, &
           end select
        else if (trim(string_val1) .eq. "user") then
           select type(bc_if => this%bc_inflow)
-             type is(usr_inflow_t)
+          type is(usr_inflow_t)
              call bc_if%set_coef(this%C_Xh)
           end select
        end if
     end if
-    
-    call this%bc_wall%init(this%dm_Xh)
+
+    call this%bc_wall%init(this%c_Xh)
     call this%bc_wall%mark_zone(msh%wall)
     call this%bc_wall%mark_zones_from_list(msh%labeled_zones,&
                         'w', this%bc_labels)
     call this%bc_wall%finalize()
     call bc_list_add(this%bclst_vel, this%bc_wall)
-       
+
+    ! Setup field dirichlet bc for u-velocity
+    call this%bc_field_vel%field_dirichlet_u%init(this%c_Xh)
+    call this%bc_field_vel%field_dirichlet_u%mark_zones_from_list(msh%labeled_zones,&
+                        'd_vel_u', this%bc_labels)
+    call this%bc_field_vel%field_dirichlet_u%finalize()
+
+    call MPI_Allreduce(this%bc_field_vel%field_dirichlet_u%msk(0), integer_val, 1, &
+         MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr)
+    if (integer_val .gt. 0)  call this%bc_field_vel%field_dirichlet_u%init_field('d_vel_u')
+
+    ! Setup field dirichlet bc for v-velocity
+    call this%bc_field_vel%field_dirichlet_v%init(this%c_Xh)
+    call this%bc_field_vel%field_dirichlet_v%mark_zones_from_list(msh%labeled_zones,&
+                        'd_vel_v', this%bc_labels)
+    call this%bc_field_vel%field_dirichlet_v%finalize()
+
+    call MPI_Allreduce(this%bc_field_vel%field_dirichlet_v%msk(0), integer_val, 1, &
+         MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr)
+    if (integer_val .gt. 0)  call this%bc_field_vel%field_dirichlet_v%init_field('d_vel_v')
+
+    ! Setup field dirichlet bc for w-velocity
+    call this%bc_field_vel%field_dirichlet_w%init(this%c_Xh)
+    call this%bc_field_vel%field_dirichlet_w%mark_zones_from_list(msh%labeled_zones,&
+                        'd_vel_w', this%bc_labels)
+    call this%bc_field_vel%field_dirichlet_w%finalize()
+
+    call MPI_Allreduce(this%bc_field_vel%field_dirichlet_w%msk(0), integer_val, 1, &
+         MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr)
+    if (integer_val .gt. 0)  call this%bc_field_vel%field_dirichlet_w%init_field('d_vel_w')
+
+    ! Setup our global field dirichlet bc
+    call this%bc_field_vel%init(this%c_Xh)
+    call this%bc_field_vel%mark_zones_from_list(msh%labeled_zones,&
+                        'd_vel_u', this%bc_labels)
+    call this%bc_field_vel%mark_zones_from_list(msh%labeled_zones,&
+                        'd_vel_v', this%bc_labels)
+    call this%bc_field_vel%mark_zones_from_list(msh%labeled_zones,&
+                        'd_vel_w', this%bc_labels)
+    call this%bc_field_vel%finalize()
+
+    ! Add the field bc to velocity bcs
+    call bc_list_add(this%bclst_vel, this%bc_field_vel)
+
+    !
+    ! Associate our field dirichlet update to the user one.
+    !
+    this%dirichlet_update_ => user%user_dirichlet_update
+
+    !
+    ! Initialize field list and bc list for user_dirichlet_update
+    !
+    allocate(this%field_dirichlet_fields%fields(4))
+
+    this%field_dirichlet_fields%fields(1)%f => &
+         this%bc_field_vel%field_dirichlet_u%field_bc
+    this%field_dirichlet_fields%fields(2)%f => &
+         this%bc_field_vel%field_dirichlet_v%field_bc
+    this%field_dirichlet_fields%fields(3)%f => &
+         this%bc_field_vel%field_dirichlet_w%field_bc
+    this%field_dirichlet_fields%fields(4)%f => &
+         this%bc_field_prs%field_bc
+
+    call bc_list_init(this%field_dirichlet_bcs, size=4)
+    call bc_list_add(this%field_dirichlet_bcs, this%bc_field_vel%field_dirichlet_u)
+    call bc_list_add(this%field_dirichlet_bcs, this%bc_field_vel%field_dirichlet_v)
+    call bc_list_add(this%field_dirichlet_bcs, this%bc_field_vel%field_dirichlet_w)
+
+    !
+    ! Check if we need to output boundaries
+    !
     call json_get_or_default(params, 'case.output_boundary', logical_val,&
                              .false.)
+
     if (logical_val) then
        call this%bdry%init(this%dm_Xh, 'bdry')
        this%bdry = 0.0_rp
-       
-       call bdry_mask%init(this%dm_Xh)
+
+       call bdry_mask%init(this%c_Xh)
        call bdry_mask%mark_zone(msh%wall)
        call bdry_mask%mark_zones_from_list(msh%labeled_zones,&
                       'w', this%bc_labels)
@@ -400,17 +485,16 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, &
        call bdry_mask%apply_scalar(this%bdry%x, this%dm_Xh%size())
        call bdry_mask%free()
 
-       call bdry_mask%init(this%dm_Xh)
+       call bdry_mask%init(this%c_Xh)
        call bdry_mask%mark_zone(msh%inlet)
        call bdry_mask%mark_zones_from_list(msh%labeled_zones,&
                       'v', this%bc_labels)
-
        call bdry_mask%finalize()
        call bdry_mask%set_g(2.0_rp)
        call bdry_mask%apply_scalar(this%bdry%x, this%dm_Xh%size())
        call bdry_mask%free()
 
-       call bdry_mask%init(this%dm_Xh)
+       call bdry_mask%init(this%c_Xh)
        call bdry_mask%mark_zone(msh%outlet)
        call bdry_mask%mark_zones_from_list(msh%labeled_zones,&
                       'o', this%bc_labels)
@@ -419,7 +503,7 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, &
        call bdry_mask%apply_scalar(this%bdry%x, this%dm_Xh%size())
        call bdry_mask%free()
 
-       call bdry_mask%init(this%dm_Xh)
+       call bdry_mask%init(this%c_Xh)
        call bdry_mask%mark_zone(msh%sympln)
        call bdry_mask%mark_zones_from_list(msh%labeled_zones,&
                       'sym', this%bc_labels)
@@ -428,14 +512,14 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, &
        call bdry_mask%apply_scalar(this%bdry%x, this%dm_Xh%size())
        call bdry_mask%free()
 
-       call bdry_mask%init(this%dm_Xh)
+       call bdry_mask%init(this%c_Xh)
        call bdry_mask%mark_zone(msh%periodic)
        call bdry_mask%finalize()
        call bdry_mask%set_g(5.0_rp)
        call bdry_mask%apply_scalar(this%bdry%x, this%dm_Xh%size())
        call bdry_mask%free()
 
-       call bdry_mask%init(this%dm_Xh)
+       call bdry_mask%init(this%c_Xh)
        call bdry_mask%mark_zone(msh%outlet_normal)
        call bdry_mask%mark_zones_from_list(msh%labeled_zones,&
                       'on', this%bc_labels)
@@ -475,14 +559,14 @@ subroutine fluid_scheme_init_uvw(this, msh, lx, params, kspv_init, scheme, &
     logical :: kspv_init
     character(len=*), intent(in) :: scheme
     ! Variables for extracting json
-    logical :: found, logical_val
     real(kind=rp) :: abs_tol
     character(len=:), allocatable :: solver_type, precon_type
+    integer :: ksp_vel_maxiter
 
 
     call fluid_scheme_init_common(this, msh, lx, params, scheme, user, &
                                   material_properties)
-    
+
     call neko_field_registry%add_field(this%dm_Xh, 'u')
     call neko_field_registry%add_field(this%dm_Xh, 'v')
     call neko_field_registry%add_field(this%dm_Xh, 'w')
@@ -497,8 +581,11 @@ subroutine fluid_scheme_init_uvw(this, msh, lx, params, kspv_init, scheme, &
                   abs_tol)
 
     if (kspv_init) then
+      call json_get_or_default(params, &
+                              'case.fluid.velocity_solver.max_iterations', &
+                              ksp_vel_maxiter, 800)
        call fluid_scheme_solver_factory(this%ksp_vel, this%dm_Xh%size(), &
-            solver_type, abs_tol)
+            solver_type, ksp_vel_maxiter, abs_tol)
        call fluid_scheme_precon_factory(this%pc_vel, this%ksp_vel, &
             this%c_Xh, this%dm_Xh, this%gs_Xh, this%bclst_vel, precon_type)
     end if
@@ -521,7 +608,7 @@ subroutine fluid_scheme_init_all(this, msh, lx, params, kspv_init, kspp_init,&
     character(len=*), intent(in) :: scheme
     real(kind=rp) :: real_val, dong_delta, dong_uchar
     real(kind=rp), allocatable :: real_vec(:)
-    integer :: integer_val
+    integer :: integer_val, ierr
     character(len=:), allocatable :: string_val1, string_val2
 
     call fluid_scheme_init_common(this, msh, lx, params, scheme, user, &
@@ -536,16 +623,34 @@ subroutine fluid_scheme_init_all(this, msh, lx, params, kspv_init, kspp_init,&
     this%w => neko_field_registry%get_field('w')
     this%p => neko_field_registry%get_field('p')
 
+    !! lag fields
+    call this%ulag%init(this%u, 2)
+    call this%vlag%init(this%v, 2)
+    call this%wlag%init(this%w, 2)
+
+
     !
     ! Setup pressure boundary conditions
     !
     call bc_list_init(this%bclst_prs)
-    call this%bc_prs%init(this%dm_Xh)
+    call this%bc_prs%init(this%c_Xh)
     call this%bc_prs%mark_zones_from_list(msh%labeled_zones,&
                         'o', this%bc_labels)
     call this%bc_prs%mark_zones_from_list(msh%labeled_zones,&
                         'on', this%bc_labels)
 
+    ! Field dirichlet pressure bc
+    call this%bc_field_prs%init(this%c_Xh)
+    call this%bc_field_prs%mark_zones_from_list(msh%labeled_zones,&
+                        'd_pres', this%bc_labels)
+    call this%bc_field_prs%finalize()
+    call MPI_Allreduce(this%bc_field_prs%msk(0), integer_val, 1, &
+         MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr)
+
+    if (integer_val .gt. 0)  call this%bc_field_prs%init_field('d_pres')
+    call bc_list_add(this%bclst_prs, this%bc_field_prs)
+    call bc_list_add(this%field_dirichlet_bcs, this%bc_field_prs)
+
     if (msh%outlet%size .gt. 0) then
        call this%bc_prs%mark_zone(msh%outlet)
     end if
@@ -556,7 +661,7 @@ subroutine fluid_scheme_init_all(this, msh, lx, params, kspv_init, kspp_init,&
     call this%bc_prs%finalize()
     call this%bc_prs%set_g(0.0_rp)
     call bc_list_add(this%bclst_prs, this%bc_prs)
-    call this%bc_dong%init(this%dm_Xh)
+    call this%bc_dong%init(this%c_Xh)
     call this%bc_dong%mark_zones_from_list(msh%labeled_zones,&
                         'o+dong', this%bc_labels)
     call this%bc_dong%mark_zones_from_list(msh%labeled_zones,&
@@ -568,13 +673,16 @@ subroutine fluid_scheme_init_all(this, msh, lx, params, kspv_init, kspp_init,&
     call json_get_or_default(params, 'case.fluid.outflow_condition.velocity_scale',&
                              dong_uchar, 1.0_rp)
 
-    call this%bc_dong%set_vars(this%c_Xh, this%u, this%v, this%w,&
-         dong_uchar, dong_delta)
+
+    call this%bc_dong%set_vars(dong_uchar, dong_delta)
 
     call bc_list_add(this%bclst_prs, this%bc_dong)
 
 
     if (kspv_init) then
+       call json_get_or_default(params, &
+                                'case.fluid.velocity_solver.max_iterations', &
+                                integer_val, 800)
        call json_get(params, 'case.fluid.velocity_solver.type', string_val1)
        call json_get(params, 'case.fluid.velocity_solver.preconditioner', &
                      string_val2)
@@ -582,12 +690,15 @@ subroutine fluid_scheme_init_all(this, msh, lx, params, kspv_init, kspp_init,&
                      real_val)
 
        call fluid_scheme_solver_factory(this%ksp_vel, this%dm_Xh%size(), &
-            string_val1, real_val)
+            string_val1, integer_val, real_val)
        call fluid_scheme_precon_factory(this%pc_vel, this%ksp_vel, &
             this%c_Xh, this%dm_Xh, this%gs_Xh, this%bclst_vel, string_val2)
     end if
 
     if (kspp_init) then
+       call json_get_or_default(params, &
+                               'case.fluid.pressure_solver.max_iterations', &
+                               integer_val, 800)
        call json_get(params, 'case.fluid.pressure_solver.type', string_val1)
        call json_get(params, 'case.fluid.pressure_solver.preconditioner', &
                      string_val2)
@@ -595,20 +706,19 @@ subroutine fluid_scheme_init_all(this, msh, lx, params, kspv_init, kspp_init,&
                      real_val)
 
        call fluid_scheme_solver_factory(this%ksp_prs, this%dm_Xh%size(), &
-            string_val1, real_val)
+            string_val1, integer_val, real_val)
        call fluid_scheme_precon_factory(this%pc_prs, this%ksp_prs, &
             this%c_Xh, this%dm_Xh, this%gs_Xh, this%bclst_prs, string_val2)
     end if
 
 
     call neko_log%end_section()
-    
+
   end subroutine fluid_scheme_init_all
 
   !> Deallocate a fluid formulation
   subroutine fluid_scheme_free(this)
     class(fluid_scheme_t), intent(inout) :: this
-    integer :: i
 
     call this%bdry%free()
 
@@ -619,6 +729,22 @@ subroutine fluid_scheme_free(this)
     call this%bc_wall%free()
     call this%bc_sym%free()
 
+    !
+    ! Free everything related to field_dirichlet BCs
+    !
+    call this%bc_field_prs%field_bc%free()
+    call this%bc_field_prs%free()
+    call this%bc_field_vel%field_dirichlet_u%field_bc%free()
+    call this%bc_field_vel%field_dirichlet_v%field_bc%free()
+    call this%bc_field_vel%field_dirichlet_w%field_bc%free()
+    call this%bc_field_vel%free()
+
+    call this%field_dirichlet_fields%free()
+    call bc_list_free(this%field_dirichlet_bcs)
+    if (associated(this%dirichlet_update_)) then
+       this%dirichlet_update_ => null()
+    end if
+
     call this%Xh%free()
 
     if (allocated(this%ksp_vel)) then
@@ -652,7 +778,7 @@ subroutine fluid_scheme_free(this)
     call this%c_Xh%free()
 
     call bc_list_free(this%bclst_vel)
-    
+
     call this%scratch%free()
 
     nullify(this%params)
@@ -662,23 +788,28 @@ subroutine fluid_scheme_free(this)
     nullify(this%w)
     nullify(this%p)
 
+    call this%ulag%free()
+    call this%vlag%free()
+    call this%wlag%free()
+
+
     if (associated(this%f_x)) then
-      call this%f_x%free()
+       call this%f_x%free()
     end if
 
     if (associated(this%f_y)) then
-      call this%f_y%free()
+       call this%f_y%free()
     end if
 
     if (associated(this%f_z)) then
-      call this%f_z%free()
+       call this%f_z%free()
     end if
 
     nullify(this%f_x)
     nullify(this%f_y)
     nullify(this%f_z)
-    
-    
+
+
   end subroutine fluid_scheme_free
 
   !> Validate that all fields, solvers etc necessary for
@@ -686,7 +817,7 @@ end subroutine fluid_scheme_free
   subroutine fluid_scheme_validate(this)
     class(fluid_scheme_t), target, intent(inout) :: this
     ! Variables for retrieving json parameters
-    logical :: found, logical_val
+    logical :: logical_val
 
     if ( (.not. associated(this%u)) .or. &
          (.not. associated(this%v)) .or. &
@@ -694,7 +825,7 @@ subroutine fluid_scheme_validate(this)
          (.not. associated(this%p))) then
        call neko_error('Fields are not registered')
     end if
-    
+
     if ( (.not. allocated(this%u%x)) .or. &
          (.not. allocated(this%v%x)) .or. &
          (.not. allocated(this%w%x)) .or. &
@@ -705,7 +836,7 @@ subroutine fluid_scheme_validate(this)
     if (.not. allocated(this%ksp_vel)) then
        call neko_error('No Krylov solver for velocity defined')
     end if
-    
+
     if (.not. allocated(this%ksp_prs)) then
        call neko_error('No Krylov solver for pressure defined')
     end if
@@ -714,10 +845,12 @@ subroutine fluid_scheme_validate(this)
        call neko_error('No parameters defined')
     end if
 
-    select type(ip => this%bc_inflow)
-    type is(usr_inflow_t)
-       call ip%validate
-    end select
+    if (allocated(this%bc_inflow)) then
+       select type(ip => this%bc_inflow)
+       type is(usr_inflow_t)
+          call ip%validate
+       end select
+    end if
 
     !
     ! Setup checkpoint structure (if everything is fine)
@@ -744,35 +877,42 @@ subroutine fluid_scheme_validate(this)
   end subroutine fluid_scheme_validate
 
   !> Apply all boundary conditions defined for velocity
+  !! Here we perform additional gs operations to take care of
+  !! shared points between elements that have different BCs, as done in Nek5000.
   !! @todo Why can't we call the interface here?
   subroutine fluid_scheme_bc_apply_vel(this, t, tstep)
     class(fluid_scheme_t), intent(inout) :: this
     real(kind=rp), intent(in) :: t
     integer, intent(in) :: tstep
+
     call bc_list_apply_vector(this%bclst_vel,&
          this%u%x, this%v%x, this%w%x, this%dm_Xh%size(), t, tstep)
+
   end subroutine fluid_scheme_bc_apply_vel
-  
+
   !> Apply all boundary conditions defined for pressure
   !! @todo Why can't we call the interface here?
   subroutine fluid_scheme_bc_apply_prs(this, t, tstep)
     class(fluid_scheme_t), intent(inout) :: this
     real(kind=rp), intent(in) :: t
     integer, intent(in) :: tstep
+
     call bc_list_apply_scalar(this%bclst_prs, this%p%x, &
                               this%p%dof%size(), t, tstep)
+
   end subroutine fluid_scheme_bc_apply_prs
-  
+
   !> Initialize a linear solver
   !! @note Currently only supporting Krylov solvers
-  subroutine fluid_scheme_solver_factory(ksp, n, solver, abstol)
+  subroutine fluid_scheme_solver_factory(ksp, n, solver, max_iter, abstol)
     class(ksp_t), allocatable, target, intent(inout) :: ksp
     integer, intent(in), value :: n
     character(len=*), intent(in) :: solver
+    integer, intent(in) :: max_iter
     real(kind=rp), intent(in) :: abstol
-    
-    call krylov_solver_factory(ksp, n, solver, abstol)
-    
+
+    call krylov_solver_factory(ksp, n, solver, max_iter, abstol)
+
   end subroutine fluid_scheme_solver_factory
 
   !> Initialize a Krylov preconditioner
@@ -784,9 +924,9 @@ subroutine fluid_scheme_precon_factory(pc, ksp, coef, dof, gs, bclst, pctype)
     type(gs_t), target, intent(inout) :: gs
     type(bc_list_t), target, intent(inout) :: bclst
     character(len=*) :: pctype
-    
+
     call precon_factory(pc, pctype)
-    
+
     select type(pcp => pc)
     type is(jacobi_t)
        call pcp%init(coef, dof, gs)
@@ -808,7 +948,7 @@ subroutine fluid_scheme_precon_factory(pc, ksp, coef, dof, gs, bclst, pctype)
     end select
 
     call ksp%set_pc(pc)
-    
+
   end subroutine fluid_scheme_precon_factory
 
   !> Initialize a user defined inflow condition
@@ -818,9 +958,9 @@ subroutine fluid_scheme_set_usr_inflow(this, usr_eval)
 
     select type(bc_if => this%bc_inflow)
     type is(usr_inflow_t)
-      call bc_if%set_eval(usr_eval)
+       call bc_if%set_eval(usr_eval)
     class default
-      call neko_error("Not a user defined inflow condition")
+       call neko_error("Not a user defined inflow condition")
     end select
   end subroutine fluid_scheme_set_usr_inflow
 
@@ -832,7 +972,7 @@ function fluid_compute_cfl(this, dt) result(c)
 
     c = cfl(dt, this%u%x, this%v%x, this%w%x, &
          this%Xh, this%c_Xh, this%msh%nelv, this%msh%gdim)
-    
+
   end function fluid_compute_cfl
-      
+
 end module fluid_scheme
diff --git a/src/fluid/fluid_source_term.f90 b/src/fluid/fluid_source_term.f90
index 69a86da6525..52bfe345503 100644
--- a/src/fluid/fluid_source_term.f90
+++ b/src/fluid/fluid_source_term.f90
@@ -43,7 +43,8 @@ module fluid_source_term
   use json_utils, only : json_get
   use json_module, only : json_file, json_core, json_value
   use coefs, only : coef_t
-  use user_intf, only : user_t 
+  use user_intf, only : user_t
+  use utils, only : neko_warning
   implicit none
   private
 
@@ -65,7 +66,7 @@ module fluid_source_term
      procedure, pass(this) :: init => fluid_source_term_init
      !> Destructor.
      procedure, pass(this) :: free => fluid_source_term_free
-     !> Add all the source term to the passed right-hand side fields.
+     !> Add all the source terms to the passed right-hand side fields.
      procedure, pass(this) :: compute => fluid_source_term_compute
      !> Initialize the user source term.
      procedure, nopass, private :: init_user_source
@@ -86,7 +87,7 @@ subroutine fluid_source_term_init(this, json, f_x, f_y, f_z, coef, user)
     ! Json low-level manipulator.
     type(json_core) :: core
     ! Pointer to the source_terms JSON object and the individual sources.
-    type(json_value), pointer :: source_object, source_pointer 
+    type(json_value), pointer :: source_object, source_pointer
     ! Buffer for serializing the json.
     character(len=:), allocatable :: buffer
     ! A single source term as its own json_file.
@@ -106,7 +107,7 @@ subroutine fluid_source_term_init(this, json, f_x, f_y, f_z, coef, user)
 
 
     if (json%valid_path('case.fluid.source_terms')) then
-      ! We package the fields for the source term to operate on in a field list.
+       ! We package the fields for the source term to operate on in a field list.
        allocate(rhs_fields%fields(3))
        rhs_fields%fields(1)%f => f_x
        rhs_fields%fields(2)%f => f_y
@@ -120,7 +121,7 @@ subroutine fluid_source_term_init(this, json, f_x, f_y, f_z, coef, user)
 
 
        do i=1, n_sources
-         ! Create a new json containing just the subdict for this source.
+          ! Create a new json containing just the subdict for this source.
           call core%get_child(source_object, i, source_pointer, found)
           call core%print_to_string(source_pointer, buffer)
           call source_subdict%load_from_string(buffer)
@@ -129,24 +130,30 @@ subroutine fluid_source_term_init(this, json, f_x, f_y, f_z, coef, user)
           ! The user source is treated separately
           if ((trim(type) .eq. "user_vector") .or. &
               (trim(type) .eq. "user_pointwise")) then
-              
-              call init_user_source(this%source_terms(i)%source_term, &
+
+             if (source_subdict%valid_path("start_time") .or. &
+                 source_subdict%valid_path("end_time")) then
+                 call neko_warning("The start_time and end_time parameters have&
+                                    & no effect on the fluid user source term")
+             end if
+
+             call init_user_source(this%source_terms(i)%source_term, &
                                     rhs_fields, coef, type, user)
-          else 
-              
-              call source_term_factory(this%source_terms(i)%source_term, &
+          else
+
+             call source_term_factory(this%source_terms(i)%source_term, &
                                        source_subdict, rhs_fields, coef)
           end if
-      end do 
+       end do
     end if
-    
+
   end subroutine fluid_source_term_init
 
   !> Initialize the user source term.
   !! @param source_term The allocatable source term to be initialized to a user.
   !! @param rhs_fields The field list with the 3 right-hand-side components.
   !! @param coef The SEM coefs.
-  !! @param type The type of the user source term, "user_vector" or 
+  !! @param type The type of the user source term, "user_vector" or
   !! "user_poinwise".
   !! @param user The user type containing the user source term routines.
   subroutine init_user_source(source_term, rhs_fields, coef, type, user)
@@ -160,11 +167,11 @@ subroutine init_user_source(source_term, rhs_fields, coef, type, user)
 
     select type (source_term)
     type is (fluid_user_source_term_t)
-      call source_term%init_from_components(rhs_fields, coef, type, &
+       call source_term%init_from_components(rhs_fields, coef, type, &
                                             user%fluid_user_f_vector, &
                                             user%fluid_user_f)
     end select
-  end subroutine
+  end subroutine init_user_source
 
   !> Destructor.
   subroutine fluid_source_term_free(this)
diff --git a/src/fluid/fluid_stats.f90 b/src/fluid/fluid_stats.f90
index e2a6434dec2..a80f88073b3 100644
--- a/src/fluid/fluid_stats.f90
+++ b/src/fluid/fluid_stats.f90
@@ -156,7 +156,7 @@ subroutine fluid_stats_init(this, coef, u_mf,v_mf,w_mf,p_mf)
     type(coef_t), target, optional :: coef
     type(mean_field_t), target, intent(inout) :: u_mf, v_mf, w_mf, p_mf
     this%coef => coef
-  
+
     this%u_mean => u_mf%mf
     this%v_mean => v_mf%mf
     this%w_mean => w_mf%mf
@@ -171,7 +171,7 @@ subroutine fluid_stats_init(this, coef, u_mf,v_mf,w_mf,p_mf)
     call this%stats_v%init(this%u%dof, 'v temp')
     call this%stats_w%init(this%u%dof, 'w temp')
     call this%stats_p%init(this%u%dof, 'p temp')
- 
+
     call this%dudx%init(this%u%dof, 'dudx')
     call this%dudy%init(this%u%dof, 'dudy')
     call this%dudz%init(this%u%dof, 'dudz')
@@ -181,7 +181,7 @@ subroutine fluid_stats_init(this, coef, u_mf,v_mf,w_mf,p_mf)
     call this%dwdx%init(this%u%dof, 'dwdx')
     call this%dwdy%init(this%u%dof, 'dwdy')
     call this%dwdz%init(this%u%dof, 'dwdz')
-    
+
     call this%uu%init(this%stats_u, 'uu')
     call this%vv%init(this%stats_v, 'vv')
     call this%ww%init(this%stats_w, 'ww')
@@ -205,11 +205,11 @@ subroutine fluid_stats_init(this, coef, u_mf,v_mf,w_mf,p_mf)
     call this%pp%init(this%stats_p, 'pp')
     call this%ppp%init(this%stats_work, 'ppp')
     call this%pppp%init(this%stats_work, 'pppp')
-     !> Pressure * velocity
+    !> Pressure * velocity
     call this%pu%init(this%stats_work, 'pu')
     call this%pv%init(this%stats_work, 'pv')
     call this%pw%init(this%stats_work, 'pw')
- 
+
     call this%pdudx%init(this%stats_work, 'pdudx')
     call this%pdudy%init(this%stats_work, 'pdudy')
     call this%pdudz%init(this%stats_work, 'pdudz')
@@ -284,223 +284,223 @@ subroutine fluid_stats_update(this, k)
 
     associate(stats_work => this%stats_work, stats_u => this%stats_u,&
               stats_v => this%stats_v, stats_w => this%stats_w, stats_p => this%stats_p)
-    n = stats_work%dof%size()
-
-    !> U%f is u and U%mf is <u>
-    if (NEKO_BCKND_DEVICE .eq. 1) then
-
-       call device_col3(stats_u%x_d,this%u%x_d, this%u%x_d,n)
-       call device_col3(stats_v%x_d,this%v%x_d, this%v%x_d,n)
-       call device_col3(stats_w%x_d,this%w%x_d, this%w%x_d,n)
-       call device_col3(stats_p%x_d,this%p%x_d, this%p%x_d,n)
-
-       call this%uu%update(k)
-       call this%vv%update(k)
-       call this%ww%update(k)
-       call this%pp%update(k)
-
-       call device_col3(stats_work%x_d,this%u%x_d, this%v%x_d,n)
-       call this%uv%update(k)
-       call device_col3(stats_work%x_d,this%u%x_d, this%w%x_d,n)
-       call this%uw%update(k)
-       call device_col3(stats_work%x_d,this%v%x_d, this%w%x_d,n)
-       call this%vw%update(k)
-
-       call device_col2(stats_work%x_d, this%u%x_d,n)
-       call this%uvw%update(k)
-       call device_col3(stats_work%x_d,this%stats_u%x_d, this%u%x_d,n)
-       call this%uuu%update(k)
-       call device_col3(stats_work%x_d,this%stats_v%x_d, this%v%x_d,n)
-       call this%vvv%update(k)
-       call device_col3(stats_work%x_d,this%stats_w%x_d, this%w%x_d,n)
-       call this%www%update(k)
-       call device_col3(stats_work%x_d,this%stats_u%x_d, this%v%x_d,n)
-       call this%uuv%update(k)
-       call device_col3(stats_work%x_d,this%stats_u%x_d, this%w%x_d,n)
-       call this%uuw%update(k)
-       call device_col3(stats_work%x_d,this%stats_v%x_d, this%u%x_d,n)
-       call this%uvv%update(k)
-       call device_col3(stats_work%x_d,this%stats_v%x_d, this%w%x_d,n)
-       call this%vvw%update(k)
-       call device_col3(stats_work%x_d,this%stats_w%x_d, this%u%x_d,n)
-       call this%uww%update(k)
-       call device_col3(stats_work%x_d,this%stats_w%x_d, this%v%x_d,n)
-       call this%vww%update(k)
-
-       call device_col3(stats_work%x_d,this%stats_u%x_d, this%stats_u%x_d,n)
-       call this%uuuu%update(k)
-       call device_col3(stats_work%x_d,this%stats_v%x_d, this%stats_v%x_d,n)
-       call this%vvvv%update(k)
-       call device_col3(stats_work%x_d,this%stats_w%x_d, this%stats_w%x_d,n)
-       call this%wwww%update(k)
-
-       call device_col3(stats_work%x_d,this%stats_p%x_d, this%p%x_d,n)
-       call this%ppp%update(k)
-       call device_col3(stats_work%x_d,this%stats_p%x_d, this%stats_p%x_d,n)
-       call this%pppp%update(k)
-
-       call device_col3(stats_work%x_d,this%p%x_d, this%u%x_d,n)
-       call this%pu%update(k)
-       call device_col3(stats_work%x_d,this%p%x_d, this%v%x_d,n)
-       call this%pv%update(k)
-       call device_col3(stats_work%x_d,this%p%x_d, this%w%x_d,n)
-       call this%pw%update(k)
-
-    else
-
-       call col3(stats_u%x,this%u%x, this%u%x,n)
-       call col3(stats_v%x,this%v%x, this%v%x,n)
-       call col3(stats_w%x,this%w%x, this%w%x,n)
-       call col3(stats_p%x,this%p%x, this%p%x,n)
-
-       call this%uu%update(k)
-       call this%vv%update(k)
-       call this%ww%update(k)
-       call this%pp%update(k)
-
-       call col3(stats_work%x,this%u%x, this%v%x,n)
-       call this%uv%update(k)
-       call col3(stats_work%x,this%u%x, this%w%x,n)
-       call this%uw%update(k)
-       call col3(stats_work%x,this%v%x, this%w%x,n)
-       call this%vw%update(k)
-
-       call col2(stats_work%x, this%u%x,n)
-       call this%uvw%update(k)
-       call col3(stats_work%x,this%stats_u%x, this%u%x,n)
-       call this%uuu%update(k)
-       call col3(stats_work%x,this%stats_v%x, this%v%x,n)
-       call this%vvv%update(k)
-       call col3(stats_work%x,this%stats_w%x, this%w%x,n)
-       call this%www%update(k)
-       call col3(stats_work%x,this%stats_u%x, this%v%x,n)
-       call this%uuv%update(k)
-       call col3(stats_work%x,this%stats_u%x, this%w%x,n)
-       call this%uuw%update(k)
-       call col3(stats_work%x,this%stats_v%x, this%u%x,n)
-       call this%uvv%update(k)
-       call col3(stats_work%x,this%stats_v%x, this%w%x,n)
-       call this%vvw%update(k)
-       call col3(stats_work%x,this%stats_w%x, this%u%x,n)
-       call this%uww%update(k)
-       call col3(stats_work%x,this%stats_w%x, this%v%x,n)
-       call this%vww%update(k)
-
-       call col3(stats_work%x,this%stats_u%x, this%stats_u%x,n)
-       call this%uuuu%update(k)
-       call col3(stats_work%x,this%stats_v%x, this%stats_v%x,n)
-       call this%vvvv%update(k)
-       call col3(stats_work%x,this%stats_w%x, this%stats_w%x,n)
-       call this%wwww%update(k)
-
-       call col3(stats_work%x,this%stats_p%x, this%p%x,n)
-       call this%ppp%update(k)
-       call col3(stats_work%x,this%stats_p%x, this%stats_p%x,n)
-       call this%pppp%update(k)
-
-       call col3(stats_work%x,this%p%x, this%u%x,n)
-       call this%pu%update(k)
-       call col3(stats_work%x,this%p%x, this%v%x,n)
-       call this%pv%update(k)
-       call col3(stats_work%x,this%p%x, this%w%x,n)
-       call this%pw%update(k)
-
-
-    end if
-    call opgrad(this%dudx%x,this%dudy%x, this%dudz%x,this%u%x,this%coef)
-    call opgrad(this%dvdx%x,this%dvdy%x, this%dvdz%x,this%v%x,this%coef)
-    call opgrad(this%dwdx%x,this%dwdy%x, this%dwdz%x,this%w%x,this%coef)
-
-    if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_col3(stats_work%x_d,this%dudx%x_d, this%p%x_d,n)
-       call this%pdudx%update(k)
-       call device_col3(stats_work%x_d,this%dudy%x_d, this%p%x_d,n)
-       call this%pdudy%update(k)
-       call device_col3(stats_work%x_d,this%dudz%x_d, this%p%x_d,n)
-       call this%pdudz%update(k)
-
-       call device_col3(stats_work%x_d,this%dvdx%x_d, this%p%x_d,n)
-       call this%pdvdx%update(k)
-       call device_col3(stats_work%x_d,this%dvdy%x_d, this%p%x_d,n)
-       call this%pdvdy%update(k)
-       call device_col3(stats_work%x_d,this%dvdz%x_d, this%p%x_d,n)
-       call this%pdvdz%update(k)
-
-       call device_col3(stats_work%x_d,this%dwdx%x_d, this%p%x_d,n)
-       call this%pdwdx%update(k)
-       call device_col3(stats_work%x_d,this%dwdy%x_d, this%p%x_d,n)
-       call this%pdwdy%update(k)
-       call device_col3(stats_work%x_d,this%dwdz%x_d, this%p%x_d,n)
-       call this%pdwdz%update(k)
-
-       call device_col3(this%stats_work%x_d,this%dudx%x_d, this%dudx%x_d,n)
-       call device_addcol3(this%stats_work%x_d,this%dudy%x_d, this%dudy%x_d,n)
-       call device_addcol3(this%stats_work%x_d,this%dudz%x_d, this%dudz%x_d,n)
-       call this%e11%update(k)
-       call device_col3(this%stats_work%x_d,this%dvdx%x_d, this%dvdx%x_d,n)
-       call device_addcol3(this%stats_work%x_d,this%dvdy%x_d, this%dvdy%x_d,n)
-       call device_addcol3(this%stats_work%x_d,this%dvdz%x_d, this%dvdz%x_d,n)
-       call this%e22%update(k)
-       call device_col3(this%stats_work%x_d,this%dwdx%x_d, this%dwdx%x_d,n)
-       call device_addcol3(this%stats_work%x_d,this%dwdy%x_d, this%dwdy%x_d,n)
-       call device_addcol3(this%stats_work%x_d,this%dwdz%x_d, this%dwdz%x_d,n)
-       call this%e33%update(k)
-       call device_col3(this%stats_work%x_d,this%dudx%x_d, this%dvdx%x_d,n)
-       call device_addcol3(this%stats_work%x_d,this%dudy%x_d, this%dvdy%x_d,n)
-       call device_addcol3(this%stats_work%x_d,this%dudz%x_d, this%dvdz%x_d,n)
-       call this%e12%update(k)
-       call device_col3(this%stats_work%x_d,this%dvdx%x_d, this%dwdx%x_d,n)
-       call device_addcol3(this%stats_work%x_d,this%dvdy%x_d, this%dwdy%x_d,n)
-       call device_addcol3(this%stats_work%x_d,this%dvdz%x_d, this%dwdz%x_d,n)
-       call this%e23%update(k)
-
-
-    else
-       call col3(stats_work%x,this%dudx%x, this%p%x,n)
-       call this%pdudx%update(k)
-       call col3(stats_work%x,this%dudy%x, this%p%x,n)
-       call this%pdudy%update(k)
-       call col3(stats_work%x,this%dudz%x, this%p%x,n)
-       call this%pdudz%update(k)
-
-       call col3(stats_work%x,this%dvdx%x, this%p%x,n)
-       call this%pdvdx%update(k)
-       call col3(stats_work%x,this%dvdy%x, this%p%x,n)
-       call this%pdvdy%update(k)
-       call col3(stats_work%x,this%dvdz%x, this%p%x,n)
-       call this%pdvdz%update(k)
-
-       call col3(stats_work%x,this%dwdx%x, this%p%x,n)
-       call this%pdwdx%update(k)
-       call col3(stats_work%x,this%dwdy%x, this%p%x,n)
-       call this%pdwdy%update(k)
-       call col3(stats_work%x,this%dwdz%x, this%p%x,n)
-       call this%pdwdz%update(k)
-
-       call col3(this%stats_work%x,this%dudx%x, this%dudx%x,n)
-       call addcol3(this%stats_work%x,this%dudy%x, this%dudy%x,n)
-       call addcol3(this%stats_work%x,this%dudz%x, this%dudz%x,n)
-       call this%e11%update(k)
-       call col3(this%stats_work%x,this%dvdx%x, this%dvdx%x,n)
-       call addcol3(this%stats_work%x,this%dvdy%x, this%dvdy%x,n)
-       call addcol3(this%stats_work%x,this%dvdz%x, this%dvdz%x,n)
-       call this%e22%update(k)
-       call col3(this%stats_work%x,this%dwdx%x, this%dwdx%x,n)
-       call addcol3(this%stats_work%x,this%dwdy%x, this%dwdy%x,n)
-       call addcol3(this%stats_work%x,this%dwdz%x, this%dwdz%x,n)
-       call this%e33%update(k)
-       call col3(this%stats_work%x,this%dudx%x, this%dvdx%x,n)
-       call addcol3(this%stats_work%x,this%dudy%x, this%dvdy%x,n)
-       call addcol3(this%stats_work%x,this%dudz%x, this%dvdz%x,n)
-       call this%e12%update(k)
-       call col3(this%stats_work%x,this%dvdx%x, this%dwdx%x,n)
-       call addcol3(this%stats_work%x,this%dvdy%x, this%dwdy%x,n)
-       call addcol3(this%stats_work%x,this%dvdz%x, this%dwdz%x,n)
-       call this%e23%update(k)
-
-    end if
-
-  end associate
+      n = stats_work%dof%size()
+
+      !> U%f is u and U%mf is <u>
+      if (NEKO_BCKND_DEVICE .eq. 1) then
+
+         call device_col3(stats_u%x_d,this%u%x_d, this%u%x_d,n)
+         call device_col3(stats_v%x_d,this%v%x_d, this%v%x_d,n)
+         call device_col3(stats_w%x_d,this%w%x_d, this%w%x_d,n)
+         call device_col3(stats_p%x_d,this%p%x_d, this%p%x_d,n)
+
+         call this%uu%update(k)
+         call this%vv%update(k)
+         call this%ww%update(k)
+         call this%pp%update(k)
+
+         call device_col3(stats_work%x_d,this%u%x_d, this%v%x_d,n)
+         call this%uv%update(k)
+         call device_col3(stats_work%x_d,this%u%x_d, this%w%x_d,n)
+         call this%uw%update(k)
+         call device_col3(stats_work%x_d,this%v%x_d, this%w%x_d,n)
+         call this%vw%update(k)
+
+         call device_col2(stats_work%x_d, this%u%x_d,n)
+         call this%uvw%update(k)
+         call device_col3(stats_work%x_d,this%stats_u%x_d, this%u%x_d,n)
+         call this%uuu%update(k)
+         call device_col3(stats_work%x_d,this%stats_v%x_d, this%v%x_d,n)
+         call this%vvv%update(k)
+         call device_col3(stats_work%x_d,this%stats_w%x_d, this%w%x_d,n)
+         call this%www%update(k)
+         call device_col3(stats_work%x_d,this%stats_u%x_d, this%v%x_d,n)
+         call this%uuv%update(k)
+         call device_col3(stats_work%x_d,this%stats_u%x_d, this%w%x_d,n)
+         call this%uuw%update(k)
+         call device_col3(stats_work%x_d,this%stats_v%x_d, this%u%x_d,n)
+         call this%uvv%update(k)
+         call device_col3(stats_work%x_d,this%stats_v%x_d, this%w%x_d,n)
+         call this%vvw%update(k)
+         call device_col3(stats_work%x_d,this%stats_w%x_d, this%u%x_d,n)
+         call this%uww%update(k)
+         call device_col3(stats_work%x_d,this%stats_w%x_d, this%v%x_d,n)
+         call this%vww%update(k)
+
+         call device_col3(stats_work%x_d,this%stats_u%x_d, this%stats_u%x_d,n)
+         call this%uuuu%update(k)
+         call device_col3(stats_work%x_d,this%stats_v%x_d, this%stats_v%x_d,n)
+         call this%vvvv%update(k)
+         call device_col3(stats_work%x_d,this%stats_w%x_d, this%stats_w%x_d,n)
+         call this%wwww%update(k)
+
+         call device_col3(stats_work%x_d,this%stats_p%x_d, this%p%x_d,n)
+         call this%ppp%update(k)
+         call device_col3(stats_work%x_d,this%stats_p%x_d, this%stats_p%x_d,n)
+         call this%pppp%update(k)
+
+         call device_col3(stats_work%x_d,this%p%x_d, this%u%x_d,n)
+         call this%pu%update(k)
+         call device_col3(stats_work%x_d,this%p%x_d, this%v%x_d,n)
+         call this%pv%update(k)
+         call device_col3(stats_work%x_d,this%p%x_d, this%w%x_d,n)
+         call this%pw%update(k)
+
+      else
+
+         call col3(stats_u%x,this%u%x, this%u%x,n)
+         call col3(stats_v%x,this%v%x, this%v%x,n)
+         call col3(stats_w%x,this%w%x, this%w%x,n)
+         call col3(stats_p%x,this%p%x, this%p%x,n)
+
+         call this%uu%update(k)
+         call this%vv%update(k)
+         call this%ww%update(k)
+         call this%pp%update(k)
+
+         call col3(stats_work%x,this%u%x, this%v%x,n)
+         call this%uv%update(k)
+         call col3(stats_work%x,this%u%x, this%w%x,n)
+         call this%uw%update(k)
+         call col3(stats_work%x,this%v%x, this%w%x,n)
+         call this%vw%update(k)
+
+         call col2(stats_work%x, this%u%x,n)
+         call this%uvw%update(k)
+         call col3(stats_work%x,this%stats_u%x, this%u%x,n)
+         call this%uuu%update(k)
+         call col3(stats_work%x,this%stats_v%x, this%v%x,n)
+         call this%vvv%update(k)
+         call col3(stats_work%x,this%stats_w%x, this%w%x,n)
+         call this%www%update(k)
+         call col3(stats_work%x,this%stats_u%x, this%v%x,n)
+         call this%uuv%update(k)
+         call col3(stats_work%x,this%stats_u%x, this%w%x,n)
+         call this%uuw%update(k)
+         call col3(stats_work%x,this%stats_v%x, this%u%x,n)
+         call this%uvv%update(k)
+         call col3(stats_work%x,this%stats_v%x, this%w%x,n)
+         call this%vvw%update(k)
+         call col3(stats_work%x,this%stats_w%x, this%u%x,n)
+         call this%uww%update(k)
+         call col3(stats_work%x,this%stats_w%x, this%v%x,n)
+         call this%vww%update(k)
+
+         call col3(stats_work%x,this%stats_u%x, this%stats_u%x,n)
+         call this%uuuu%update(k)
+         call col3(stats_work%x,this%stats_v%x, this%stats_v%x,n)
+         call this%vvvv%update(k)
+         call col3(stats_work%x,this%stats_w%x, this%stats_w%x,n)
+         call this%wwww%update(k)
+
+         call col3(stats_work%x,this%stats_p%x, this%p%x,n)
+         call this%ppp%update(k)
+         call col3(stats_work%x,this%stats_p%x, this%stats_p%x,n)
+         call this%pppp%update(k)
+
+         call col3(stats_work%x,this%p%x, this%u%x,n)
+         call this%pu%update(k)
+         call col3(stats_work%x,this%p%x, this%v%x,n)
+         call this%pv%update(k)
+         call col3(stats_work%x,this%p%x, this%w%x,n)
+         call this%pw%update(k)
+
+
+      end if
+      call opgrad(this%dudx%x,this%dudy%x, this%dudz%x,this%u%x,this%coef)
+      call opgrad(this%dvdx%x,this%dvdy%x, this%dvdz%x,this%v%x,this%coef)
+      call opgrad(this%dwdx%x,this%dwdy%x, this%dwdz%x,this%w%x,this%coef)
+
+      if (NEKO_BCKND_DEVICE .eq. 1) then
+         call device_col3(stats_work%x_d,this%dudx%x_d, this%p%x_d,n)
+         call this%pdudx%update(k)
+         call device_col3(stats_work%x_d,this%dudy%x_d, this%p%x_d,n)
+         call this%pdudy%update(k)
+         call device_col3(stats_work%x_d,this%dudz%x_d, this%p%x_d,n)
+         call this%pdudz%update(k)
+
+         call device_col3(stats_work%x_d,this%dvdx%x_d, this%p%x_d,n)
+         call this%pdvdx%update(k)
+         call device_col3(stats_work%x_d,this%dvdy%x_d, this%p%x_d,n)
+         call this%pdvdy%update(k)
+         call device_col3(stats_work%x_d,this%dvdz%x_d, this%p%x_d,n)
+         call this%pdvdz%update(k)
+
+         call device_col3(stats_work%x_d,this%dwdx%x_d, this%p%x_d,n)
+         call this%pdwdx%update(k)
+         call device_col3(stats_work%x_d,this%dwdy%x_d, this%p%x_d,n)
+         call this%pdwdy%update(k)
+         call device_col3(stats_work%x_d,this%dwdz%x_d, this%p%x_d,n)
+         call this%pdwdz%update(k)
+
+         call device_col3(this%stats_work%x_d,this%dudx%x_d, this%dudx%x_d,n)
+         call device_addcol3(this%stats_work%x_d,this%dudy%x_d, this%dudy%x_d,n)
+         call device_addcol3(this%stats_work%x_d,this%dudz%x_d, this%dudz%x_d,n)
+         call this%e11%update(k)
+         call device_col3(this%stats_work%x_d,this%dvdx%x_d, this%dvdx%x_d,n)
+         call device_addcol3(this%stats_work%x_d,this%dvdy%x_d, this%dvdy%x_d,n)
+         call device_addcol3(this%stats_work%x_d,this%dvdz%x_d, this%dvdz%x_d,n)
+         call this%e22%update(k)
+         call device_col3(this%stats_work%x_d,this%dwdx%x_d, this%dwdx%x_d,n)
+         call device_addcol3(this%stats_work%x_d,this%dwdy%x_d, this%dwdy%x_d,n)
+         call device_addcol3(this%stats_work%x_d,this%dwdz%x_d, this%dwdz%x_d,n)
+         call this%e33%update(k)
+         call device_col3(this%stats_work%x_d,this%dudx%x_d, this%dvdx%x_d,n)
+         call device_addcol3(this%stats_work%x_d,this%dudy%x_d, this%dvdy%x_d,n)
+         call device_addcol3(this%stats_work%x_d,this%dudz%x_d, this%dvdz%x_d,n)
+         call this%e12%update(k)
+         call device_col3(this%stats_work%x_d,this%dvdx%x_d, this%dwdx%x_d,n)
+         call device_addcol3(this%stats_work%x_d,this%dvdy%x_d, this%dwdy%x_d,n)
+         call device_addcol3(this%stats_work%x_d,this%dvdz%x_d, this%dwdz%x_d,n)
+         call this%e23%update(k)
+
+
+      else
+         call col3(stats_work%x,this%dudx%x, this%p%x,n)
+         call this%pdudx%update(k)
+         call col3(stats_work%x,this%dudy%x, this%p%x,n)
+         call this%pdudy%update(k)
+         call col3(stats_work%x,this%dudz%x, this%p%x,n)
+         call this%pdudz%update(k)
+
+         call col3(stats_work%x,this%dvdx%x, this%p%x,n)
+         call this%pdvdx%update(k)
+         call col3(stats_work%x,this%dvdy%x, this%p%x,n)
+         call this%pdvdy%update(k)
+         call col3(stats_work%x,this%dvdz%x, this%p%x,n)
+         call this%pdvdz%update(k)
+
+         call col3(stats_work%x,this%dwdx%x, this%p%x,n)
+         call this%pdwdx%update(k)
+         call col3(stats_work%x,this%dwdy%x, this%p%x,n)
+         call this%pdwdy%update(k)
+         call col3(stats_work%x,this%dwdz%x, this%p%x,n)
+         call this%pdwdz%update(k)
+
+         call col3(this%stats_work%x,this%dudx%x, this%dudx%x,n)
+         call addcol3(this%stats_work%x,this%dudy%x, this%dudy%x,n)
+         call addcol3(this%stats_work%x,this%dudz%x, this%dudz%x,n)
+         call this%e11%update(k)
+         call col3(this%stats_work%x,this%dvdx%x, this%dvdx%x,n)
+         call addcol3(this%stats_work%x,this%dvdy%x, this%dvdy%x,n)
+         call addcol3(this%stats_work%x,this%dvdz%x, this%dvdz%x,n)
+         call this%e22%update(k)
+         call col3(this%stats_work%x,this%dwdx%x, this%dwdx%x,n)
+         call addcol3(this%stats_work%x,this%dwdy%x, this%dwdy%x,n)
+         call addcol3(this%stats_work%x,this%dwdz%x, this%dwdz%x,n)
+         call this%e33%update(k)
+         call col3(this%stats_work%x,this%dudx%x, this%dvdx%x,n)
+         call addcol3(this%stats_work%x,this%dudy%x, this%dvdy%x,n)
+         call addcol3(this%stats_work%x,this%dudz%x, this%dvdz%x,n)
+         call this%e12%update(k)
+         call col3(this%stats_work%x,this%dvdx%x, this%dwdx%x,n)
+         call addcol3(this%stats_work%x,this%dvdy%x, this%dwdy%x,n)
+         call addcol3(this%stats_work%x,this%dvdz%x, this%dwdz%x,n)
+         call this%e23%update(k)
+
+      end if
+
+    end associate
 
   end subroutine fluid_stats_update
 
@@ -513,7 +513,7 @@ subroutine fluid_stats_free(this)
     call this%stats_u%free()
     call this%stats_v%free()
     call this%stats_w%free()
-    
+
     call this%uu%free()
     call this%vv%free()
     call this%ww%free()
@@ -531,13 +531,13 @@ subroutine fluid_stats_free(this)
     call this%dWdx%free()
     call this%dWdy%free()
     call this%dWdz%free()
- 
+
   end subroutine fluid_stats_free
- 
+
   !> Initialize a mean flow field
   subroutine fluid_stats_reset(this)
     class(fluid_stats_t), intent(inout), target:: this
-    
+
     call this%uu%reset()
     call this%vv%reset()
     call this%ww%reset()
@@ -561,11 +561,11 @@ subroutine fluid_stats_reset(this)
     call this%pp%reset()
     call this%ppp%reset()
     call this%pppp%reset()
-     !> Pressure * velocity
+    !> Pressure * velocity
     call this%pu%reset()
     call this%pv%reset()
     call this%pw%reset()
- 
+
     call this%pdudx%reset()
     call this%pdudy%reset()
     call this%pdudz%reset()
@@ -611,7 +611,7 @@ subroutine fluid_stats_make_strong_grad(this)
        call device_col2(this%e23%mf%x_d,this%stats_work%x_d, n)
 
 
-    else 
+    else
        call invers2(this%stats_work%x, this%coef%B,n)
        call col2(this%pdudx%mf%x, this%stats_work%x, n)
        call col2(this%pdudy%mf%x, this%stats_work%x, n)
@@ -648,35 +648,35 @@ subroutine fluid_stats_post_process(this, mean, reynolds, pressure_flatness,&
     integer :: n
 
     if (present(mean)) then
-        n = mean%fields(1)%f%dof%size()
-        call copy(mean%fields(1)%f%x,this%u_mean%x,n)
-        call copy(mean%fields(2)%f%x,this%v_mean%x,n)
-        call copy(mean%fields(3)%f%x,this%w_mean%x,n)
-        call copy(mean%fields(4)%f%x,this%p_mean%x,n)
+       n = mean%fields(1)%f%dof%size()
+       call copy(mean%fields(1)%f%x,this%u_mean%x,n)
+       call copy(mean%fields(2)%f%x,this%v_mean%x,n)
+       call copy(mean%fields(3)%f%x,this%w_mean%x,n)
+       call copy(mean%fields(4)%f%x,this%p_mean%x,n)
     end if
 
     if (present(reynolds)) then
-        n = reynolds%fields(1)%f%dof%size()
-        call copy(reynolds%fields(1)%f%x,this%pp%mf%x,n)
-        call subcol3(reynolds%fields(1)%f%x,this%p_mean%x,this%p_mean%x,n)
-
-        call copy(reynolds%fields(2)%f%x,this%uu%mf%x,n)
-        call subcol3(reynolds%fields(2)%f%x,this%u_mean%x,this%u_mean%x,n)
-        
-        call copy(reynolds%fields(3)%f%x,this%vv%mf%x,n)
-        call subcol3(reynolds%fields(3)%f%x,this%v_mean%x,this%v_mean%x,n)
-        
-        call copy(reynolds%fields(4)%f%x,this%ww%mf%x,n)
-        call subcol3(reynolds%fields(4)%f%x,this%w_mean%x,this%w_mean%x,n)
-        
-        call copy(reynolds%fields(5)%f%x,this%uv%mf%x,n)
-        call subcol3(reynolds%fields(5)%f%x,this%u_mean%x,this%v_mean%x,n)
-        
-        call copy(reynolds%fields(6)%f%x,this%uw%mf%x,n)
-        call subcol3(reynolds%fields(6)%f%x,this%u_mean%x,this%w_mean%x,n)
-        
-        call copy(reynolds%fields(7)%f%x,this%vw%mf%x,n)
-        call subcol3(reynolds%fields(7)%f%x,this%v_mean%x,this%w_mean%x,n)
+       n = reynolds%fields(1)%f%dof%size()
+       call copy(reynolds%fields(1)%f%x,this%pp%mf%x,n)
+       call subcol3(reynolds%fields(1)%f%x,this%p_mean%x,this%p_mean%x,n)
+
+       call copy(reynolds%fields(2)%f%x,this%uu%mf%x,n)
+       call subcol3(reynolds%fields(2)%f%x,this%u_mean%x,this%u_mean%x,n)
+
+       call copy(reynolds%fields(3)%f%x,this%vv%mf%x,n)
+       call subcol3(reynolds%fields(3)%f%x,this%v_mean%x,this%v_mean%x,n)
+
+       call copy(reynolds%fields(4)%f%x,this%ww%mf%x,n)
+       call subcol3(reynolds%fields(4)%f%x,this%w_mean%x,this%w_mean%x,n)
+
+       call copy(reynolds%fields(5)%f%x,this%uv%mf%x,n)
+       call subcol3(reynolds%fields(5)%f%x,this%u_mean%x,this%v_mean%x,n)
+
+       call copy(reynolds%fields(6)%f%x,this%uw%mf%x,n)
+       call subcol3(reynolds%fields(6)%f%x,this%u_mean%x,this%w_mean%x,n)
+
+       call copy(reynolds%fields(7)%f%x,this%vw%mf%x,n)
+       call subcol3(reynolds%fields(7)%f%x,this%v_mean%x,this%w_mean%x,n)
     end if
     if (present(pressure_skewness)) then
 
@@ -697,22 +697,37 @@ subroutine fluid_stats_post_process(this, mean, reynolds, pressure_flatness,&
        !Compute gradient of mean flow
        n = mean_vel_grad%fields(1)%f%dof%size()
        if (NEKO_BCKND_DEVICE .eq. 1) then
-          call device_memcpy(this%u_mean%x, this%u_mean%x_d, n, HOST_TO_DEVICE)
-          call device_memcpy(this%v_mean%x, this%v_mean%x_d, n, HOST_TO_DEVICE)
-          call device_memcpy(this%w_mean%x, this%w_mean%x_d, n, HOST_TO_DEVICE)
-          call opgrad(this%dudx%x,this%dudy%x, this%dudz%x,this%u_mean%x,this%coef)
-          call opgrad(this%dvdx%x,this%dvdy%x, this%dvdz%x,this%v_mean%x,this%coef)
-          call opgrad(this%dwdx%x,this%dwdy%x, this%dwdz%x,this%w_mean%x,this%coef)
-          call device_memcpy(this%dudx%x, this%dudx%x_d, n, DEVICE_TO_HOST)
-          call device_memcpy(this%dvdx%x, this%dvdx%x_d, n, DEVICE_TO_HOST)
-          call device_memcpy(this%dwdx%x, this%dwdx%x_d, n, DEVICE_TO_HOST)
-          call device_memcpy(this%dudy%x, this%dudy%x_d, n, DEVICE_TO_HOST)
-          call device_memcpy(this%dvdy%x, this%dvdy%x_d, n, DEVICE_TO_HOST)
-          call device_memcpy(this%dwdy%x, this%dwdy%x_d, n, DEVICE_TO_HOST)
-          call device_memcpy(this%dudz%x, this%dudz%x_d, n, DEVICE_TO_HOST)
-          call device_memcpy(this%dvdz%x, this%dvdz%x_d, n, DEVICE_TO_HOST)
-          call device_memcpy(this%dwdz%x, this%dwdz%x_d, n, DEVICE_TO_HOST, sync=.true.)
-       else 
+          call device_memcpy(this%u_mean%x, this%u_mean%x_d, n, &
+                             HOST_TO_DEVICE, sync=.false.)
+          call device_memcpy(this%v_mean%x, this%v_mean%x_d, n, &
+                             HOST_TO_DEVICE, sync=.false.)
+          call device_memcpy(this%w_mean%x, this%w_mean%x_d, n, &
+                             HOST_TO_DEVICE, sync=.false.)
+          call opgrad(this%dudx%x, this%dudy%x, this%dudz%x, &
+                      this%u_mean%x, this%coef)
+          call opgrad(this%dvdx%x, this%dvdy%x, this%dvdz%x, &
+                      this%v_mean%x, this%coef)
+          call opgrad(this%dwdx%x, this%dwdy%x, this%dwdz%x, &
+                      this%w_mean%x, this%coef)
+          call device_memcpy(this%dudx%x, this%dudx%x_d, n, &
+                             DEVICE_TO_HOST, sync=.false.)
+          call device_memcpy(this%dvdx%x, this%dvdx%x_d, n, &
+                             DEVICE_TO_HOST, sync=.false.)
+          call device_memcpy(this%dwdx%x, this%dwdx%x_d, n, &
+                             DEVICE_TO_HOST, sync=.false.)
+          call device_memcpy(this%dudy%x, this%dudy%x_d, n, &
+                             DEVICE_TO_HOST, sync=.false.)
+          call device_memcpy(this%dvdy%x, this%dvdy%x_d, n, &
+                             DEVICE_TO_HOST, sync=.false.)
+          call device_memcpy(this%dwdy%x, this%dwdy%x_d, n, &
+                             DEVICE_TO_HOST, sync=.false.)
+          call device_memcpy(this%dudz%x, this%dudz%x_d, n, &
+                             DEVICE_TO_HOST, sync=.false.)
+          call device_memcpy(this%dvdz%x, this%dvdz%x_d, n, &
+                             DEVICE_TO_HOST, sync=.false.)
+          call device_memcpy(this%dwdz%x, this%dwdz%x_d, n, &
+                             DEVICE_TO_HOST, sync=.true.)
+       else
           call opgrad(this%dudx%x,this%dudy%x, this%dudz%x,this%u_mean%x,this%coef)
           call opgrad(this%dvdx%x,this%dvdy%x, this%dvdz%x,this%v_mean%x,this%coef)
           call opgrad(this%dwdx%x,this%dwdy%x, this%dwdz%x,this%w_mean%x,this%coef)
@@ -735,5 +750,5 @@ subroutine fluid_stats_post_process(this, mean, reynolds, pressure_flatness,&
     end if
 
   end subroutine fluid_stats_post_process
- 
+
 end module fluid_stats
diff --git a/src/fluid/fluid_user_source_term.f90 b/src/fluid/fluid_user_source_term.f90
index 098e509c8d4..02518c5699f 100644
--- a/src/fluid/fluid_user_source_term.f90
+++ b/src/fluid/fluid_user_source_term.f90
@@ -36,7 +36,6 @@ module fluid_user_source_term
   use num_types, only : rp
   use utils, only : neko_error
   use source_term, only : source_term_t
-  use field, only : field_t
   use json_module, only : json_file
   use field_list, only : field_list_t
   use coefs, only : coef_t
@@ -54,13 +53,13 @@ module fluid_user_source_term
   !! actual implementation in the user file.
   !! @details The user source term can be applied either pointiwse or acting
   !! on the whole array in a single call, which is referred to as "vector"
-  !! application. 
+  !! application.
   !! @warning
   !! The user source term does not support init from JSON and should instead be
   !! directly initialized from components.
   type, public, extends(source_term_t) :: fluid_user_source_term_t
      !> Pointer to the dofmap of the right-hand-side fields.
-     type(dofmap_t), pointer :: dm 
+     type(dofmap_t), pointer :: dm
      !> x-component of source term.
      real(kind=rp), allocatable :: u(:, :, :, :)
      !> y-component of source term.
@@ -78,7 +77,7 @@ module fluid_user_source_term
      procedure(fluid_source_compute_pointwise), nopass, pointer :: compute_pw_ &
        => null()
      !> Compute the source term for the entire boundary
-     procedure(fluid_source_compute_vector), nopass, pointer :: compute_ & 
+     procedure(fluid_source_compute_vector), nopass, pointer :: compute_vector_&
        => null()
    contains
      !> Constructor from JSON (will throw!).
@@ -89,14 +88,14 @@ module fluid_user_source_term
      !> Destructor.
      procedure, pass(this) :: free => fluid_user_source_term_free
      !> Computes the source term and adds the result to `fields`.
-     procedure, pass(this) :: compute => fluid_user_source_term_compute
+     procedure, pass(this) :: compute_ => fluid_user_source_term_compute
   end type fluid_user_source_term_t
 
   abstract interface
      !> Computes the source term and adds the result to `fields`.
      !! @param t The time value.
      !! @param tstep The current time-step.
-     subroutine fluid_source_compute_vector(this, t)  
+     subroutine fluid_source_compute_vector(this, t)
        import fluid_user_source_term_t, rp
        class(fluid_user_source_term_t), intent(inout) :: this
        real(kind=rp), intent(in) :: t
@@ -111,7 +110,7 @@ end subroutine fluid_source_compute_vector
      !! @param j The x-index of GLL point.
      !! @param k The y-index of GLL point.
      !! @param l The z-index of GLL point.
-     !! @param e The index of element. 
+     !! @param e The index of element.
      !! @param t The time value.
      subroutine fluid_source_compute_pointwise(u, v, w, j, k, l, e, t)
        import rp
@@ -145,7 +144,7 @@ end subroutine fluid_user_source_term_init
   !> Costructor from components.
   !! @param fields A list of 3 fields for adding the source values.
   !! @param coef The SEM coeffs.
-  !! @param sourc_termtype The type of the user source term, "user_vector" or 
+  !! @param sourc_termtype The type of the user source term, "user_vector" or
   !! "user_pointwise".
   !! @param eval_vector The procedure to vector-compute the source term.
   !! @param eval_pointwise The procedure to pointwise-compute the source term.
@@ -159,7 +158,7 @@ subroutine fluid_user_source_term_init_from_components(this, fields, coef, &
     procedure(fluid_source_compute_pointwise), optional :: eval_pointwise
 
     call this%free()
-    call this%init_base(fields, coef)
+    call this%init_base(fields, coef, 0.0_rp, huge(0.0_rp))
 
     this%dm => fields%fields(1)%f%dof
 
@@ -186,11 +185,11 @@ subroutine fluid_user_source_term_init_from_components(this, fields, coef, &
        if (NEKO_BCKND_DEVICE .eq. 1) then
           call neko_error('Pointwise source terms not supported on accelerators')
        end if
-       this%compute_ => pointwise_eval_driver
+       this%compute_vector_ => pointwise_eval_driver
        this%compute_pw_ => eval_pointwise
     else if (trim(source_term_type) .eq. 'user_vector' .and. &
              present(eval_vector)) then
-       this%compute_ => eval_vector
+       this%compute_vector_ => eval_vector
     else
        call neko_error('Invalid fluid source term '//source_term_type)
     end if
@@ -208,7 +207,7 @@ subroutine fluid_user_source_term_free(this)
     if (c_associated(this%v_d)) call device_free(this%v_d)
     if (c_associated(this%w_d)) call device_free(this%w_d)
 
-    nullify(this%compute_)
+    nullify(this%compute_vector_)
     nullify(this%compute_pw_)
     nullify(this%dm)
 
@@ -224,14 +223,14 @@ subroutine fluid_user_source_term_compute(this, t, tstep)
     integer, intent(in) :: tstep
     integer :: n
 
-    call this%compute_(this, t)
+    call this%compute_vector_(this, t)
     n = this%fields%fields(1)%f%dof%size()
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_add2(this%fields%fields(1)%f%x_d, this%u_d, n)
        call device_add2(this%fields%fields(2)%f%x_d, this%v_d, n)
        call device_add2(this%fields%fields(3)%f%x_d, this%w_d, n)
-    else 
+    else
        call add2(this%fields%fields(1)%f%x, this%u, n)
        call add2(this%fields%fields(2)%f%x, this%v, n)
        call add2(this%fields%fields(3)%f%x, this%w, n)
@@ -249,26 +248,26 @@ subroutine pointwise_eval_driver(this, t)
 
     select type (this)
     type is (fluid_user_source_term_t)
-      do e = 1, size(this%u, 4)
-         ee = e
-         do l = 1, size(this%u, 3)
-            ll = l
-            do k = 1, size(this%u, 2)
-               kk = k
-               do j = 1, size(this%u, 1)
-                  jj =j
-                  call this%compute_pw_(this%u(j,k,l,e), &
+       do e = 1, size(this%u, 4)
+          ee = e
+          do l = 1, size(this%u, 3)
+             ll = l
+             do k = 1, size(this%u, 2)
+                kk = k
+                do j = 1, size(this%u, 1)
+                   jj =j
+                   call this%compute_pw_(this%u(j,k,l,e), &
                                         this%v(j,k,l,e), &
                                         this%w(j,k,l,e), &
                                         jj, kk, ll, ee, t)
-               end do
-            end do
-         end do
-      end do
+                end do
+             end do
+          end do
+       end do
     class default
        call neko_error('Incorrect source type in pointwise eval driver!')
     end select
-    
+
   end subroutine pointwise_eval_driver
-  
+
 end module fluid_user_source_term
diff --git a/src/fluid/fluid_volflow.f90 b/src/fluid/fluid_volflow.f90
index 47980232b2f..c328d83c1e6 100644
--- a/src/fluid/fluid_volflow.f90
+++ b/src/fluid/fluid_volflow.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,46 +21,46 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 module fluid_volflow
   use operators
   use num_types
-  use mathops    
+  use mathops
   use krylov, only : ksp_t, ksp_monitor_t
   use precon
   use dofmap
@@ -76,17 +76,16 @@ module fluid_volflow
   use json_module, only : json_file
   use json_utils, only: json_get
   use scratch_registry, only : scratch_registry_t
-  use bc, only : bc_list_t, bc_list_apply, bc_list_apply_vector, &
-                 bc_list_apply_scalar
+  use bc, only : bc_list_t, bc_list_apply_vector, bc_list_apply_scalar
   use ax_product, only : ax_t
   implicit none
   private
-  
+
   !> Defines volume flow
   type, public :: fluid_volflow_t
      integer :: flow_dir !< these two should be moved to params
-     logical :: avflow 
-     real(kind=rp) :: flow_rate 
+     logical :: avflow
+     real(kind=rp) :: flow_rate
      real(kind=rp) :: dtlag = 0d0
      real(kind=rp) :: bdlag = 0d0 !< Really quite pointless since we do not vary the timestep
      type(field_t) :: u_vol, v_vol, w_vol, p_vol
@@ -130,7 +129,7 @@ subroutine fluid_vol_flow_init(this, dm_Xh, params)
     end if
 
     this%scratch = scratch_registry_t(dm_Xh, 3, 1)
-    
+
   end subroutine fluid_vol_flow_init
 
   subroutine fluid_vol_flow_free(this)
@@ -142,7 +141,7 @@ subroutine fluid_vol_flow_free(this)
     call this%p_vol%free()
 
     call this%scratch%free()
-        
+
   end subroutine fluid_vol_flow_free
 
   !> Compute flow adjustment
@@ -172,11 +171,11 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, &
     type(ksp_monitor_t) :: ksp_result
     type(field_t), pointer :: ta1, ta2, ta3
     integer :: temp_indices(3)
-    
+
     call this%scratch%request_field(ta1, temp_indices(1))
     call this%scratch%request_field(ta2, temp_indices(2))
     call this%scratch%request_field(ta3, temp_indices(3))
-    
+
 
     associate(msh => c_Xh%msh, p_vol => this%p_vol, &
          u_vol => this%u_vol, v_vol => this%v_vol, w_vol => this%w_vol)
@@ -201,32 +200,32 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, &
             c_Xh%h2(i,1,1,1) = 0.0_rp
          end do
       end if
-      c_Xh%ifh2 = .false.         
+      c_Xh%ifh2 = .false.
 
       !   Compute pressure
-      
+
       if (this%flow_dir .eq. 1) then
          call cdtp(p_res%x, c_Xh%h1, c_Xh%drdx, c_Xh%dsdx, c_Xh%dtdx, c_Xh)
       end if
-      
+
       if (this%flow_dir .eq. 2) then
          call cdtp(p_res%x, c_Xh%h1, c_Xh%drdy, c_Xh%dsdy, c_Xh%dtdy, c_Xh)
       end if
-      
+
       if (this%flow_dir .eq. 3) then
          call cdtp(p_res%x, c_Xh%h1, c_Xh%drdz, c_Xh%dsdz, c_Xh%dtdz, c_Xh)
       end if
 
-      call gs_Xh%op(p_res, GS_OP_ADD) 
+      call gs_Xh%op(p_res, GS_OP_ADD)
       call bc_list_apply_scalar(bclst_dp, p_res%x, n)
       call pc_prs%update()
       ksp_result = ksp_prs%solve(Ax, p_vol, p_res%x, n, &
-           c_Xh, bclst_dp, gs_Xh, prs_max_iter)    
+           c_Xh, bclst_dp, gs_Xh, prs_max_iter)
 
       !   Compute velocity
-      
+
       call opgrad(u_res%x, v_res%x, w_res%x, p_vol%x, c_Xh)
-         
+
       if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. &
            (NEKO_BCKND_OPENCL .eq. 1)) then
          call device_opchsign(u_res%x_d, v_res%x_d, w_res%x_d, msh%gdim, n)
@@ -241,12 +240,12 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, &
       end if
       call bc_list_apply_vector(bclst_vel_res,&
            ta1%x, ta2%x, ta3%x, n)
-       
+
       ! add forcing
 
       if (NEKO_BCKND_DEVICE .eq. 1) then
          if (this%flow_dir .eq. 1) then
-            call device_add2(u_res%x_d, ta1%x_d, n) 
+            call device_add2(u_res%x_d, ta1%x_d, n)
          else if (this%flow_dir .eq. 2) then
             call device_add2(v_res%x_d, ta2%x_d, n)
          else if (this%flow_dir .eq. 3) then
@@ -254,7 +253,7 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, &
          end if
       else
          if (this%flow_dir .eq. 1) then
-            call add2(u_res%x, ta1%x, n) 
+            call add2(u_res%x, ta1%x, n)
          else if (this%flow_dir .eq. 2) then
             call add2(v_res%x, ta2%x, n)
          else if (this%flow_dir .eq. 3) then
@@ -273,19 +272,19 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, &
       end if
       c_Xh%ifh2 = .true.
 
-       call gs_Xh%op(u_res, GS_OP_ADD) 
-       call gs_Xh%op(v_res, GS_OP_ADD) 
-       call gs_Xh%op(w_res, GS_OP_ADD) 
+      call gs_Xh%op(u_res, GS_OP_ADD)
+      call gs_Xh%op(v_res, GS_OP_ADD)
+      call gs_Xh%op(w_res, GS_OP_ADD)
 
-       call bc_list_apply_vector(bclst_vel_res,&
+      call bc_list_apply_vector(bclst_vel_res,&
             u_res%x, v_res%x, w_res%x, n)
-       call pc_vel%update()
+      call pc_vel%update()
 
-       ksp_result = ksp_vel%solve(Ax, u_vol, u_res%x, n, &
+      ksp_result = ksp_vel%solve(Ax, u_vol, u_res%x, n, &
             c_Xh, bclst_du, gs_Xh, vel_max_iter)
-       ksp_result = ksp_vel%solve(Ax, v_vol, v_res%x, n, &
+      ksp_result = ksp_vel%solve(Ax, v_vol, v_res%x, n, &
             c_Xh, bclst_dv, gs_Xh, vel_max_iter)
-       ksp_result = ksp_vel%solve(Ax, w_vol, w_res%x, n, &
+      ksp_result = ksp_vel%solve(Ax, w_vol, w_res%x, n, &
             c_Xh, bclst_dw, gs_Xh, vel_max_iter)
 
       if (NEKO_BCKND_DEVICE .eq. 1) then
@@ -293,12 +292,12 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, &
             this%base_flow = &
                  device_glsc2(u_vol%x_d, c_Xh%B_d, n) / this%domain_length
          end if
-         
+
          if (this%flow_dir .eq. 2) then
             this%base_flow = &
                  device_glsc2(v_vol%x_d, c_Xh%B_d, n) / this%domain_length
          end if
-         
+
          if (this%flow_dir .eq. 3) then
             this%base_flow = &
                  device_glsc2(w_vol%x_d, c_Xh%B_d, n) / this%domain_length
@@ -307,16 +306,16 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, &
          if (this%flow_dir .eq. 1) then
             this%base_flow = glsc2(u_vol%x, c_Xh%B, n) / this%domain_length
          end if
-         
+
          if (this%flow_dir .eq. 2) then
             this%base_flow = glsc2(v_vol%x, c_Xh%B, n) / this%domain_length
          end if
-         
+
          if (this%flow_dir .eq. 3) then
             this%base_flow = glsc2(w_vol%x, c_Xh%B, n) / this%domain_length
          end if
       end if
-     end associate
+    end associate
 
     call this%scratch%relinquish_field(temp_indices)
   end subroutine fluid_vol_flow_compute
@@ -353,34 +352,34 @@ subroutine fluid_vol_flow(this, u, v, w, p, u_res, v_res, w_res, p_res, &
     integer :: n, ierr
     type(field_t), pointer :: ta1, ta2, ta3
     integer :: temp_indices(3)
-    
+
     associate(u_vol => this%u_vol, v_vol => this%v_vol, &
          w_vol => this%w_vol, p_vol => this%p_vol)
-      
+
       n = c_Xh%dof%size()
 
       ! If either dt or the backwards difference coefficient change,
       ! then recompute base flow solution corresponding to unit forcing:
-      
+
       ifcomp = 0.0_rp
 
       if (dt .ne. this%dtlag .or. ext_bdf%diffusion_coeffs(1) .ne. this%bdlag) then
          ifcomp = 1.0_rp
       end if
-      
+
       this%dtlag = dt
       this%bdlag = ext_bdf%diffusion_coeffs(1)
 
       call MPI_Allreduce(MPI_IN_PLACE, ifcomp, 1, &
            MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
-    
+
       if (ifcomp .gt. 0d0) then
          call this%compute(u_res, v_res, w_res, p_res, &
               ext_bdf, gs_Xh, c_Xh, rho, mu, ext_bdf%diffusion_coeffs(1), dt, &
               bclst_dp, bclst_du, bclst_dv, bclst_dw, bclst_vel_res, &
-              Ax, ksp_vel, ksp_prs, pc_prs, pc_vel, prs_max_iter, vel_max_iter)
+              Ax, ksp_prs, ksp_vel, pc_prs, pc_vel, prs_max_iter, vel_max_iter)
       end if
-      
+
       if (NEKO_BCKND_DEVICE .eq. 1) then
          if (this%flow_dir .eq. 1) then
             current_flow = &
@@ -408,10 +407,10 @@ subroutine fluid_vol_flow(this, u, v, w, p, u_res, v_res, w_res, p_res, &
       else
          flow_rate = this%flow_rate
       endif
-      
-      delta_flow = flow_rate - current_flow            
+
+      delta_flow = flow_rate - current_flow
       scale = delta_flow / this%base_flow
-      
+
       if (NEKO_BCKND_DEVICE .eq. 1) then
          call device_add2s2(u%x_d, u_vol%x_d, scale, n)
          call device_add2s2(v%x_d, v_vol%x_d, scale, n)
@@ -424,7 +423,7 @@ subroutine fluid_vol_flow(this, u, v, w, p, u_res, v_res, w_res, p_res, &
          call add2s2(p%x, p_vol%x, scale, n)
       end if
     end associate
-    
+
   end subroutine fluid_vol_flow
 
 
diff --git a/src/fluid/mean_flow.f90 b/src/fluid/mean_flow.f90
index fb55c7ae976..ea55da9a089 100644
--- a/src/fluid/mean_flow.f90
+++ b/src/fluid/mean_flow.f90
@@ -64,7 +64,7 @@ subroutine mean_flow_init(this, u, v, w, p)
     call this%v%init(v)
     call this%w%init(w)
     call this%p%init(p)
-    
+
   end subroutine mean_flow_init
 
 
@@ -78,7 +78,7 @@ subroutine mean_flow_free(this)
     call this%p%free()
 
   end subroutine mean_flow_free
- 
+
   !> Resets a mean flow field
   subroutine mean_flow_reset(this)
     class(mean_flow_t), intent(inout) :: this
@@ -89,5 +89,5 @@ subroutine mean_flow_reset(this)
     call this%p%reset()
 
   end subroutine mean_flow_reset
-   
+
 end module mean_flow
diff --git a/src/fluid/mean_sqr_flow.f90 b/src/fluid/mean_sqr_flow.f90
index e16200f491f..e7f5c942d54 100644
--- a/src/fluid/mean_sqr_flow.f90
+++ b/src/fluid/mean_sqr_flow.f90
@@ -46,9 +46,9 @@ module mean_sqr_flow
      procedure, pass(this) :: init => mean_sqr_flow_init
      procedure, pass(this) :: free => mean_sqr_flow_free
   end type mean_sqr_flow_t
-  
+
 contains
-  
+
   !> Initialize a mean squared flow field
   subroutine mean_sqr_flow_init(this, u, v, w, p)
     class(mean_sqr_flow_t), intent(inout) :: this
@@ -63,7 +63,7 @@ subroutine mean_sqr_flow_init(this, u, v, w, p)
     call this%vv%init(v)
     call this%ww%init(w)
     call this%pp%init(p)
-    
+
   end subroutine mean_sqr_flow_init
 
 
@@ -77,5 +77,5 @@ subroutine mean_sqr_flow_free(this)
     call this%pp%free()
 
   end subroutine mean_sqr_flow_free
-  
+
 end module mean_sqr_flow
diff --git a/src/fluid/pnpn_res.f90 b/src/fluid/pnpn_res.f90
index b3187fcb55e..1d6ab091f28 100644
--- a/src/fluid/pnpn_res.f90
+++ b/src/fluid/pnpn_res.f90
@@ -32,7 +32,7 @@
 !
 !> Defines Pressure and velocity residuals in the Pn-Pn formulation
 module pnpn_residual
-  use gather_scatter, only : gs_t  
+  use gather_scatter, only : gs_t
   use ax_product, only : ax_t
   use field, only : field_t
   use coefs, only : coef_t
@@ -40,14 +40,13 @@ module pnpn_residual
   use space, only : space_t
   use mesh, only : mesh_t
   use num_types, only : rp
-  use scratch_registry, only : scratch_registry_t
   implicit none
   private
-  
+
   !> Abstract type to compute pressure residual
   type, public, abstract :: pnpn_prs_res_t
    contains
-     procedure(prs_res), nopass, deferred :: compute     
+     procedure(prs_res), nopass, deferred :: compute
   end type pnpn_prs_res_t
 
   !> Abstract type to compute velocity residual
@@ -55,7 +54,7 @@ module pnpn_residual
    contains
      procedure(vel_res), nopass, deferred :: compute
   end type pnpn_vel_res_t
-    
+
   abstract interface
      subroutine prs_res(p, p_res, u, v, w, u_e, v_e, w_e, f_x, f_y, f_z, c_xh,&
           gs_Xh, bc_prs_surface, bc_sym_surface, Ax, bd, dt, mu, rho)
@@ -89,13 +88,13 @@ subroutine vel_res(Ax, u, v, w, u_res, v_res, w_res, &
        import Ax_t
        import gs_t
        import facet_normal_t
-       import space_t              
+       import space_t
        import coef_t
        import mesh_t
        import rp
        class(ax_t), intent(in) :: Ax
        type(mesh_t), intent(inout) :: msh
-       type(space_t), intent(inout) :: Xh    
+       type(space_t), intent(inout) :: Xh
        type(field_t), intent(inout) :: p, u, v, w
        type(field_t), intent(inout) :: u_res, v_res, w_res
        type(field_t), intent(inout) :: f_x, f_y, f_z
@@ -108,5 +107,5 @@ subroutine vel_res(Ax, u, v, w, u_res, v_res, w_res, &
      end subroutine vel_res
 
   end interface
- 
+
 end module pnpn_residual
diff --git a/src/fluid/pnpn_res_fctry.f90 b/src/fluid/pnpn_res_fctry.f90
index ba19353875c..cf583253271 100644
--- a/src/fluid/pnpn_res_fctry.f90
+++ b/src/fluid/pnpn_res_fctry.f90
@@ -43,7 +43,7 @@ module pnpn_res_fctry
 
   public :: pnpn_prs_res_t, pnpn_vel_res_t, &
        pnpn_prs_res_factory, pnpn_vel_res_factory
-  
+
 contains
 
   subroutine pnpn_prs_res_factory(prs_res)
@@ -53,7 +53,7 @@ subroutine pnpn_prs_res_factory(prs_res)
        deallocate(prs_res)
     end if
 
-    
+
     if (NEKO_BCKND_SX .eq. 1) then
        allocate(pnpn_prs_res_sx_t::prs_res)
     else if (NEKO_BCKND_DEVICE .eq. 1) then
@@ -61,9 +61,9 @@ subroutine pnpn_prs_res_factory(prs_res)
     else
        allocate(pnpn_prs_res_cpu_t::prs_res)
     end if
-    
+
   end subroutine pnpn_prs_res_factory
-  
+
   subroutine pnpn_vel_res_factory(vel_res)
     class(pnpn_vel_res_t), allocatable, intent(inout) :: vel_res
 
@@ -78,8 +78,8 @@ subroutine pnpn_vel_res_factory(vel_res)
     else
        allocate(pnpn_vel_res_cpu_t::vel_res)
     end if
-       
-    
+
+
   end subroutine pnpn_vel_res_factory
-  
+
 end module pnpn_res_fctry
diff --git a/src/gs/bcknd/cpu/gs_cpu.f90 b/src/gs/bcknd/cpu/gs_cpu.f90
index 6e26096372b..b95605f095b 100644
--- a/src/gs/bcknd/cpu/gs_cpu.f90
+++ b/src/gs/bcknd/cpu/gs_cpu.f90
@@ -47,9 +47,9 @@ module gs_cpu
      procedure, pass(this) :: gather => gs_gather_cpu
      procedure, pass(this) :: scatter => gs_scatter_cpu
   end type gs_cpu_t
-  
+
 contains
-  
+
   !> Dummy backend initialisation
   subroutine gs_cpu_init(this, nlocal, nshared, nlcl_blks, nshrd_blks)
     class(gs_cpu_t), intent(inout) :: this
@@ -78,7 +78,7 @@ subroutine gs_gather_cpu(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
     integer, intent(in) :: o
     integer, intent(in) :: op
     logical, intent(in) :: shrd
-    
+
     select case(op)
     case (GS_OP_ADD)
        call gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b)
@@ -89,9 +89,9 @@ subroutine gs_gather_cpu(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
     case (GS_OP_MAX)
        call gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b)
     end select
-    
+
   end subroutine gs_gather_cpu
- 
+
   !> Gather kernel for addition of data
   !! \f$ v(dg(i)) = v(dg(i)) + u(gd(i)) \f$
   subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b)
@@ -115,9 +115,9 @@ subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b)
           tmp = tmp + u(gd(k + j))
        end do
        v(dg(k + 1)) = tmp
-       k = k + blk_len        
+       k = k + blk_len
     end do
-    
+
     if (o .lt. 0) then
        do i = abs(o), m
           v(dg(i)) = u(gd(i))
@@ -128,7 +128,7 @@ subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b)
           v(dg(i)) = tmp
        end do
     end if
-    
+
   end subroutine gs_gather_kernel_add
 
   !> Gather kernel for multiplication of data
@@ -145,18 +145,18 @@ subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b)
     integer, intent(in) :: o
     integer :: i, j, k, blk_len
     real(kind=rp) :: tmp
-    
+
     k = 0
     do i = 1, nb
        blk_len = b(i)
-       tmp = u(gd(k + 1))              
+       tmp = u(gd(k + 1))
        do j = 2, blk_len
           tmp = tmp * u(gd(k + j))
        end do
        v(dg(k + 1)) = tmp
-       k = k + blk_len        
+       k = k + blk_len
     end do
-       
+
     if (o .lt. 0) then
        do i = abs(o), m
           v(dg(i)) = u(gd(i))
@@ -167,9 +167,9 @@ subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b)
           v(dg(i)) = tmp
        end do
     end if
-    
+
   end subroutine gs_gather_kernel_mul
-  
+
   !> Gather kernel for minimum of data
   !! \f$ v(dg(i)) = \min(v(dg(i)), u(gd(i))) \f$
   subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b)
@@ -193,9 +193,9 @@ subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b)
           tmp = min(tmp, u(gd(k + j)))
        end do
        v(dg(k + 1)) = tmp
-       k = k + blk_len        
+       k = k + blk_len
     end do
-       
+
     if (o .lt. 0) then
        do i = abs(o), m
           v(dg(i)) = u(gd(i))
@@ -206,7 +206,7 @@ subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b)
           v(dg(i)) = tmp
        end do
     end if
-    
+
   end subroutine gs_gather_kernel_min
 
   !> Gather kernel for maximum of data
@@ -232,9 +232,9 @@ subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b)
           tmp = max(tmp, u(gd(k + j)))
        end do
        v(dg(k + 1)) = tmp
-       k = k + blk_len        
+       k = k + blk_len
     end do
-       
+
     if (o .lt. 0) then
        do i = abs(o), m
           v(dg(i)) = u(gd(i))
@@ -245,7 +245,7 @@ subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b)
           v(dg(i)) = tmp
        end do
     end if
-    
+
   end subroutine gs_gather_kernel_max
 
   !> Scatter kernel  @todo Make the kernel abstract
@@ -261,7 +261,7 @@ subroutine gs_scatter_cpu(this, v, m, dg, u, n, gd, nb, b, shrd, event)
     integer, dimension(nb), intent(inout) :: b
     logical, intent(in) :: shrd
     type(c_ptr) :: event
-        
+
     call gs_scatter_kernel(v, m, dg, u, n, gd, nb, b)
 
   end subroutine gs_scatter_cpu
@@ -278,7 +278,7 @@ subroutine gs_scatter_kernel(v, m, dg, u, n, gd, nb, b)
     integer, dimension(nb), intent(inout) :: b
     integer :: i, j, k, blk_len
     real(kind=rp) :: tmp
-    
+
     k = 0
     do i = 1, nb
        blk_len = b(i)
diff --git a/src/gs/bcknd/device/cuda/gs_kernels.h b/src/gs/bcknd/device/cuda/gs_kernels.h
index cf0d81af850..ad7539b6905 100644
--- a/src/gs/bcknd/device/cuda/gs_kernels.h
+++ b/src/gs/bcknd/device/cuda/gs_kernels.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __GS_GS_KERNELS__
+#define __GS_GS_KERNELS__
+
 /**
  * Device gather kernel for addition of data
  * \f$ v(dg(i)) = v(dg(i)) + u(gd(i)) \f$
@@ -283,3 +286,5 @@ __global__ void gs_unpack_add_kernel(T * __restrict__ u,
     u[idx-1] += val;
   }
 }
+
+#endif // __GS_GS_KERNELS__
\ No newline at end of file
diff --git a/src/gs/bcknd/device/gs_device.F90 b/src/gs/bcknd/device/gs_device.F90
index f87fc0592ed..5f6ab705985 100644
--- a/src/gs/bcknd/device/gs_device.F90
+++ b/src/gs/bcknd/device/gs_device.F90
@@ -35,14 +35,14 @@ module gs_device
   use neko_config
   use num_types
   use gs_bcknd
-  use device    
+  use device
   use gs_ops
   use utils
   use, intrinsic :: iso_c_binding, only : c_ptr, c_int, C_NULL_PTR, &
                                           c_associated
   implicit none
   private
-  
+
   !> Gather-scatter backend for offloading devices
   type, public, extends(gs_bcknd_t) :: gs_device_t
      integer, allocatable :: local_blk_off(:)    !< Local block offset
@@ -57,7 +57,7 @@ module gs_device
      type(c_ptr) :: shared_blk_len_d = C_NULL_PTR!< Dev. ptr shared n-f blocks
      type(c_ptr) :: local_blk_off_d = C_NULL_PTR !< Dev. ptr local blk offset
      type(c_ptr) :: shared_blk_off_d = C_NULL_PTR!< Dev. ptr shared blk offset
-     integer :: nlocal              
+     integer :: nlocal
      integer :: nshared
      logical :: shared_on_host !< Shared points are handled on host
    contains
@@ -131,7 +131,7 @@ end subroutine opencl_scatter_kernel
 #endif
 
 contains
-  
+
   !> Accelerator backend initialisation
   subroutine gs_device_init(this, nlocal, nshared, nlcl_blks, nshrd_blks)
     class(gs_device_t), intent(inout) :: this
@@ -165,7 +165,7 @@ subroutine gs_device_init(this, nlocal, nshared, nlcl_blks, nshrd_blks)
     call device_event_create(this%scatter_event, 2)
 
     this%gs_stream = glb_cmd_queue
-    
+
   end subroutine gs_device_init
 
   !> Dummy backend deallocation
@@ -214,7 +214,7 @@ subroutine gs_device_free(this)
     if (c_associated(this%gather_event)) then
        call device_event_destroy(this%gather_event)
     end if
-    
+
     if (c_associated(this%scatter_event)) then
        call device_event_destroy(this%scatter_event)
     end if
@@ -222,7 +222,7 @@ subroutine gs_device_free(this)
     if (c_associated(this%gs_stream)) then
        this%gs_stream = C_NULL_PTR
     end if
-    
+
   end subroutine gs_device_free
 
   !> Gather kernel
@@ -243,7 +243,7 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
     type(c_ptr) :: u_d
 
     u_d = device_get_ptr(u)
-        
+
     if (.not. shrd) then
        associate(v_d=>this%local_gs_d, dg_d=>this%local_dof_gs_d, &
             gd_d=>this%local_gs_dof_d, b_d=>this%local_blk_len_d, &
@@ -256,17 +256,20 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
 
          if (.not. c_associated(dg_d)) then
             call device_map(dg, dg_d, m)
-            call device_memcpy(dg, dg_d, m, HOST_TO_DEVICE, strm=strm)
+            call device_memcpy(dg, dg_d, m, HOST_TO_DEVICE, &
+                               sync=.false., strm=strm)
          end if
 
          if (.not. c_associated(gd_d)) then
             call device_map(gd, gd_d, m)
-            call device_memcpy(gd, gd_d, m, HOST_TO_DEVICE, strm=strm)
+            call device_memcpy(gd, gd_d, m, HOST_TO_DEVICE, &
+                               sync=.false., strm=strm)
          end if
 
          if (.not. c_associated(b_d)) then
             call device_map(b, b_d, nb)
-            call device_memcpy(b, b_d, nb, HOST_TO_DEVICE, strm=strm)
+            call device_memcpy(b, b_d, nb, HOST_TO_DEVICE, &
+                               sync=.false., strm=strm)
          end if
 
          if (.not. c_associated(bo_d)) then
@@ -275,9 +278,10 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
             do  i = 2, nb
                bo(i) = bo(i - 1) + b(i - 1)
             end do
-            call device_memcpy(bo, bo_d, nb, HOST_TO_DEVICE, strm=strm)
+            call device_memcpy(bo, bo_d, nb, HOST_TO_DEVICE, &
+                               sync=.false., strm=strm)
          end if
-         
+
 #ifdef HAVE_HIP
          call hip_gather_kernel(v_d, m, o, dg_d, u_d, n, gd_d, &
                                 nb, b_d, bo_d, op, strm)
@@ -290,7 +294,7 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
 #else
          call neko_error('No device backend configured')
 #endif
-         
+
        end associate
     else if (shrd) then
        associate(v_d=>this%shared_gs_d, dg_d=>this%shared_dof_gs_d, &
@@ -304,17 +308,20 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
 
          if (.not. c_associated(dg_d)) then
             call device_map(dg, dg_d, m)
-            call device_memcpy(dg, dg_d, m, HOST_TO_DEVICE, strm=strm)
+            call device_memcpy(dg, dg_d, m, HOST_TO_DEVICE, &
+                               sync=.false., strm=strm)
          end if
 
          if (.not. c_associated(gd_d)) then
             call device_map(gd, gd_d, m)
-            call device_memcpy(gd, gd_d, m, HOST_TO_DEVICE, strm=strm)
+            call device_memcpy(gd, gd_d, m, HOST_TO_DEVICE, &
+                               sync=.false., strm=strm)
          end if
 
          if (.not. c_associated(b_d)) then
             call device_map(b, b_d, nb)
-            call device_memcpy(b, b_d, nb, HOST_TO_DEVICE, strm=strm)
+            call device_memcpy(b, b_d, nb, HOST_TO_DEVICE, &
+                               sync=.false., strm=strm)
          end if
 
          if (.not. c_associated(bo_d)) then
@@ -323,11 +330,12 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
             do  i = 2, nb
                bo(i) = bo(i - 1) + b(i - 1)
             end do
-            call device_memcpy(bo, bo_d, nb, HOST_TO_DEVICE, strm=strm)
+            call device_memcpy(bo, bo_d, nb, HOST_TO_DEVICE, &
+                               sync=.false., strm=strm)
          end if
 
-         
-#ifdef HAVE_HIP   
+
+#ifdef HAVE_HIP
          call hip_gather_kernel(v_d, m, o, dg_d, u_d, n, gd_d, &
                                 nb, b_d, bo_d, op, strm)
 #elif HAVE_CUDA
@@ -340,11 +348,12 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
          call neko_error('No device backend configured')
 #endif
 
-         call device_event_record(this%gather_event, strm)         
-         
+         call device_event_record(this%gather_event, strm)
+
          if (this%shared_on_host) then
             if (this%nshared .eq. m) then
-               call device_memcpy(v, v_d, m, DEVICE_TO_HOST,strm=strm)
+               call device_memcpy(v, v_d, m, DEVICE_TO_HOST, &
+                                  sync=.true., strm=strm)
             end if
          end if
 
@@ -352,7 +361,7 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
     end if
 
   end subroutine gs_gather_device
- 
+
   !> Scatter kernel
   subroutine gs_scatter_device(this, v, m, dg, u, n, gd, nb, b, shrd, event)
     integer, intent(in) :: m
@@ -390,9 +399,10 @@ subroutine gs_scatter_device(this, v, m, dg, u, n, gd, nb, b, shrd, event)
             bo_d=>this%shared_blk_off_d, strm=>this%gs_stream)
 
          if (this%shared_on_host) then
-            call device_memcpy(v, v_d, m, HOST_TO_DEVICE, strm=strm)
+            call device_memcpy(v, v_d, m, HOST_TO_DEVICE, &
+                               sync=.false., strm=strm)
          end if
-         
+
 #ifdef HAVE_HIP
          call hip_scatter_kernel(v_d, m, dg_d, u_d, n, gd_d, nb, b_d, bo_d, strm)
 #elif HAVE_CUDA
@@ -407,8 +417,8 @@ subroutine gs_scatter_device(this, v, m, dg, u, n, gd, nb, b, shrd, event)
             call device_event_record(event, strm)
          else
             call device_sync(strm)
-         end if         
-         
+         end if
+
        end associate
     end if
 
diff --git a/src/gs/bcknd/device/gs_device_mpi.F90 b/src/gs/bcknd/device/gs_device_mpi.F90
index 2cb6320662b..e6cb4c55b58 100644
--- a/src/gs/bcknd/device/gs_device_mpi.F90
+++ b/src/gs/bcknd/device/gs_device_mpi.F90
@@ -65,7 +65,7 @@ module gs_device_mpi
      type(c_ptr), allocatable :: stream(:)
      type(c_ptr), allocatable :: event(:)
      integer :: nb_strtgy
-     type(c_ptr) :: send_event = C_NULL_PTR          
+     type(c_ptr) :: send_event = C_NULL_PTR
    contains
      procedure, pass(this) :: init => gs_device_mpi_init
      procedure, pass(this) :: free => gs_device_mpi_free
@@ -94,7 +94,7 @@ subroutine hip_gs_unpack(u_d, op, buf_d, dof_d, offset, n, stream) &
        type(c_ptr), value :: u_d, buf_d, dof_d, stream
      end subroutine hip_gs_unpack
   end interface
-#elif HAVE_CUDA  
+#elif HAVE_CUDA
   interface
      subroutine cuda_gs_pack(u_d, buf_d, dof_d, offset, n, stream) &
           bind(c, name='cuda_gs_pack')
@@ -230,28 +230,28 @@ subroutine gs_device_mpi_buf_init(this, pe_order, dof_stack, mark_dupes)
        ! %array() breaks on cray
        select type (arr => dof_stack(pe_order(i))%data)
        type is (integer)
-         do j = 1, this%ndofs(i)
-            k = this%offset(i) + j
-            if (mark_dupes) then
-               if (doftable%get(arr(j), dupe) .eq. 0) then
-                  if (dofs(dupe) .gt. 0) then
-                     dofs(dupe) = -dofs(dupe)
-                     marked = marked + 1
-                  end if
-                  dofs(k) = -arr(j)
-                  marked = marked + 1
-               else
-                  call doftable%set(arr(j), k)
-                  dofs(k) = arr(j)
-               end if
-            else
-               dofs(k) = arr(j)
-            end if
-         end do
+          do j = 1, this%ndofs(i)
+             k = this%offset(i) + j
+             if (mark_dupes) then
+                if (doftable%get(arr(j), dupe) .eq. 0) then
+                   if (dofs(dupe) .gt. 0) then
+                      dofs(dupe) = -dofs(dupe)
+                      marked = marked + 1
+                   end if
+                   dofs(k) = -arr(j)
+                   marked = marked + 1
+                else
+                   call doftable%set(arr(j), k)
+                   dofs(k) = arr(j)
+                end if
+             else
+                dofs(k) = arr(j)
+             end if
+          end do
        end select
     end do
 
-    call device_memcpy(dofs, this%dof_d, total, HOST_TO_DEVICE)
+    call device_memcpy(dofs, this%dof_d, total, HOST_TO_DEVICE, sync=.false.)
 
     deallocate(dofs)
     call doftable%free()
@@ -268,7 +268,7 @@ subroutine gs_device_mpi_buf_free(this)
 
     if (c_associated(this%buf_d)) call device_free(this%buf_d)
     if (c_associated(this%dof_d)) call device_free(this%dof_d)
-  end subroutine
+  end subroutine gs_device_mpi_buf_free
 
   !> Initialise MPI based communication method
   subroutine gs_device_mpi_init(this, send_pe, recv_pe)
@@ -319,7 +319,7 @@ subroutine gs_device_mpi_free(this)
        deallocate(this%stream)
     end if
 #endif
-    
+
   end subroutine gs_device_mpi_free
 
   !> Post non-blocking send operations
@@ -353,16 +353,16 @@ subroutine gs_device_mpi_nbsend(this, u, n, deps, strm)
 #endif
 
        call device_sync(strm)
-       
+
        do i = 1, size(this%send_pe)
           call device_mpi_isend(this%send_buf%buf_d, &
                                 rp*this%send_buf%offset(i), &
                                 rp*this%send_buf%ndofs(i), this%send_pe(i), &
                                 this%send_buf%reqs, i)
        end do
-       
+
     else
-       
+
        do i = 1, size(this%send_pe)
           call device_stream_wait_event(this%stream(i), deps, 0)
 #ifdef HAVE_HIP
@@ -383,7 +383,7 @@ subroutine gs_device_mpi_nbsend(this, u, n, deps, strm)
           call neko_error('gs_device_mpi: no backend')
 #endif
        end do
-   
+
        ! Consider adding a poll loop here once we have device_query in place
        do i = 1, size(this%send_pe)
           call device_sync(this%stream(i))
@@ -393,7 +393,7 @@ subroutine gs_device_mpi_nbsend(this, u, n, deps, strm)
                                 this%send_buf%reqs, i)
        end do
     end if
-    
+
   end subroutine gs_device_mpi_nbsend
 
   !> Post non-blocking receive operations
@@ -443,12 +443,12 @@ subroutine gs_device_mpi_nbwait(this, u, n, op, strm)
 
        ! Syncing here seems to prevent some race condition
        call device_sync(strm)
-       
+
     else
 
        do while(device_mpi_waitany(size(this%recv_pe), &
                                   this%recv_buf%reqs, done_req) .ne. 0)
-       
+
 #ifdef HAVE_HIP
           call hip_gs_unpack(u_d, op, &
                              this%recv_buf%buf_d, &
@@ -456,7 +456,7 @@ subroutine gs_device_mpi_nbwait(this, u, n, op, strm)
                              this%recv_buf%offset(done_req), &
                              this%recv_buf%ndofs(done_req), &
                              this%stream(done_req))
-#elif HAVE_CUDA    
+#elif HAVE_CUDA
           call cuda_gs_unpack(u_d, op, &
                               this%recv_buf%buf_d, &
                               this%recv_buf%dof_d, &
@@ -468,7 +468,7 @@ subroutine gs_device_mpi_nbwait(this, u, n, op, strm)
 #endif
           call device_event_record(this%event(done_req), this%stream(done_req))
        end do
-    
+
        call device_mpi_waitall(size(this%send_pe), this%send_buf%reqs)
 
        ! Sync non-blocking streams
@@ -476,7 +476,7 @@ subroutine gs_device_mpi_nbwait(this, u, n, op, strm)
           call device_stream_wait_event(strm, &
                this%event(done_req), 0)
        end do
-       
+
     end if
 
   end subroutine gs_device_mpi_nbwait
diff --git a/src/gs/bcknd/device/hip/gs_kernels.h b/src/gs/bcknd/device/hip/gs_kernels.h
index 0825e4e1c85..2858171a48b 100644
--- a/src/gs/bcknd/device/hip/gs_kernels.h
+++ b/src/gs/bcknd/device/hip/gs_kernels.h
@@ -32,6 +32,9 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+#ifndef __GS_GS_KERNELS__
+#define __GS_GS_KERNELS__
+
 /**
  * Device gather kernel for addition of data
  * \f$ v(dg(i)) = v(dg(i)) + u(gd(i)) \f$
@@ -74,7 +77,7 @@ __global__ void gather_kernel_add(T * __restrict__ v,
       }
     }
   }
-  
+
 }
 
 /**
@@ -91,7 +94,7 @@ __global__ void gather_kernel_mul(T * __restrict__ v,
                                   const int * __restrict__ gd,
                                   const int nb,
                                   const int * __restrict__ b,
-                                  const int * __restrict__ bo) { 
+                                  const int * __restrict__ bo) {
 
   const int idx = blockIdx.x * blockDim.x + threadIdx.x;
   const int str = blockDim.x * gridDim.x;
@@ -136,7 +139,7 @@ __global__ void gather_kernel_min(T * __restrict__ v,
                                   const int * __restrict__ gd,
                                   const int nb,
                                   const int * __restrict__ b,
-                                  const int * __restrict__ bo) { 
+                                  const int * __restrict__ bo) {
 
   const int idx = blockIdx.x * blockDim.x + threadIdx.x;
   const int str = blockDim.x * gridDim.x;
@@ -164,7 +167,7 @@ __global__ void gather_kernel_min(T * __restrict__ v,
       }
     }
   }
-  
+
 }
 
 /**
@@ -181,7 +184,7 @@ __global__ void gather_kernel_max(T * __restrict__ v,
                                   const int * __restrict__ gd,
                                   const int nb,
                                   const int * __restrict__ b,
-                                  const int * __restrict__ bo) { 
+                                  const int * __restrict__ bo) {
 
   const int idx = blockIdx.x * blockDim.x + threadIdx.x;
   const int str = blockDim.x * gridDim.x;
@@ -195,7 +198,7 @@ __global__ void gather_kernel_max(T * __restrict__ v,
     }
     v[dg[k] - 1] = tmp;
   }
-  
+
   if (o < 0) {
     for (int i = ((abs(o) - 1) + idx); i < m ; i += str) {
       v[dg[i] - 1] = u[gd[i] - 1];
@@ -209,7 +212,7 @@ __global__ void gather_kernel_max(T * __restrict__ v,
       }
     }
   }
-  
+
 }
 
 /**
@@ -226,25 +229,25 @@ __global__ void scatter_kernel(T * __restrict__ v,
                                const int nb,
                                const int *__restrict__ b,
                                const int *__restrict__ bo) {
-  
+
   const int idx = blockIdx.x * blockDim.x + threadIdx.x;
   const int str = blockDim.x * gridDim.x;
-  
+
   for (int i = idx; i < nb; i += str) {
     const int blk_len = b[i];
     const int k = bo[i];
     T tmp = v[dg[k] - 1];
     for (int j = 0; j < blk_len; j++) {
       u[gd[k + j] - 1] = tmp;
-    }      
+    }
   }
 
   const int facet_offset = bo[nb - 1] + b[nb - 1];
-  
+
   for (int i = ((facet_offset - 1) + idx); i < m; i += str) {
     u[gd[i] - 1] = v[dg[i] - 1];
   }
-  
+
 }
 
 template< typename T >
@@ -281,3 +284,5 @@ __global__ void gs_unpack_add_kernel(T * __restrict__ u,
     u[idx-1] += val;
   }
 }
+
+#endif // __GS_GS_KERNELS__
\ No newline at end of file
diff --git a/src/gs/bcknd/device/opencl/gs_kernels.cl b/src/gs/bcknd/device/opencl/gs_kernels.cl
index 9d978821f25..334b2922dc5 100644
--- a/src/gs/bcknd/device/opencl/gs_kernels.cl
+++ b/src/gs/bcknd/device/opencl/gs_kernels.cl
@@ -32,6 +32,10 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+
+#ifndef __GS_GS_KERNELS__
+#define __GS_GS_KERNELS__
+
 /**
  * Device gather kernel for addition of data
  * \f$ v(dg(i)) = v(dg(i)) + u(gd(i)) \f$
@@ -241,3 +245,5 @@ __kernel void scatter_kernel(__global real * __restrict__ v,
   }
   
 }
+
+#endif // __GS_GS_KERNELS__
\ No newline at end of file
diff --git a/src/gs/bcknd/sx/gs_sx.f90 b/src/gs/bcknd/sx/gs_sx.f90
index 58d68215d7d..3f5535d5818 100644
--- a/src/gs/bcknd/sx/gs_sx.f90
+++ b/src/gs/bcknd/sx/gs_sx.f90
@@ -51,9 +51,9 @@ module gs_sx
      procedure, pass(this) :: gather => gs_gather_sx
      procedure, pass(this) :: scatter => gs_scatter_sx
   end type gs_sx_t
-  
+
 contains
-  
+
   !> SX backend initialisation
   subroutine gs_sx_init(this, nlocal, nshared, nlcl_blks, nshrd_blks)
     class(gs_sx_t), intent(inout) :: this
@@ -61,7 +61,7 @@ subroutine gs_sx_init(this, nlocal, nshared, nlcl_blks, nshrd_blks)
     integer, intent(in) :: nshared
     integer, intent(in) :: nlcl_blks
     integer, intent(in) :: nshrd_blks
-       
+
     call this%free()
 
     this%nlocal = nlocal
@@ -69,7 +69,7 @@ subroutine gs_sx_init(this, nlocal, nshared, nlcl_blks, nshrd_blks)
 
     allocate(this%local_wrk(nlocal))
     allocate(this%shared_wrk(nshared))
-    
+
   end subroutine gs_sx_init
 
   !> SX backend deallocation
@@ -86,7 +86,7 @@ subroutine gs_sx_free(this)
 
     this%nlocal = 0
     this%nshared = 0
-    
+
   end subroutine gs_sx_free
 
   !> Gather kernel
@@ -131,9 +131,9 @@ subroutine gs_gather_sx(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
          end select
        end associate
     end if
-    
+
   end subroutine gs_gather_sx
- 
+
   !> Gather kernel for addition of data
   !! \f$ v(dg(i)) = v(dg(i)) + u(gd(i)) \f$
   subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b, w)
@@ -150,7 +150,7 @@ subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b, w)
     integer :: i
     real(kind=rp) :: tmp
 
-    v = 0d0 
+    v = 0d0
     do i = 1, abs(o) - 1
        w(i) = u(gd(i))
     end do
@@ -158,7 +158,7 @@ subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b, w)
     do i = 1, abs(o) - 1
        v(dg(i)) = v(dg(i)) + w(i)
     end do
-    
+
     if (o .lt. 0) then
        do i = abs(o), m
           v(dg(i)) = u(gd(i))
@@ -169,7 +169,7 @@ subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b, w)
           v(dg(i)) = tmp
        end do
     end if
-    
+
   end subroutine gs_gather_kernel_add
 
   !> Gather kernel for multiplication of data
@@ -187,7 +187,7 @@ subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b, w)
     integer, intent(in) :: o
     integer :: i
     real(kind=rp) :: tmp
-    
+
     do i = 1, abs(o) - 1
        w(i) = u(gd(i))
     end do
@@ -195,7 +195,7 @@ subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b, w)
     do i = 1, abs(o) - 1
        v(dg(i)) = v(dg(i)) * w(i)
     end do
-    
+
     if (o .lt. 0) then
        do i = abs(o), m
           v(dg(i)) = u(gd(i))
@@ -206,9 +206,9 @@ subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b, w)
           v(dg(i)) = tmp
        end do
     end if
-    
+
   end subroutine gs_gather_kernel_mul
-  
+
   !> Gather kernel for minimum of data
   !! \f$ v(dg(i)) = \min(v(dg(i)), u(gd(i))) \f$
   subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b, w)
@@ -232,7 +232,7 @@ subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b, w)
     do i = 1, abs(o) - 1
        v(dg(i)) = min(v(dg(i)), w(i))
     end do
-    
+
     if (o .lt. 0) then
        do i = abs(o), m
           v(dg(i)) = u(gd(i))
@@ -243,7 +243,7 @@ subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b, w)
           v(dg(i)) = tmp
        end do
     end if
-    
+
   end subroutine gs_gather_kernel_min
 
   !> Gather kernel for maximum of data
@@ -269,7 +269,7 @@ subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b, w)
     do i = 1, abs(o) - 1
        v(dg(i)) = max(v(dg(i)), w(i))
     end do
-    
+
     if (o .lt. 0) then
        do i = abs(o), m
           v(dg(i)) = u(gd(i))
@@ -280,7 +280,7 @@ subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b, w)
           v(dg(i)) = tmp
        end do
     end if
-    
+
   end subroutine gs_gather_kernel_max
 
   !> Scatter kernel  @todo Make the kernel abstract
@@ -296,7 +296,7 @@ subroutine gs_scatter_sx(this, v, m, dg, u, n, gd, nb, b, shrd, event)
     integer, dimension(nb), intent(inout) :: b
     logical, intent(in) :: shrd
     type(c_ptr) :: event
-        
+
     if (.not. shrd) then
        call gs_scatter_kernel(v, m, dg, u, n, gd, nb, b, this%local_wrk)
     else if (shrd) then
@@ -317,7 +317,7 @@ subroutine gs_scatter_kernel(v, m, dg, u, n, gd, nb, b, w)
     integer, dimension(m), intent(inout) :: gd
     integer, dimension(nb), intent(inout) :: b
     integer :: i
-    
+
     !NEC$ IVDEP
     do i = 1, m
        w(i) = v(dg(i))
@@ -327,7 +327,7 @@ subroutine gs_scatter_kernel(v, m, dg, u, n, gd, nb, b, w)
     do i = 1, m
        u(gd(i)) = w(i)
     end do
-    
+
   end subroutine gs_scatter_kernel
 
 end module gs_sx
diff --git a/src/gs/gather_scatter.f90 b/src/gs/gather_scatter.f90
index 6b8473a994e..5665d55c830 100644
--- a/src/gs/gather_scatter.f90
+++ b/src/gs/gather_scatter.f90
@@ -84,7 +84,7 @@ module gather_scatter
   end type gs_t
 
   public :: GS_OP_ADD, GS_OP_MUL, GS_OP_MIN, GS_OP_MAX
-  
+
 contains
 
   !> Initialize a gather-scatter kernel
@@ -107,9 +107,9 @@ subroutine gs_init(gs, dofmap, bcknd)
     call gs%free()
 
     call neko_log%section('Gather-Scatter')
-    
+
     gs%dofmap => dofmap
-    
+
     ! Here one could use some heuristic or autotuning to select comm method,
     ! such as only using device MPI when there is enough data.
     !use_device_mpi = NEKO_DEVICE_MPI .and. gs%nshared .gt. 20000
@@ -130,17 +130,17 @@ subroutine gs_init(gs, dofmap, bcknd)
 
     glb_nlocal = int(gs%nlocal, i8)
     glb_nshared = int(gs%nshared, i8)
-    
+
     if (pe_rank .eq. 0) then
        call MPI_Reduce(MPI_IN_PLACE, glb_nlocal, 1, &
             MPI_INTEGER8, MPI_SUM, 0, NEKO_COMM, ierr)
-       
+
        call MPI_Reduce(MPI_IN_PLACE, glb_nshared, 1, &
             MPI_INTEGER8, MPI_SUM, 0, NEKO_COMM, ierr)
     else
        call MPI_Reduce(glb_nlocal, glb_nlocal, 1, &
             MPI_INTEGER8, MPI_SUM, 0, NEKO_COMM, ierr)
-       
+
        call MPI_Reduce(glb_nshared, glb_nshared, 1, &
             MPI_INTEGER8, MPI_SUM, 0, NEKO_COMM, ierr)
     end if
@@ -149,7 +149,7 @@ subroutine gs_init(gs, dofmap, bcknd)
     call neko_log%message(log_buf)
     write(log_buf, '(A,I12)') 'Avg. external: ', glb_nshared/pe_size
     call neko_log%message(log_buf)
-    
+
     if (present(bcknd)) then
        bcknd_ = bcknd
     else
@@ -185,7 +185,7 @@ subroutine gs_init(gs, dofmap, bcknd)
 
     write(log_buf, '(A)') 'Backend      : ' // trim(bcknd_str)
     call neko_log%message(log_buf)
-    
+
 
 
     call gs%bcknd%init(gs%nlocal, gs%nshared, gs%nlocal_blks, gs%nshared_blks)
@@ -201,14 +201,15 @@ subroutine gs_init(gs, dofmap, bcknd)
           select type(c => gs%comm)
           type is (gs_device_mpi_t)
              call get_environment_variable("NEKO_GS_STRTGY", env_strtgy, env_len)
-             if (env_len .eq. 0) then             
+             if (env_len .eq. 0) then
                 allocate(tmp(dofmap%size()))
                 call device_map(tmp, tmp_d, dofmap%size())
                 tmp = 1.0_rp
-                call device_memcpy(tmp, tmp_d, dofmap%size(), HOST_TO_DEVICE)
+                call device_memcpy(tmp, tmp_d, dofmap%size(), &
+                                   HOST_TO_DEVICE, sync=.false.)
                 call gs_op_vector(gs, tmp, dofmap%size(), GS_OP_ADD)
-                   
-                do i = 1, size(strtgy)          
+
+                do i = 1, size(strtgy)
                    c%nb_strtgy = strtgy(i)
                    call device_sync
                    call MPI_Barrier(NEKO_COMM)
@@ -218,13 +219,13 @@ subroutine gs_init(gs, dofmap, bcknd)
                    end do
                    strtgy_time(i) = (MPI_Wtime() - strtgy_time(i)) / 100d0
                 end do
-                
+
                 call device_deassociate(tmp)
                 call device_free(tmp_d)
                 deallocate(tmp)
-                
+
                 c%nb_strtgy = strtgy(minloc(strtgy_time, 1))
-             
+
                 avg_strtgy = minloc(strtgy_time, 1)
                 call MPI_Allreduce(MPI_IN_PLACE, avg_strtgy, 1, &
                                    MPI_INTEGER, MPI_SUM, NEKO_COMM)
@@ -239,22 +240,22 @@ subroutine gs_init(gs, dofmap, bcknd)
                 if (i .lt. 1 .or. i .gt. 4) then
                    call neko_error('Invalid gs sync strtgy')
                 end if
-                
+
                 c%nb_strtgy = strtgy(i)
                 avg_strtgy = i
 
                 write(log_buf, '(A,B0.2,A)') 'Env. strtgy  :         [', &
                      strtgy(avg_strtgy),']'
              end if
-             
-             call neko_log%message(log_buf)             
+
+             call neko_log%message(log_buf)
 
           end select
        end if
     end if
 
     call neko_log%end_section()
-    
+
   end subroutine gs_init
 
   !> Deallocate a gather-scatter kernel
@@ -266,7 +267,7 @@ subroutine gs_free(gs)
     if (allocated(gs%local_gs)) then
        deallocate(gs%local_gs)
     end if
-    
+
     if (allocated(gs%local_dof_gs)) then
        deallocate(gs%local_dof_gs)
     end if
@@ -282,7 +283,7 @@ subroutine gs_free(gs)
     if (allocated(gs%shared_gs)) then
        deallocate(gs%shared_gs)
     end if
-    
+
     if (allocated(gs%shared_dof_gs)) then
        deallocate(gs%shared_dof_gs)
     end if
@@ -311,7 +312,7 @@ subroutine gs_free(gs)
        call gs%comm%free()
        deallocate(gs%comm)
     end if
-    
+
   end subroutine gs_free
 
   !> Setup mapping of dofs to gather-scatter operations
@@ -339,24 +340,24 @@ subroutine gs_init_mapping(gs)
     !>@note this might be a bit overkill,
     !!but having many collisions makes the init take too long.
     call sdm%init(dofmap%size(), i)
-    
+
 
     call local_dof%init()
     call dof_local%init()
 
     call local_face_dof%init()
-    call face_dof_local%init()   
-    
+    call face_dof_local%init()
+
     call shared_dof%init()
     call dof_shared%init()
-    
+
     call shared_face_dof%init()
-    call face_dof_shared%init()   
+    call face_dof_shared%init()
 
     !
     ! Setup mapping for dofs points
     !
-    
+
     max_id = 0
     max_sid = 0
     do i = 1, msh%nelv
@@ -496,7 +497,7 @@ subroutine gs_init_mapping(gs)
                 id = linear_index(j, ly, 1, i, lx, ly, lz)
                 call dof_shared%push(id)
              end do
-             
+
           else
              do j = 2, lx - 1
                 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, 1, i), max_id)
@@ -595,7 +596,7 @@ subroutine gs_init_mapping(gs)
                 id = linear_index(1, 1, l, i, lx, ly, lz)
                 call dof_shared%push(id)
              end do
-          else          
+          else
              do l = 2, lz - 1
                 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, l, i), max_id)
                 call local_dof%push(id)
@@ -603,7 +604,7 @@ subroutine gs_init_mapping(gs)
                 call dof_local%push(id)
              end do
           end if
-       
+
           if (dofmap%shared_dof(lx, 1, 2, i)) then
              do l = 2, lz - 1
                 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, l, i), max_sid)
@@ -635,14 +636,14 @@ subroutine gs_init_mapping(gs)
                 call dof_local%push(id)
              end do
           end if
-          
+
           if (dofmap%shared_dof(lx, ly, 2, i)) then
              do l = 2, lz - 1
                 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, l, i), max_sid)
                 call shared_dof%push(id)
                 id = linear_index(lx, ly, l, i, lx, ly, lz)
                 call dof_shared%push(id)
-             end do       
+             end do
           else
              do l = 2, lz - 1
                 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, l, i), max_id)
@@ -688,7 +689,7 @@ subroutine gs_init_mapping(gs)
                    id = linear_index(j, ly, 1, i, lx, ly, lz)
                    call face_dof_shared%push(id)
                 end do
-                
+
              else
                 do j = 2, lx - 1
                    id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, 1, i), max_id)
@@ -738,7 +739,7 @@ subroutine gs_init_mapping(gs)
              end if
           end if
        end do
-    else 
+    else
        do i = 1, msh%nelv
 
           ! Facets in x-direction (s, t)-plane
@@ -785,7 +786,7 @@ subroutine gs_init_mapping(gs)
                 end do
              end if
           end if
-             
+
           ! Facets in y-direction (r, t)-plane
           if (msh%facet_neigh(3, i) .ne. 0) then
              if (dofmap%shared_dof(2, 1, 2, i)) then
@@ -830,7 +831,7 @@ subroutine gs_init_mapping(gs)
                 end do
              end if
           end if
-          
+
           ! Facets in z-direction (r, s)-plane
           if (msh%facet_neigh(5, i) .ne. 0) then
              if (dofmap%shared_dof(2, 2, 1, i)) then
@@ -877,13 +878,13 @@ subroutine gs_init_mapping(gs)
           end if
        end do
     end if
-       
+
 
     call dm%free()
-    
+
     gs%nlocal = local_dof%size() + local_face_dof%size()
     gs%local_facet_offset = local_dof%size() + 1
-    
+
     ! Finalize local dof to gather-scatter index
     allocate(gs%local_dof_gs(gs%nlocal))
 
@@ -941,15 +942,15 @@ subroutine gs_init_mapping(gs)
        end do
     end select
     call face_dof_local%free()
-       
+
     call gs_qsort_dofmap(gs%local_dof_gs, gs%local_gs_dof, &
          gs%nlocal, 1, gs%nlocal)
-    
+
     call gs_find_blks(gs%local_dof_gs, gs%local_blk_len, &
          gs%nlocal_blks, gs%nlocal, gs%local_facet_offset)
-    
+
     ! Allocate buffer for local gs-ops
-    allocate(gs%local_gs(gs%nlocal))   
+    allocate(gs%local_gs(gs%nlocal))
 
     gs%nshared = shared_dof%size() + shared_face_dof%size()
     gs%shared_facet_offset = shared_dof%size() + 1
@@ -983,11 +984,11 @@ subroutine gs_init_mapping(gs)
        end do
     end select
     call shared_face_dof%free()
-    
+
     ! Finalize shared gather-scatter index to dof
     allocate(gs%shared_gs_dof(gs%nshared))
 
-    ! Add dofs on points and edges 
+    ! Add dofs on points and edges
 
     ! We should use the %array() procedure, which works great for
     ! GNU, Intel and NEC, but it breaks horribly on Cray when using
@@ -1022,9 +1023,9 @@ subroutine gs_init_mapping(gs)
        call gs_find_blks(gs%shared_dof_gs, gs%shared_blk_len, &
             gs%nshared_blks, gs%nshared, gs%shared_facet_offset)
     end if
-    
+
   contains
-    
+
     !> Register a unique dof
     function gs_mapping_add_dof(map_, dof, max_id) result(id)
       type(htable_i8_t), intent(inout) :: map_
@@ -1037,7 +1038,7 @@ function gs_mapping_add_dof(map_, dof, max_id) result(id)
          call map_%set(dof, max_id)
          id = max_id
       end if
-      
+
     end function gs_mapping_add_dof
 
     !> Sort the dof lists based on the dof to gather-scatter list
@@ -1052,16 +1053,16 @@ recursive subroutine gs_qsort_dofmap(dg, gd, n, lo, hi)
       j = hi + 1
       pivot = dg((lo + hi) / 2)
       do
-         do 
+         do
             i = i + 1
             if (dg(i) .ge. pivot) exit
          end do
-         
-         do 
+
+         do
             j = j - 1
             if (dg(j) .le. pivot) exit
          end do
-         
+
          if (i .lt. j) then
             tmp = dg(i)
             dg(i) = dg(j)
@@ -1076,10 +1077,10 @@ recursive subroutine gs_qsort_dofmap(dg, gd, n, lo, hi)
          else
             exit
          end if
-      end do      
+      end do
       if (lo .lt. j) call gs_qsort_dofmap(dg, gd, n, lo, j)
       if (i .lt. hi) call gs_qsort_dofmap(dg, gd, n, i, hi)
-      
+
     end subroutine gs_qsort_dofmap
 
     !> Find blocks sharing dofs in non-facet data
@@ -1092,7 +1093,7 @@ subroutine gs_find_blks(dg, blk_len, nblks, n, m)
       integer :: i, j
       integer :: id, count
       type(stack_i4_t), target :: blks
-      
+
       call blks%init()
       i = 1
       do while( i .lt. m)
@@ -1116,7 +1117,7 @@ subroutine gs_find_blks(dg, blk_len, nblks, n, m)
          end do
       end select
       call blks%free()
-      
+
     end subroutine gs_find_blks
 
   end subroutine gs_init_mapping
@@ -1135,7 +1136,7 @@ subroutine gs_schedule(gs)
     integer :: nshared_unique
 
     nshared_unique = gs%shared_dofs%num_entries()
-    
+
     call it%init(gs%shared_dofs)
     allocate(send_buf(nshared_unique))
     i = 1
@@ -1146,7 +1147,7 @@ subroutine gs_schedule(gs)
 
     call send_pe%init()
     call recv_pe%init()
-    
+
 
     !
     ! Schedule exchange of shared dofs
@@ -1163,7 +1164,7 @@ subroutine gs_schedule(gs)
     do i = 1, size(gs%dofmap%msh%neigh_order)
        src = modulo(pe_rank - gs%dofmap%msh%neigh_order(i) + pe_size, pe_size)
        dst = modulo(pe_rank + gs%dofmap%msh%neigh_order(i), pe_size)
-       
+
        if (gs%dofmap%msh%neigh(src)) then
           call MPI_Irecv(recv_buf, max_recv, MPI_INTEGER8, &
                src, 0, NEKO_COMM, recv_req, ierr)
@@ -1177,7 +1178,7 @@ subroutine gs_schedule(gs)
        if (gs%dofmap%msh%neigh(src)) then
           call MPI_Wait(recv_req, status, ierr)
           call MPI_Get_count(status, MPI_INTEGER8, n_recv, ierr)
-          
+
           do j = 1, n_recv
              shared_flg(j) = gs%shared_dofs%get(recv_buf(j), shared_gs_id)
              if (shared_flg(j) .eq. 0) then
@@ -1185,7 +1186,7 @@ subroutine gs_schedule(gs)
                 call gs%comm%recv_dof(src)%push(shared_gs_id)
              end if
           end do
-          
+
           if (gs%comm%recv_dof(src)%size() .gt. 0) then
              call recv_pe%push(src)
           end if
@@ -1194,7 +1195,7 @@ subroutine gs_schedule(gs)
        if (gs%dofmap%msh%neigh(dst)) then
           call MPI_Wait(send_req, MPI_STATUS_IGNORE, ierr)
           call MPI_Irecv(recv_flg, max_recv, MPI_INTEGER2, &
-               dst, 0, NEKO_COMM, recv_req, ierr)          
+               dst, 0, NEKO_COMM, recv_req, ierr)
        end if
 
        if (gs%dofmap%msh%neigh(src)) then
@@ -1205,10 +1206,10 @@ subroutine gs_schedule(gs)
        if (gs%dofmap%msh%neigh(dst)) then
           call MPI_Wait(recv_req, status, ierr)
           call MPI_Get_count(status, MPI_INTEGER2, n_recv, ierr)
-       
+
           do j = 1, n_recv
              if (recv_flg(j) .eq. 0) then
-                tmp = gs%shared_dofs%get(send_buf(j), shared_gs_id) 
+                tmp = gs%shared_dofs%get(send_buf(j), shared_gs_id)
                 !> @todo don't touch others data...
                 call gs%comm%send_dof(dst)%push(shared_gs_id)
              end if
@@ -1222,18 +1223,18 @@ subroutine gs_schedule(gs)
        if (gs%dofmap%msh%neigh(src)) then
           call MPI_Wait(send_req, MPI_STATUS_IGNORE, ierr)
        end if
-       
+
     end do
 
     call gs%comm%init(send_pe, recv_pe)
-    
+
     call send_pe%free()
     call recv_pe%free()
 
     deallocate(send_buf)
     deallocate(recv_flg)
     deallocate(shared_flg)
-    !This arrays seems to take massive amounts of memory... 
+    !This arrays seems to take massive amounts of memory...
     call gs%shared_dofs%free()
 
   end subroutine gs_schedule
@@ -1244,21 +1245,21 @@ subroutine gs_op_fld(gs, u, op, event)
     type(field_t), intent(inout) :: u
     type(c_ptr), optional, intent(inout) :: event
     integer :: n, op
-    
+
     n = u%msh%nelv * u%Xh%lx * u%Xh%ly * u%Xh%lz
     if (present(event)) then
        call gs_op_vector(gs, u%x, n, op, event)
     else
        call gs_op_vector(gs, u%x, n, op)
     end if
-    
+
   end subroutine gs_op_fld
-  
+
   !> Gather-scatter operation on a rank 4 array
   subroutine gs_op_r4(gs, u, n, op, event)
     class(gs_t), intent(inout) :: gs
     integer, intent(in) :: n
-    real(kind=rp), dimension(:,:,:,:), intent(inout) :: u
+    real(kind=rp), contiguous, dimension(:,:,:,:), intent(inout) :: u
     type(c_ptr), optional, intent(inout) :: event
     integer :: op
 
@@ -1267,9 +1268,9 @@ subroutine gs_op_r4(gs, u, n, op, event)
     else
        call gs_op_vector(gs, u, n, op)
     end if
-    
+
   end subroutine gs_op_r4
-  
+
   !> Gather-scatter operation on a vector @a u with op @a op
   subroutine gs_op_vector(gs, u, n, op, event)
     class(gs_t), intent(inout) :: gs
@@ -1277,31 +1278,31 @@ subroutine gs_op_vector(gs, u, n, op, event)
     real(kind=rp), dimension(n), intent(inout) :: u
     type(c_ptr), optional, intent(inout) :: event
     integer :: m, l, op, lo, so
-    
+
     lo = gs%local_facet_offset
     so = -gs%shared_facet_offset
     m = gs%nlocal
     l = gs%nshared
 
-    call profiler_start_region("gather-scatter")
+    call profiler_start_region("gather-scatter", 5)
     ! Gather shared dofs
     if (pe_size .gt. 1) then
-       call profiler_start_region("gs_nbrecv")
+       call profiler_start_region("gs_nbrecv", 13)
        call gs%comm%nbrecv()
        call profiler_end_region
-       call profiler_start_region("gs_gather_shared")
+       call profiler_start_region("gs_gather_shared", 14)
        call gs%bcknd%gather(gs%shared_gs, l, so, gs%shared_dof_gs, u, n, &
             gs%shared_gs_dof, gs%nshared_blks, gs%shared_blk_len, op, .true.)
        call profiler_end_region
-       call profiler_start_region("gs_nbsend")
+       call profiler_start_region("gs_nbsend", 6)
        call gs%comm%nbsend(gs%shared_gs, l, &
             gs%bcknd%gather_event, gs%bcknd%gs_stream)
        call profiler_end_region
-       
+
     end if
-    
+
     ! Gather-scatter local dofs
-    call profiler_start_region("gs_local")
+    call profiler_start_region("gs_local", 12)
     call gs%bcknd%gather(gs%local_gs, m, lo, gs%local_dof_gs, u, n, &
          gs%local_gs_dof, gs%nlocal_blks, gs%local_blk_len, op, .false.)
     call gs%bcknd%scatter(gs%local_gs, m, gs%local_dof_gs, u, n, &
@@ -1309,10 +1310,10 @@ subroutine gs_op_vector(gs, u, n, op, event)
     call profiler_end_region
     ! Scatter shared dofs
     if (pe_size .gt. 1) then
-       call profiler_start_region("gs_nbwait")
+       call profiler_start_region("gs_nbwait", 7)
        call gs%comm%nbwait(gs%shared_gs, l, op, gs%bcknd%gs_stream)
        call profiler_end_region
-       call profiler_start_region("gs_scatter_shared")
+       call profiler_start_region("gs_scatter_shared", 15)
        if (present(event)) then
           call gs%bcknd%scatter(gs%shared_gs, l,&
                                 gs%shared_dof_gs, u, n, &
@@ -1328,7 +1329,7 @@ subroutine gs_op_vector(gs, u, n, op, event)
     end if
 
     call profiler_end_region
-    
+
   end subroutine gs_op_vector
-  
+
 end module gather_scatter
diff --git a/src/gs/gs_bcknd.f90 b/src/gs/gs_bcknd.f90
index 8d65a6a2dbe..efad3707b0e 100644
--- a/src/gs/gs_bcknd.f90
+++ b/src/gs/gs_bcknd.f90
@@ -39,7 +39,7 @@ module gs_bcknd
 
   integer, public, parameter :: GS_BCKND_CPU = 1, GS_BCKND_SX = 2, &
        GS_BCKND_DEV = 3
-  
+
   !> Gather-scatter backend
   type, public, abstract :: gs_bcknd_t
      type(c_ptr) :: gather_event = C_NULL_PTR
@@ -76,7 +76,7 @@ end subroutine gs_backend_free
   !! \f$ v(dg(i)) = op(v(dg(i)), u(gd(i)) \f$
   abstract interface
      subroutine gs_gather(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
-       import gs_bcknd_t       
+       import gs_bcknd_t
        import rp
        integer, intent(in) :: m
        integer, intent(in) :: n
@@ -89,10 +89,10 @@ subroutine gs_gather(this, v, m, o, dg, u, n, gd, nb, b, op, shrd)
        integer, dimension(nb), intent(inout) :: b
        integer, intent(in) :: o
        integer, intent(in) :: op
-       logical, intent(in) :: shrd    
+       logical, intent(in) :: shrd
      end subroutine gs_gather
   end interface
-  
+
   !> Abstract interface for the Scatter kernel
   !! \f$ u(gd(i) = v(dg(i)) \f$
   abstract interface
@@ -103,7 +103,7 @@ subroutine gs_scatter(this, v, m, dg, u, n, gd, nb, b, shrd, event)
        integer, intent(in) :: m
        integer, intent(in) :: n
        integer, intent(in) :: nb
-       class(gs_bcknd_t), intent(inout) :: this              
+       class(gs_bcknd_t), intent(inout) :: this
        real(kind=rp), dimension(m), intent(inout) :: v
        integer, dimension(m), intent(inout) :: dg
        real(kind=rp), dimension(n), intent(inout) :: u
@@ -114,5 +114,5 @@ subroutine gs_scatter(this, v, m, dg, u, n, gd, nb, b, shrd, event)
      end subroutine gs_scatter
   end interface
 
-  
+
 end module gs_bcknd
diff --git a/src/gs/gs_comm.f90 b/src/gs/gs_comm.f90
index 398c78ac423..f3c573e823e 100644
--- a/src/gs/gs_comm.f90
+++ b/src/gs/gs_comm.f90
@@ -35,7 +35,7 @@ module gs_comm
   use num_types, only : rp
   use comm, only : pe_size
   use stack, only : stack_i4_t
-  use, intrinsic :: iso_c_binding 
+  use, intrinsic :: iso_c_binding
   implicit none
   private
 
@@ -63,10 +63,10 @@ module gs_comm
   abstract interface
      subroutine gs_comm_init(this, send_pe, recv_pe)
        import gs_comm_t
-       import stack_i4_t       
+       import stack_i4_t
        class(gs_comm_t), intent(inout) :: this
        type(stack_i4_t), intent(inout) :: send_pe
-       type(stack_i4_t), intent(inout) :: recv_pe       
+       type(stack_i4_t), intent(inout) :: recv_pe
      end subroutine gs_comm_init
   end interface
 
@@ -132,7 +132,7 @@ subroutine init_dofs(this)
        call this%send_dof(i)%init()
        call this%recv_dof(i)%init()
     end do
-    
+
   end subroutine init_dofs
 
   subroutine free_dofs(this)
@@ -152,7 +152,7 @@ subroutine free_dofs(this)
        end do
        deallocate(this%recv_dof)
     end if
-    
+
   end subroutine free_dofs
 
   subroutine init_order(this, send_pe, recv_pe)
@@ -161,7 +161,7 @@ subroutine init_order(this, send_pe, recv_pe)
     type(stack_i4_t), intent(inout) :: recv_pe
     integer, pointer :: sp(:)
     integer :: i
-    
+
     allocate(this%send_pe(send_pe%size()))
 
     sp => send_pe%array()
@@ -175,7 +175,7 @@ subroutine init_order(this, send_pe, recv_pe)
     do i = 1, recv_pe%size()
        this%recv_pe(i) = sp(i)
     end do
-    
+
   end subroutine init_order
 
   subroutine free_order(this)
@@ -188,7 +188,7 @@ subroutine free_order(this)
     if (allocated(this%recv_pe)) then
        deallocate(this%recv_pe)
     end if
-    
+
   end subroutine free_order
-     
+
 end module gs_comm
diff --git a/src/gs/gs_mpi.f90 b/src/gs/gs_mpi.f90
index c4e4c84e485..a6e358be1ab 100644
--- a/src/gs/gs_mpi.f90
+++ b/src/gs/gs_mpi.f90
@@ -30,7 +30,7 @@
 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 ! POSSIBILITY OF SUCH DAMAGE.
 !
-!> Defines MPI gather-scatter communication 
+!> Defines MPI gather-scatter communication
 module gs_mpi
   use neko_config
   use num_types
@@ -38,18 +38,18 @@ module gs_mpi
   use gs_ops
   use stack, only : stack_i4_t
   use comm
-  use, intrinsic :: iso_c_binding 
+  use, intrinsic :: iso_c_binding
   !$ use omp_lib
   implicit none
   private
-  
+
   !> MPI buffer for non-blocking operations
   type, private :: gs_comm_mpi_t
      type(MPI_Status) :: status
      type(MPI_Request) :: request
      logical :: flag
      real(kind=rp), allocatable :: data(:)
-  end type  gs_comm_mpi_t
+  end type gs_comm_mpi_t
 
   !> Gather-scatter communication using MPI
   type, public, extends(gs_comm_t) :: gs_mpi_t
@@ -69,10 +69,10 @@ module gs_mpi
   subroutine gs_mpi_init(this, send_pe, recv_pe)
     class(gs_mpi_t), intent(inout) :: this
     type(stack_i4_t), intent(inout) :: send_pe
-    type(stack_i4_t), intent(inout) :: recv_pe       
+    type(stack_i4_t), intent(inout) :: recv_pe
     integer, pointer :: sp(:), rp(:)
     integer :: i
-    
+
     call this%init_order(send_pe, recv_pe)
 
     allocate(this%send_buf(send_pe%size()))
@@ -116,7 +116,7 @@ subroutine gs_mpi_free(this)
 
     call this%free_order()
     call this%free_dofs()
-    
+
   end subroutine gs_mpi_free
 
   !> Post non-blocking send operations
@@ -130,8 +130,8 @@ subroutine gs_nbsend_mpi(this, u, n, deps, strm)
     integer , pointer :: sp(:)
 
     thrdid = 0
-    !$ thrdid = omp_get_thread_num()
-    
+!$  thrdid = omp_get_thread_num()
+
     do i = 1, size(this%send_pe)
        dst = this%send_pe(i)
        sp => this%send_dof(dst)%array()
@@ -157,7 +157,7 @@ subroutine gs_nbrecv_mpi(this)
 
     thrdid = 0
     !$ thrdid = omp_get_thread_num()
-    
+
     do i = 1, size(this%recv_pe)
        ! We should not need this extra associate block, ant it works
        ! great without it for GNU, Intel, NEC and Cray, but throws an
@@ -169,13 +169,13 @@ subroutine gs_nbrecv_mpi(this)
        end associate
        this%recv_buf(i)%flag = .false.
     end do
-    
+
   end subroutine gs_nbrecv_mpi
 
   !> Wait for non-blocking operations
   subroutine gs_nbwait_mpi(this, u, n, op, strm)
     class(gs_mpi_t), intent(inout) :: this
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: u
     type(c_ptr), intent(inout) :: strm
     integer :: i, j, src, ierr
@@ -185,7 +185,7 @@ subroutine gs_nbwait_mpi(this, u, n, op, strm)
 
     nreqs = size(this%recv_pe)
 
-    do while (nreqs .gt. 0) 
+    do while (nreqs .gt. 0)
        do i = 1, size(this%recv_pe)
           if (.not. this%recv_buf(i)%flag) then
              call MPI_Test(this%recv_buf(i)%request, this%recv_buf(i)%flag, &
@@ -223,7 +223,7 @@ subroutine gs_nbwait_mpi(this, u, n, op, strm)
     end do
 
     nreqs = size(this%send_pe)
-    do while (nreqs .gt. 0) 
+    do while (nreqs .gt. 0)
        do i = 1, size(this%send_pe)
           if (.not. this%send_buf(i)%flag) then
              call MPI_Test(this%send_buf(i)%request, this%send_buf(i)%flag, &
@@ -234,5 +234,5 @@ subroutine gs_nbwait_mpi(this, u, n, op, strm)
     end do
 
   end subroutine gs_nbwait_mpi
-  
+
 end module gs_mpi
diff --git a/src/gs/gs_ops.f90 b/src/gs/gs_ops.f90
index 1b5afa6a093..c7afde7c5dc 100644
--- a/src/gs/gs_ops.f90
+++ b/src/gs/gs_ops.f90
@@ -35,5 +35,5 @@ module gs_ops
 
   integer, public, parameter :: GS_OP_ADD = 1, GS_OP_MUL = 2, &
        GS_OP_MIN = 3, GS_OP_MAX = 4
-      
+
 end module gs_ops
diff --git a/src/io/chkp_file.f90 b/src/io/chkp_file.f90
index a45c535244b..0cf37306457 100644
--- a/src/io/chkp_file.f90
+++ b/src/io/chkp_file.f90
@@ -35,7 +35,7 @@
 module chkp_file
   use generic_file
   use field_series
-  use checkpoint    
+  use checkpoint
   use num_types
   use field
   use dofmap, only: dofmap_t
@@ -65,7 +65,7 @@ module chkp_file
   end type chkp_file_t
 
 contains
-  
+
   !> Write a Neko checkpoint
   subroutine chkp_file_write(this, data, t)
     class(chkp_file_t), intent(inout) :: this
@@ -98,7 +98,7 @@ subroutine chkp_file_write(this, data, t)
     else
        time = 0d0
     end if
-    
+
     select type(data)
     type is (chkp_t)
 
@@ -108,16 +108,16 @@ subroutine chkp_file_write(this, data, t)
             .not. associated(data%p) ) then
           call neko_error('Checkpoint not initialized')
        end if
-    
+
        u => data%u
        v => data%v
        w => data%w
        p => data%p
        msh => u%msh
-       
+
        optional_fields = 0
 
-       if (associated(data%ulag)) then       
+       if (associated(data%ulag)) then
           ulag => data%ulag
           vlag => data%vlag
           wlag => data%wlag
@@ -126,23 +126,23 @@ subroutine chkp_file_write(this, data, t)
        else
           write_lag = .false.
        end if
- 
-       if (associated(data%s)) then       
+
+       if (associated(data%s)) then
           s => data%s
           write_scalar = .true.
           optional_fields = optional_fields + 2
        else
           write_scalar = .false.
-       end if      
+       end if
 
-       if (associated(data%tlag)) then       
+       if (associated(data%tlag)) then
           tlag => data%tlag
           dtlag => data%dtlag
           write_dtlag = .true.
           optional_fields = optional_fields + 4
        else
           write_dtlag = .false.
-       end if 
+       end if
        write_abvel = .false.
        if (associated(data%abx1)) then
           abx1 => data%abx1
@@ -166,7 +166,7 @@ subroutine chkp_file_write(this, data, t)
        call neko_error('Invalid data')
     end select
 
-    
+
     suffix_pos = filename_suffix_pos(this%fname)
     write(id_str, '(i5.5)') this%counter
     fname = trim(this%fname(1:suffix_pos-1))//id_str//'.chkp'
@@ -174,7 +174,7 @@ subroutine chkp_file_write(this, data, t)
 
     dof_offset = int(msh%offset_el, i8) * int(u%Xh%lx * u%Xh%ly * u%Xh%lz, i8)
     n_glb_dofs = int(u%Xh%lx * u%Xh%ly * u%Xh%lz, i8) * int(msh%glb_nelv, i8)
-    
+
     call MPI_File_open(NEKO_COMM, trim(fname), &
          MPI_MODE_WRONLY + MPI_MODE_CREATE, MPI_INFO_NULL, fh, ierr)
     call MPI_File_write_all(fh, msh%glb_nelv, 1, MPI_INTEGER, status, ierr)
@@ -182,21 +182,22 @@ subroutine chkp_file_write(this, data, t)
     call MPI_File_write_all(fh, u%Xh%lx, 1, MPI_INTEGER, status, ierr)
     call MPI_File_write_all(fh, optional_fields, 1, MPI_INTEGER, status, ierr)
     call MPI_File_write_all(fh, time, 1, MPI_DOUBLE_PRECISION, status, ierr)
-    
-    
+
+
     !
     ! Dump mandatory checkpoint data
     !
-    
+
+
     byte_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8)
     byte_offset = byte_offset + &
          dof_offset * int(MPI_REAL_PREC_SIZE, i8)
     call MPI_File_write_at_all(fh, byte_offset,u%x, u%dof%size(), &
          MPI_REAL_PRECISION, status, ierr)
-    mpi_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8) 
+    mpi_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8)
     mpi_offset = mpi_offset +&
          n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8)
-    
+
     byte_offset = mpi_offset + &
          dof_offset * int(MPI_REAL_PREC_SIZE, i8)
     call MPI_File_write_at_all(fh, byte_offset, v%x, v%dof%size(), &
@@ -259,10 +260,10 @@ subroutine chkp_file_write(this, data, t)
           end associate
           mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8)
        end do
-              
+
     end if
 
-    if (write_scalar) then 
+    if (write_scalar) then
        byte_offset = mpi_offset + &
             dof_offset * int(MPI_REAL_PREC_SIZE, i8)
        call MPI_File_write_at_all(fh, byte_offset, s%x, p%dof%size(), &
@@ -323,7 +324,7 @@ subroutine chkp_file_write(this, data, t)
           end associate
           mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8)
        end do
-              
+
        byte_offset = mpi_offset + &
             dof_offset * int(MPI_REAL_PREC_SIZE, i8)
        call MPI_File_write_at_all(fh, byte_offset, abs1%x, abx1%dof%size(), &
@@ -335,13 +336,13 @@ subroutine chkp_file_write(this, data, t)
             MPI_REAL_PRECISION, status, ierr)
        mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8)
     end if
-   
+
     call MPI_File_close(fh, ierr)
 
     this%counter = this%counter + 1
-    
+
   end subroutine chkp_file_write
-  
+
   !> Load a checkpoint from file
   subroutine chkp_file_read(this, data)
     class(chkp_file_t) :: this
@@ -375,10 +376,11 @@ subroutine chkp_file_read(this, data)
     real(kind=rp) :: center_x, center_y, center_z
     integer :: i, e
     type(dofmap_t) :: dof
+   
+    call this%check_exists()
 
-    
     select type(data)
-    type is (chkp_t)       
+    type is (chkp_t)
 
        if ( .not. associated(data%u) .or. &
             .not. associated(data%v) .or. &
@@ -386,7 +388,7 @@ subroutine chkp_file_read(this, data)
             .not. associated(data%p) ) then
           call neko_error('Checkpoint not initialized')
        end if
-    
+
        u => data%u
        v => data%v
        w => data%w
@@ -399,9 +401,9 @@ subroutine chkp_file_read(this, data)
        else !< The checkpoint was written on the same mesh
           msh => u%msh
           this%mesh2mesh = .false.
-       end if 
+       end if
 
-       if (associated(data%ulag)) then       
+       if (associated(data%ulag)) then
           ulag => data%ulag
           vlag => data%vlag
           wlag => data%wlag
@@ -410,13 +412,13 @@ subroutine chkp_file_read(this, data)
           read_lag = .false.
        end if
 
-       if (associated(data%s)) then       
+       if (associated(data%s)) then
           s => data%s
           read_scalar = .true.
        else
           read_scalar = .false.
        end if
-       if (associated(data%dtlag)) then       
+       if (associated(data%dtlag)) then
           dtlag => data%dtlag
           tlag => data%tlag
           read_dtlag = .true.
@@ -442,13 +444,13 @@ subroutine chkp_file_read(this, data)
        end if
 
        chkp => data
-       
+
     class default
        call neko_error('Invalid data')
     end select
 
 
-    
+
     call MPI_File_open(NEKO_COMM, trim(this%fname), &
          MPI_MODE_RDONLY, MPI_INFO_NULL, fh, ierr)
     call MPI_File_read_all(fh, glb_nelv, 1, MPI_INTEGER, status, ierr)
@@ -509,23 +511,23 @@ subroutine chkp_file_read(this, data)
        deallocate(y_coord)
        deallocate(z_coord)
     else
-       call this%space_interp%init(this%sim_Xh, this%chkp_Xh) 
+       call this%space_interp%init(this%sim_Xh, this%chkp_Xh)
     end if
     dof_offset = int(msh%offset_el, i8) * int(this%chkp_Xh%lxyz, i8)
-    n_glb_dofs = int(this%chkp_Xh%lxyz, i8) * int(msh%glb_nelv, i8)    
-    
+    n_glb_dofs = int(this%chkp_Xh%lxyz, i8) * int(msh%glb_nelv, i8)
+
     !
     ! Read mandatory checkpoint data
     !
-    
+
     byte_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8)
     byte_offset = byte_offset + &
          dof_offset * int(MPI_REAL_PREC_SIZE, i8)
     call this%read_field(fh, byte_offset, u%x, nel)
-    mpi_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8) 
+    mpi_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8)
     mpi_offset = mpi_offset +&
          n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8)
-    
+
     byte_offset = mpi_offset + &
          dof_offset * int(MPI_REAL_PREC_SIZE, i8)
     call this%read_field(fh, byte_offset, v%x, nel)
@@ -559,7 +561,7 @@ subroutine chkp_file_read(this, data)
           call this%read_field(fh, byte_offset, vlag%lf(i)%x, nel)
           mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8)
        end do
-       
+
        do i = 1, wlag%size()
           byte_offset = mpi_offset + &
                dof_offset * int(MPI_REAL_PREC_SIZE, i8)
@@ -568,21 +570,21 @@ subroutine chkp_file_read(this, data)
        end do
     end if
 
-    if (read_scalar) then 
+    if (read_scalar) then
        byte_offset = mpi_offset + &
             dof_offset * int(MPI_REAL_PREC_SIZE, i8)
        call this%read_field(fh, byte_offset, s%x, nel)
        mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8)
     end if
 
-    if (read_dtlag .and. have_dtlag .eq. 1) then 
+    if (read_dtlag .and. have_dtlag .eq. 1) then
        call MPI_File_read_at_all(fh, mpi_offset, tlag, 10, MPI_REAL_PRECISION, status, ierr)
        mpi_offset = mpi_offset + 10_i8 * int(MPI_REAL_PREC_SIZE, i8)
        call MPI_File_read_at_all(fh, mpi_offset, dtlag, 10, MPI_REAL_PRECISION, status, ierr)
        mpi_offset = mpi_offset + 10_i8 * int(MPI_REAL_PREC_SIZE, i8)
     end if
 
-    if (read_abvel .and. have_abvel .eq. 1) then 
+    if (read_abvel .and. have_abvel .eq. 1) then
        byte_offset = mpi_offset + &
             dof_offset * int(MPI_REAL_PREC_SIZE, i8)
        call this%read_field(fh, byte_offset, abx1%x, nel)
@@ -608,7 +610,7 @@ subroutine chkp_file_read(this, data)
        call this%read_field(fh, byte_offset, abz2%x, nel)
        mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8)
     end if
-    if (read_scalarlag .and. have_scalarlag .eq. 1) then 
+    if (read_scalarlag .and. have_scalarlag .eq. 1) then
        do i = 1, slag%size()
           byte_offset = mpi_offset + &
                dof_offset * int(MPI_REAL_PREC_SIZE, i8)
@@ -625,7 +627,7 @@ subroutine chkp_file_read(this, data)
        mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8)
     end if
 
-    call MPI_File_close(fh, ierr)      
+    call MPI_File_close(fh, ierr)
 
     call this%global_interp%free()
     call this%space_interp%free()
@@ -643,7 +645,7 @@ subroutine chkp_read_field(this, fh, byte_offset, x, nel)
     type(MPI_Status) :: status
     integer :: ierr
 
-    allocate(read_array(this%chkp_Xh%lxyz*nel)) 
+    allocate(read_array(this%chkp_Xh%lxyz*nel))
 
     call rzero(read_array,this%chkp_xh%lxyz*nel)
     call MPI_File_read_at_all(fh, byte_offset, read_array, &
@@ -657,5 +659,5 @@ subroutine chkp_read_field(this, fh, byte_offset, x, nel)
     end if
     deallocate(read_array)
   end subroutine chkp_read_field
-  
+
 end module chkp_file
diff --git a/src/io/chkp_output.f90 b/src/io/chkp_output.f90
index ec765ec35b1..8302348eeda 100644
--- a/src/io/chkp_output.f90
+++ b/src/io/chkp_output.f90
@@ -34,7 +34,7 @@
 module chkp_output
   use checkpoint, only : chkp_t
   use output
-  use num_types, only : rp    
+  use num_types, only : rp
   implicit none
 
   type, public, extends(output_t) :: chkp_output_t
@@ -46,7 +46,7 @@ module chkp_output
   interface chkp_output_t
      module procedure chkp_output_init
   end interface chkp_output_t
-  
+
 contains
 
   function chkp_output_init(chkp, name, path) result(this)
@@ -66,7 +66,7 @@ function chkp_output_init(chkp, name, path) result(this)
        fname = 'fluid.chkp'
     end if
 
-    call output_init(this, fname)
+    call this%init_base(fname)
     this%chkp => chkp
   end function chkp_output_init
 
@@ -79,5 +79,5 @@ subroutine chkp_output_sample(this, t)
     call this%file_%write(this%chkp, t)
 
   end subroutine chkp_output_sample
-  
+
 end module chkp_output
diff --git a/src/io/csv_file.f90 b/src/io/csv_file.f90
old mode 100755
new mode 100644
index d177a624a87..3bfca41158f
--- a/src/io/csv_file.f90
+++ b/src/io/csv_file.f90
@@ -68,29 +68,32 @@ subroutine csv_file_write(this, data, t)
     real(kind=rp), intent(in), optional :: t
 
     real(kind=rp) :: time
-    type(vector_t), pointer :: vec => null()
-    type(matrix_t), pointer :: mat => null()
+    type(vector_t), pointer :: vec
+    type(matrix_t), pointer :: mat
+
+    nullify(vec)
+    nullify(mat)
 
     select type(data)
     type is (vector_t)
        if (.not. allocated(data%x)) then
           call neko_error("Vector is not allocated! Use &
-               &vector%init() to associate your array &
-               &with a vector_t object")
+          &vector%init() to associate your array &
+          &with a vector_t object")
        end if
        vec => data
 
     type is (matrix_t)
        if (.not. allocated(data%x)) then
           call neko_error("Matrix is not allocated! Use &
-               &matrix%init() to associate your array &
-               &with a matrix_t object")
+          &matrix%init() to associate your array &
+          &with a matrix_t object")
        end if
        mat => data
 
     class default
        call neko_error("Invalid data. Expected vector_t or &
-            &matrix_t")
+       &matrix_t")
     end select
 
     ! Write is performed on rank 0
@@ -177,30 +180,35 @@ end subroutine csv_file_write_matrix
   subroutine csv_file_read(this, data)
     class(csv_file_t) :: this
     class(*), target, intent(inout) :: data
-    type(vector_t), pointer :: vec => null()
-    type(matrix_t), pointer :: mat => null()
+    type(vector_t), pointer :: vec
+    type(matrix_t), pointer :: mat
+
+    call this%check_exists()
+
+    nullify(vec)
+    nullify(mat)
 
     select type(data)
     type is (vector_t)
        vec => data
        if (.not. allocated(data%x)) then
           call neko_error("Vector is not allocated! Use &
-               &vector%init() to associate your array &
-               &with a vector_t object")
+          &vector%init() to associate your array &
+          &with a vector_t object")
        end if
 
     type is (matrix_t)
        mat => data
        if (.not. allocated(data%x)) then
           call neko_error("Matrix is not allocated! Use &
-               &matrix%init() to associate your array &
-               &with a matrix_t object")
+          &matrix%init() to associate your array &
+          &with a matrix_t object")
        end if
 
 
     class default
        call neko_error("Invalid data type for csv_file (expected: vector_t, &
-            &matrix_t)")
+       &matrix_t)")
     end select
 
     if (pe_rank .eq. 0) then
@@ -301,7 +309,7 @@ function csv_file_count_lines(this) result(n)
     rewind(file_unit)
 
     n = 0
-    
+
     ! Keep reading (ierr = 0) until we reach the end (ierr != 0)
     do
        read (file_unit,*,iostat=ierr)
diff --git a/src/io/data_streamer.F90 b/src/io/data_streamer.F90
index 820263b70ef..2407237f70f 100644
--- a/src/io/data_streamer.F90
+++ b/src/io/data_streamer.F90
@@ -36,24 +36,24 @@ module data_streamer
   use field, only: field_t
   use coefs, only: coef_t
   use utils, only: neko_warning
-  use device 
+  use device
   use comm
   use neko_mpi_types
   use neko_config
   use, intrinsic :: iso_c_binding
   implicit none
   private
-  
+
   !> Provides access to data streaming by interfacing with c++
   !! ADIOS2 subroutines.
   !! @details
-  !! Adios2 is an API that allows for easy coupling of codes 
+  !! Adios2 is an API that allows for easy coupling of codes
   !! through data streaming and gives the posibility to perform
   !! other IO operations such as data compression, etc.
-  !! This type wraps and interfaces the needed calls to allow 
+  !! This type wraps and interfaces the needed calls to allow
   !! the use of the c++ routines that ultimately expose the data
   !! from neko to any executable that counts with a proper reader.
-  type, public :: data_streamer_t 
+  type, public :: data_streamer_t
      !> Define if the execution is asyncrhonous
      integer :: if_asynch
      !> global element numbers
@@ -65,6 +65,8 @@ module data_streamer
      procedure, pass(this) :: free => data_streamer_free
      !> Stream data
      procedure, pass(this) :: stream => data_streamer_stream
+     !> Stream back the data
+     procedure, pass(this) :: recieve => data_streamer_recieve
 
   end type data_streamer_t
 
@@ -76,34 +78,20 @@ module data_streamer
   !! on the case.
   !! @param if_asynch Controls whether the asyncrhonous executions
   !! is to be enabled.
-  subroutine data_streamer_init(this, coef, if_asynch)
+  subroutine data_streamer_init(this, coef)
     class(data_streamer_t), intent(inout) :: this
     type(coef_t), intent(inout) :: coef
-    integer, intent(in) :: if_asynch
-    integer :: nelb, nelb2, nelv, nelgv,npts,e
-
-    !Allocate and initialize the global element number
-    allocate(this%lglel(coef%msh%nelv))
-    do e = 1, coef%msh%nelv
-      this%lglel(e) = e + coef%msh%offset_el
-    end do
-
-    !Assign if the streaming is asynchronous
-    this%if_asynch = if_asynch
+    integer :: nelb, nelv, nelgv, npts, gdim
 
     !Assign the set up parameters
     nelv  = coef%msh%nelv
     npts  = coef%Xh%lx*coef%Xh%ly*coef%Xh%lz
     nelgv = coef%msh%glb_nelv
     nelb  = coef%msh%offset_el
-    ! Alternative way to get nelb:
-    !nelb = elem_running_sum(nelv)
-    !nelb = nelb - nelv
+    gdim = coef%msh%gdim
 
 #ifdef HAVE_ADIOS2
-    call fortran_adios2_setup(npts, nelv, nelb, nelgv, &
-                nelgv, coef%dof%x, coef%dof%y, &
-                coef%dof%z, if_asynch, NEKO_COMM)
+    call fortran_adios2_initialize(npts, nelv, nelb, nelgv, gdim, NEKO_COMM)
 #else
     call neko_warning('Is not being built with ADIOS2 support.')
     call neko_warning('Not able to use stream/compression functionality')
@@ -117,46 +105,23 @@ end subroutine data_streamer_init
   subroutine data_streamer_free(this)
     class(data_streamer_t), intent(inout) :: this
 
-    if (allocated(this%lglel))        deallocate(this%lglel)
-
 #ifdef HAVE_ADIOS2
     call fortran_adios2_finalize()
 #else
     call neko_warning('Is not being built with ADIOS2 support.')
     call neko_warning('Not able to use stream/compression functionality')
 #endif
-    
+
   end subroutine data_streamer_free
-  
+   
   !> streamer
-  !! wraps the adios2 stream function.
-  !! @param u velocity in x
-  !! @param v velocity in y
-  !! @param w velocity in z
-  !! @param p pressure
-  !! @param coef type
-  subroutine data_streamer_stream(this, u, v, w, p, coef)
+  !! @param fld array of shape field%x
+  subroutine data_streamer_stream(this, fld)
     class(data_streamer_t), intent(inout) :: this
-    type(coef_t), intent(inout) :: coef
-    type(field_t), intent(inout) :: u
-    type(field_t), intent(inout) :: v
-    type(field_t), intent(inout) :: w
-    type(field_t), intent(inout) :: p
-    integer :: nelv, npts
-    
-    nelv  = coef%msh%nelv
-    npts  = coef%Xh%lx*coef%Xh%ly*coef%Xh%lz
-    
-    if (NEKO_BCKND_DEVICE .eq. 1) then 
-      ! Move the data to the CPU to be able to write it
-      call device_memcpy(u%x, u%x_d, nelv*npts, DEVICE_TO_HOST)
-      call device_memcpy(v%x, v%x_d, nelv*npts, DEVICE_TO_HOST)
-      call device_memcpy(w%x, w%x_d, nelv*npts, DEVICE_TO_HOST)
-      call device_memcpy(p%x, p%x_d, nelv*npts, DEVICE_TO_HOST, sync=.true.)
-    end if
+    real(kind=rp), dimension(:,:,:,:), intent(inout) :: fld
 
 #ifdef HAVE_ADIOS2
-    call fortran_adios2_stream(this%lglel, p%x, u%x, v%x, w%x, coef%B, u%x)
+    call fortran_adios2_stream(fld)
 #else
     call neko_warning('Is not being built with ADIOS2 support.')
     call neko_warning('Not able to use stream/compression functionality')
@@ -164,54 +129,47 @@ subroutine data_streamer_stream(this, u, v, w, p, coef)
 
   end subroutine data_streamer_stream
   
-  !> Supporting function to calculate the element number offset.
-  !! returns the number of elements that the ranks previous to the 
-  !! present one have.
-  !! @param nelv number of elements in current rank.  
-  function elem_running_sum(nelv) result(rbuff)
-    integer, intent(in) :: nelv
-    integer ::  ierr, xbuff, wbuff, rbuff
+  !> reciever
+  !! @param fld array of shape field%x
+  subroutine data_streamer_recieve(this, fld)
+    class(data_streamer_t), intent(inout) :: this
+    real(kind=rp), dimension(:,:,:,:), intent(inout) :: fld
+
+#ifdef HAVE_ADIOS2
+    call fortran_adios2_recieve(fld)
+#else
+    call neko_warning('Is not being built with ADIOS2 support.')
+    call neko_warning('Not able to use stream/compression functionality')
+#endif
+
+  end subroutine data_streamer_recieve
 
-    xbuff = nelv  ! running sum
-    wbuff = nelv  ! working buff
-    rbuff = 0   ! recv buff
-     
-    call mpi_scan(xbuff, rbuff, 1, mpi_integer, mpi_sum, NEKO_COMM, ierr)
-  end function elem_running_sum
 
 #ifdef HAVE_ADIOS2
-  
-  !> Interface to adios2_setup in c++.
+
+  !> Interface to adios2_initialize in c++.
   !! @details This routine interfaces with c++ routine that set up adios2
   !! if streaming, the global array to pair writer and reader is opened.
   !! @param npts number of points per element
   !! @param nelv number of elements in this rank
   !! @param nelb number of elements in ranks before this one
   !! @param nelgv total number of elements in velocity mesh
-  !! @param nelgt total number of elements in temperature mesh (not used)
-  !! @param x coordinates in x direction
-  !! @param y coordinates in y direction
-  !! @param z coordinates in z direction
-  !! @param asynch integer that indicates asynchronous execution
+  !! @param gdim dimension (2d or 3d)
   !! @param comm simulation communicator
-  subroutine fortran_adios2_setup(npts, nelv, nelb, nelgv, nelgt, x, y, &
-                                  z, asynch, comm)  
+  subroutine fortran_adios2_initialize(npts, nelv, nelb, nelgv, gdim, comm)
     use, intrinsic :: ISO_C_BINDING
     implicit none
-    real(kind=rp), dimension(:,:,:,:), intent(inout) :: x
-    real(kind=rp), dimension(:,:,:,:), intent(inout) :: y
-    real(kind=rp), dimension(:,:,:,:), intent(inout) :: z
-    integer, intent(in) :: npts, nelv, nelb,nelgv, nelgt, asynch
+    integer, intent(in) :: npts, nelv, nelb, nelgv, gdim
     type(MPI_COMM) :: comm
-    
+
     interface
-       !> C-definition is: void adios2_setup_(const int *nval,
+       !> C-definition is: void adios2_initialize_(const int *nval,
        !! const int *nelvin,const int *nelb, const int *nelgv,
        !! const int *nelgt, const double *xml,const double *yml,
        !! const double *zml, const int *if_asynchronous,
        !! const int *comm_int)
-       subroutine c_adios2_setup(npts, nelv, nelb, nelgv, nelgt, x, y, &
-                             z, asynch, comm) bind(C,name="adios2_setup_")
+       subroutine c_adios2_initialize(npts, nelv, nelb, nelgv, gdim, &
+                                      comm) bind(C,name="adios2_initialize_")
          use, intrinsic :: ISO_C_BINDING
          import c_rp
          implicit none
@@ -219,25 +177,20 @@ subroutine c_adios2_setup(npts, nelv, nelb, nelgv, nelgt, x, y, &
          integer(kind=C_INT) :: nelv
          integer(kind=C_INT) :: nelb
          integer(kind=C_INT) :: nelgv
-         integer(kind=C_INT) :: nelgt
-         real(kind=c_rp), intent(INOUT) :: x(*)
-         real(kind=c_rp), intent(INOUT)  :: y(*)
-         real(kind=c_rp), intent(INOUT)  :: z(*)
-         integer(kind=C_INT) :: asynch
+         integer(kind=C_INT) :: gdim
          type(*) :: comm
-       end subroutine c_adios2_setup
+       end subroutine c_adios2_initialize
     end interface
-    
-    call c_adios2_setup(npts, nelv, nelb, nelgv, nelgt, x, y, z, &
-                        asynch, comm)
-  end subroutine fortran_adios2_setup
+
+    call c_adios2_initialize(npts, nelv, nelb, nelgv, gdim, comm)
+  end subroutine fortran_adios2_initialize
 
   !> Interface to adios2_finalize in c++.
   !! closes any writer openned at initialization time
-  subroutine fortran_adios2_finalize()  
+  subroutine fortran_adios2_finalize()
     use, intrinsic :: ISO_C_BINDING
     implicit none
-    
+
     interface
        !> C-definition is: void adios2_finalize_()
        subroutine c_adios2_finalize() bind(C,name="adios2_finalize_")
@@ -245,54 +198,58 @@ subroutine c_adios2_finalize() bind(C,name="adios2_finalize_")
          implicit none
        end subroutine c_adios2_finalize
     end interface
-    
-    call c_adios2_finalize()  
+
+    call c_adios2_finalize()
   end subroutine fortran_adios2_finalize
   
   !> Interface to adios2_stream in c++.
   !! @details This routine communicates the data to a global array that
   !! is accessed by a data processor. The operations do not write to disk.
   !! data is communicated with mpi.
-  !! @param lglel global element number
-  !! @param p pressure
-  !! @param u velocity in x
-  !! @param v velocity in y
-  !! @param w velocity in z
-  !! @param bm1 mass matrix
-  !! @param t temperature / (Not really used in adios2 routine)
-  subroutine fortran_adios2_stream(lglel, p, u, v, w, bm1, t)  
+  !! @param fld array of shape field%x
+  subroutine fortran_adios2_stream(fld)
     use, intrinsic :: ISO_C_BINDING
     implicit none
-    integer, dimension(:), intent(inout) :: lglel
-    real(kind=rp), dimension(:,:,:,:), intent(inout) :: p
-    real(kind=rp), dimension(:,:,:,:), intent(inout) :: u
-    real(kind=rp), dimension(:,:,:,:), intent(inout) :: v
-    real(kind=rp), dimension(:,:,:,:), intent(inout) :: w
-    real(kind=rp), dimension(:,:,:,:), intent(inout) :: bm1
-    real(kind=rp), dimension(:,:,:,:), intent(inout) :: t
+    real(kind=rp), dimension(:,:,:,:), intent(inout) :: fld
 
     interface
-       !> C-definition is: void adios2_stream_(
-       !! const int *lglel, const double *pr, const double *u,
-       !! const double *v, const double *w, const double *mass1,
-       !! const double *temp)
-       subroutine c_adios2_stream(lglel, p, u, v, w, bm1, t) &
+       !> C-definition is: void adios2_stream_(const double *fld)
+       subroutine c_adios2_stream(fld) &
                                   bind(C,name="adios2_stream_")
          use, intrinsic :: ISO_C_BINDING
          import c_rp
          implicit none
-         integer(kind=C_INT), intent(INOUT) :: lglel(*)
-         real(kind=c_rp), intent(INOUT) :: p(*)
-         real(kind=c_rp), intent(INOUT)  :: u(*)
-         real(kind=c_rp), intent(INOUT)  :: v(*)
-         real(kind=c_rp), intent(INOUT)  :: w(*)
-         real(kind=c_rp), intent(INOUT)  :: bm1(*)
-         real(kind=c_rp), intent(INOUT)  :: t(*)
+         real(kind=c_rp), intent(INOUT) :: fld(*)
        end subroutine c_adios2_stream
     end interface
-    
-    call c_adios2_stream(lglel, p, u, v, w, bm1, t)  
+
+    call c_adios2_stream(fld)
   end subroutine fortran_adios2_stream
+  
+  !> Interface to adios2_recieve in ci++.
+  !! @details This routine communicates the data to a global array that
+  !! is accessed by a data processor. The operations do not write to disk.
+  !! data is communicated with mpi.
+  !! @param fld array of shape field%x
+  subroutine fortran_adios2_recieve(fld)
+    use, intrinsic :: ISO_C_BINDING
+    implicit none
+    real(kind=rp), dimension(:,:,:,:), intent(inout) :: fld
+
+    interface
+       !> C-definition is: void adios2_stream_(const double *fld)
+       subroutine c_adios2_recieve(fld) &
+                                  bind(C,name="adios2_recieve_")
+         use, intrinsic :: ISO_C_BINDING
+         import c_rp
+         implicit none
+         real(kind=c_rp), intent(INOUT) :: fld(*)
+       end subroutine c_adios2_recieve
+    end interface
+
+    call c_adios2_recieve(fld)
+  end subroutine fortran_adios2_recieve
+
 #endif
 
 end module data_streamer
diff --git a/src/io/file.f90 b/src/io/file.f90
index bc4dc6376f0..95cdb298e9f 100644
--- a/src/io/file.f90
+++ b/src/io/file.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2019-2023, The Neko Authors
+! Copyright (c) 2019-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -32,20 +32,24 @@
 !
 !> Module for file I/O operations.
 module file
-  use utils
-  use generic_file
-  use nmsh_file
-  use chkp_file        
-  use map_file
-  use rea_file
-  use re2_file
-  use fld_file
-  use fld_file_data
-  use vtk_file
-  use stl_file
-  use csv_file
+  use utils, only : neko_error, neko_warning, filename_suffix
+  use num_types, only : rp
+  use generic_file, only : generic_file_t
+  use nmsh_file, only : nmsh_file_t
+  use chkp_file, only : chkp_file_t
+  use map_file, only : map_file_t
+  use rea_file, only : rea_file_t
+  use re2_file, only : re2_file_t
+  use fld_file, only : fld_file_t
+  use fld_file_data, only : fld_file_data_t
+  use vtk_file, only : vtk_file_t
+  use stl_file, only : stl_file_t
+  use csv_file, only : csv_file_t
   implicit none
-  
+
+  !> A wrapper around a polymorphic `generic_file_t` that handles its init.
+  !! This is essentially a factory for `generic_file_t` descendants additionally
+  !! handling special CSV file parameters (header and precision).
   type file_t
      class(generic_file_t), allocatable :: file_type
    contains
@@ -57,8 +61,10 @@ module file
      procedure :: set_counter => file_set_counter
      !> Set a file's start counter.
      procedure :: set_start_counter => file_set_start_counter
-     !> Set a file's header
+     !> Set a file's header.
      procedure :: set_header => file_set_header
+     !> Set a file's output precision.
+     procedure :: set_precision => file_set_precision
      !> File operation destructor.
      final :: file_free
   end type file_t
@@ -71,18 +77,20 @@ module file
 
   !> File reader/writer constructor.
   !! @param fname Filename.
-  function file_init(fname) result(this)
+  function file_init(fname, header, precision) result(this)
     character(len=*) :: fname
+    character(len=*), optional :: header
+    integer, optional :: precision
     type(file_t), target :: this
     character(len=80) :: suffix
     class(generic_file_t), pointer :: q
-    
+
     call filename_suffix(fname, suffix)
-    
+
     if (allocated(this%file_type)) then
        deallocate(this%file_type)
     end if
-    
+
     if (suffix .eq. "rea") then
        allocate(rea_file_t::this%file_type)
     else if (suffix .eq. "re2") then
@@ -100,13 +108,22 @@ function file_init(fname) result(this)
     else if (suffix .eq. "stl") then
        allocate(stl_file_t::this%file_type)
     else if (suffix .eq. "csv") then
-      allocate(csv_file_t::this%file_type)
+       allocate(csv_file_t::this%file_type)
+       this%file_type%serial = .true.
     else
        call neko_error('Unknown file format')
     end if
 
     call this%file_type%init(fname)
 
+    if (present(header)) then
+       call this%set_header(header)
+    end if
+
+    if (present(precision)) then
+       call this%set_precision(precision)
+    end if
+
   end function file_init
 
   !> File operation destructor.
@@ -131,9 +148,9 @@ subroutine file_write(this, data, t)
     else
        call this%file_type%write(data)
     end if
-    
+
   end subroutine file_write
-   
+
   !> Read @a data from a file.
   !! @param data Read data.
   subroutine file_read(this, data)
@@ -141,7 +158,7 @@ subroutine file_read(this, data)
     class(*), intent(inout) :: data
 
     call this%file_type%read(data)
-    
+
   end subroutine file_read
 
   !> Set a file's counter.
@@ -153,7 +170,7 @@ subroutine file_set_counter(this, n)
     class is (generic_file_t)
        call ft%set_counter(n)
     end select
-    
+
   end subroutine file_set_counter
 
   !> Set a file's start counter.
@@ -165,10 +182,10 @@ subroutine file_set_start_counter(this, n)
     class is (generic_file_t)
        call ft%set_start_counter(n)
     end select
-    
+
   end subroutine file_set_start_counter
 
-  !> Set a file's header, mainly for csv_file for now.
+  !> Set a file's header.
   subroutine file_set_header(this, hd)
     class(file_t), intent(inout) :: this
     character(len=*), intent(in) :: hd
@@ -185,5 +202,23 @@ subroutine file_set_header(this, hd)
 
   end subroutine file_set_header
 
+  !> Set a file's output precision.
+  !! @param precision Precision as defined in `num_types`.
+  subroutine file_set_precision(this, precision)
+    class(file_t), intent(inout) :: this
+    integer, intent(in) :: precision
+
+    character(len=80) :: suffix
+
+    select type(ft => this%file_type)
+    type is (fld_file_t)
+       call ft%set_precision(precision)
+    class default
+       call filename_suffix(this%file_type%fname, suffix)
+       call neko_warning("No precision strategy defined for " // trim(suffix) //&
+            " files!")
+    end select
+
+  end subroutine file_set_precision
 
 end module file
diff --git a/src/io/fld_file.f90 b/src/io/fld_file.f90
index 47ecd622130..626e9e10e1d 100644
--- a/src/io/fld_file.f90
+++ b/src/io/fld_file.f90
@@ -49,10 +49,11 @@ module fld_file
   use utils
   use comm
   use datadist
+  use math, only : vlmin, vlmax
   use neko_mpi_types
   implicit none
   private
-  
+
   real(kind=dp), private, allocatable :: tmp_dp(:)
   real(kind=sp), private, allocatable :: tmp_sp(:)
 
@@ -100,7 +101,7 @@ subroutine fld_file_write(this, data, t)
     else
        time = 0d0
     end if
-    
+
     nullify(msh)
     nullify(dof)
     nullify(Xh)
@@ -114,13 +115,13 @@ subroutine fld_file_write(this, data, t)
        if (data%x%n .gt. 0) x%x => data%x%x
        if (data%y%n .gt. 0) y%x => data%y%x
        if (data%z%n .gt. 0) z%x => data%z%x
-       if (data%u%n .gt. 0) then 
+       if (data%u%n .gt. 0) then
           u%x => data%u%x
           write_velocity = .true.
        end if
        if (data%v%n .gt. 0) v%x => data%v%x
        if (data%w%n .gt. 0) w%x => data%w%x
-       if (data%p%n .gt. 0) then 
+       if (data%p%n .gt. 0) then
           p%x => data%p%x
           write_pressure = .true.
        end if
@@ -131,7 +132,7 @@ subroutine fld_file_write(this, data, t)
        n_scalar_fields = data%n_scalars
        allocate(scalar_fields(n_scalar_fields))
        do i = 1, n_scalar_fields
-            scalar_fields(i)%x => data%s(i)%x
+          scalar_fields(i)%x => data%s(i)%x
        end do
        nelv = data%nelv
        lx = data%lx
@@ -140,7 +141,7 @@ subroutine fld_file_write(this, data, t)
        gdim = data%gdim
        glb_nelv = data%glb_nelv
        offset_el = data%offset_el
- 
+
        allocate(idx(nelv))
        do i = 1, nelv
           idx(i) = data%idx(i)
@@ -173,7 +174,7 @@ subroutine fld_file_write(this, data, t)
           w%x => data%fields(4)%f%x(:,1,1,1)
           write_pressure = .true.
           write_velocity = .true.
-      case (5:99)
+       case (5:99)
           p%x => data%fields(1)%f%x(:,1,1,1)
           u%x => data%fields(2)%f%x(:,1,1,1)
           v%x => data%fields(3)%f%x(:,1,1,1)
@@ -190,7 +191,7 @@ subroutine fld_file_write(this, data, t)
        case default
           call neko_error('This many fields not supported yet, fld_file')
        end select
-       dof => data%fields(1)%f%dof 
+       dof => data%fields(1)%f%dof
 
     type is (mean_flow_t)
        u%x => data%u%mf%x(:,1,1,1)
@@ -205,7 +206,7 @@ subroutine fld_file_write(this, data, t)
        v%x => data%vv%mf%x(:,1,1,1)
        w%x => data%ww%mf%x(:,1,1,1)
        p%x => data%pp%mf%x(:,1,1,1)
-       dof => data%pp%mf%dof              
+       dof => data%pp%mf%dof
        write_pressure = .true.
        write_velocity = .true.
     class default
@@ -252,14 +253,14 @@ subroutine fld_file_write(this, data, t)
     else
        allocate(tmp_sp(gdim*n))
     end if
-     
-    
+
+
     !
     ! Create fld header for NEKTON's multifile output
     !
 
     write_mesh = (this%counter .eq. this%start_counter)
-    
+
     ! Build rdcode note that for field_t, we only support scalar
     ! fields at the moment
     rdcode = ' '
@@ -299,7 +300,7 @@ subroutine fld_file_write(this, data, t)
     suffix_pos = filename_suffix_pos(this%fname)
     write(id_str, '(a,i5.5)') 'f', this%counter
     fname = trim(this%fname(1:suffix_pos-1))//'0.'//id_str
-    
+
     call MPI_File_open(NEKO_COMM, trim(fname), &
          MPI_MODE_WRONLY + MPI_MODE_CREATE, MPI_INFO_NULL, fh, ierr)
 
@@ -316,16 +317,16 @@ subroutine fld_file_write(this, data, t)
     mpi_offset = mpi_offset + int(glb_nelv, i8) * int(MPI_INTEGER_SIZE, i8)
 
     deallocate(idx)
-    
+
     if (write_mesh) then
 
        byte_offset = mpi_offset + int(offset_el, i8) * &
             (int(gdim*lxyz, i8) * &
             int(FLD_DATA_SIZE, i8))
-       
+
        call fld_file_write_vector_field(this, fh, byte_offset, x%x, y%x, z%x, n,  gdim, lxyz, nelv)
        mpi_offset = mpi_offset + int(glb_nelv, i8) * &
-            (int(gdim *lxyz, i8) * & 
+            (int(gdim *lxyz, i8) * &
             int(FLD_DATA_SIZE, i8))
     end if
 
@@ -338,7 +339,7 @@ subroutine fld_file_write(this, data, t)
        mpi_offset = mpi_offset + int(glb_nelv, i8) * &
             (int(gdim * (lxyz), i8) * &
             int(FLD_DATA_SIZE, i8))
-       
+
     end if
 
     if (write_pressure) then
@@ -365,7 +366,7 @@ subroutine fld_file_write(this, data, t)
 
     do i = 1, n_scalar_fields
        !Without this redundant if statement, Cray optimizes this loop to Oblivion
-       if (i .eq. 2) then 
+       if (i .eq. 2) then
           mpi_offset = int(temp_offset,i8) + int(1_i8*glb_nelv, i8) * &
                            (int(lxyz, i8) * &
                        int(FLD_DATA_SIZE, i8))
@@ -378,11 +379,94 @@ subroutine fld_file_write(this, data, t)
             (int(lxyz, i8) * &
             int(FLD_DATA_SIZE, i8))
     end do
-    
+
+
+    !> Include metadata with bounding boxes (Just copying from nek5000)
+    if (write_mesh) then
+       !The offset is: mpioff + element_off*2(min max value)*4(single precision)*gdim(dimensions)
+       byte_offset = int(mpi_offset,i8) + &
+                     int(offset_el, i8) * &
+                     int(2, i8) * &
+                     int(MPI_REAL_SIZE, i8) * &
+                     int(gdim, i8)
+       call fld_file_write_metadata_vector(this, fh, byte_offset, x%x, y%x, z%x, gdim, lxyz, nelv)
+       mpi_offset = int(mpi_offset,i8) + &
+                     int(glb_nelv, i8) * &
+                     int(2, i8) * &
+                     int(MPI_REAL_SIZE, i8) * &
+                     int(gdim, i8)
+    end if
+
+    if (write_velocity) then
+       byte_offset = int(mpi_offset,i8) + &
+                     int(offset_el, i8) * &
+                     int(2, i8) * &
+                     int(MPI_REAL_SIZE, i8) * &
+                     int(gdim, i8)
+       call fld_file_write_metadata_vector(this, fh, byte_offset, u%x, v%x, w%x, gdim, lxyz, nelv)
+       mpi_offset = int(mpi_offset,i8) + &
+                     int(glb_nelv, i8) * &
+                     int(2, i8) * &
+                     int(MPI_REAL_SIZE, i8) * &
+                     int(gdim, i8)
+
+    end if
+
+    if (write_pressure) then
+       byte_offset = int(mpi_offset,i8) + &
+                     int(offset_el, i8) * &
+                     int(2, i8) * &
+                     int(MPI_REAL_SIZE, i8)
+       call fld_file_write_metadata_scalar(this, fh, byte_offset, p%x, lxyz, nelv)
+       mpi_offset = int(mpi_offset,i8) + &
+                     int(glb_nelv, i8) * &
+                     int(2, i8) * &
+                     int(MPI_REAL_SIZE, i8)
+
+    end if
+
+    if (write_temperature) then
+       byte_offset = int(mpi_offset,i8) + &
+                     int(offset_el, i8) * &
+                     int(2, i8) * &
+                     int(MPI_REAL_SIZE, i8)
+       call fld_file_write_metadata_scalar(this, fh, byte_offset, tem%x, lxyz, nelv)
+       mpi_offset = int(mpi_offset,i8) + &
+                     int(glb_nelv, i8) * &
+                     int(2, i8) * &
+                     int(MPI_REAL_SIZE, i8)
+
+    end if
+
+
+
+    temp_offset = mpi_offset
+
+    do i = 1, n_scalar_fields
+       !Without this redundant if statement, Cray optimizes this loop to Oblivion
+       if (i .eq. 2) then
+          mpi_offset = int(temp_offset,i8) + &
+                       int(1_i8*glb_nelv, i8) * &
+                       int(2, i8) * &
+                       int(MPI_REAL_SIZE, i8)
+       end if
+
+       byte_offset = int(mpi_offset,i8) + &
+                     int(offset_el, i8) * &
+                     int(2, i8) * &
+                     int(MPI_REAL_SIZE, i8)
+       call fld_file_write_metadata_scalar(this, fh, byte_offset, scalar_fields(i)%x, lxyz, nelv)
+       mpi_offset = int(mpi_offset,i8) + &
+                     int(glb_nelv, i8) * &
+                     int(2, i8) * &
+                     int(MPI_REAL_SIZE, i8)
+    end do
+
+
     call MPI_File_sync(fh, ierr)
     call MPI_File_close(fh, ierr)
 
-    ! Write metadata file 
+    ! Write metadata file
     if (pe_rank .eq. 0) then
        tslash_pos = filename_tslash_pos(this%fname)
        write(start_field,"(I5,A8)") this%start_counter,'.nek5000'
@@ -397,11 +481,66 @@ subroutine fld_file_write(this, data, t)
     end if
 
     this%counter = this%counter + 1
-    
+
     if (allocated(tmp_dp)) deallocate(tmp_dp)
     if (allocated(tmp_sp)) deallocate(tmp_sp)
   end subroutine fld_file_write
 
+  subroutine fld_file_write_metadata_vector(this, fh, byte_offset, x, y, z, gdim, lxyz, nelv)
+    class(fld_file_t), intent(inout) :: this
+    type(MPI_File), intent(inout) :: fh
+    integer, intent(in) :: gdim, lxyz, nelv
+    real(kind=rp), intent(in) :: x(lxyz,nelv), y(lxyz,nelv), z(lxyz,nelv)
+    integer (kind=MPI_OFFSET_KIND), intent(in) :: byte_offset
+    integer :: i, el, j, ierr, nout
+    type(MPI_Status) :: status
+    real(kind=sp) :: buffer(2*gdim*nelv)
+
+     j = 1
+     do el=1,nelv
+        buffer(j+0) = real(vlmin(x(1,el),lxyz),sp)
+        buffer(j+1) = real(vlmax(x(1,el),lxyz),sp)
+        buffer(j+2) = real(vlmin(y(1,el),lxyz) ,sp)
+        buffer(j+3) = real(vlmax(y(1,el),lxyz),sp)
+        j = j + 4
+        buffer(j+0) = real(vlmin(z(1,el),lxyz) ,sp)
+        buffer(j+1) = real(vlmax(z(1,el),lxyz),sp)
+        j = j + 2
+     enddo
+
+     ! write out data
+     nout = 2*gdim*nelv
+
+     call MPI_File_write_at_all(fh, byte_offset, buffer, nout, &
+            MPI_REAL, status, ierr)
+
+  end subroutine fld_file_write_metadata_vector
+
+  subroutine fld_file_write_metadata_scalar(this, fh, byte_offset, x, lxyz, nelv)
+    class(fld_file_t), intent(inout) :: this
+    type(MPI_File), intent(inout) :: fh
+    integer, intent(in) :: lxyz, nelv
+    real(kind=rp), intent(in) :: x(lxyz,nelv)
+    integer (kind=MPI_OFFSET_KIND), intent(in) :: byte_offset
+    integer :: i, el, j, ierr, nout
+    type(MPI_Status) :: status
+    real(kind=sp) :: buffer(2*nelv)
+
+     j = 1
+     do el=1,nelv
+        buffer(j+0) = real(vlmin(x(1,el),lxyz),sp)
+        buffer(j+1) = real(vlmax(x(1,el),lxyz),sp)
+        j = j + 2
+     enddo
+
+     ! write out data
+     nout = 2*nelv
+
+     call MPI_File_write_at_all(fh, byte_offset, buffer, nout, &
+            MPI_REAL, status, ierr)
+
+  end subroutine fld_file_write_metadata_scalar
+
   subroutine fld_file_write_field(this, fh, byte_offset, p, n)
     class(fld_file_t), intent(inout) :: this
     type(MPI_File), intent(inout) :: fh
@@ -415,7 +554,7 @@ subroutine fld_file_write_field(this, fh, byte_offset, p, n)
        do i = 1, n
           tmp_dp(i) = real(p(i),dp)
        end do
-       
+
        call MPI_File_write_at_all(fh, byte_offset, tmp_dp, n, &
             MPI_DOUBLE_PRECISION, status, ierr)
     else
@@ -425,9 +564,9 @@ subroutine fld_file_write_field(this, fh, byte_offset, p, n)
        call MPI_File_write_at_all(fh, byte_offset, tmp_sp, n, &
             MPI_REAL, status, ierr)
     end if
-    
+
   end subroutine fld_file_write_field
- 
+
   subroutine fld_file_write_vector_field(this, fh, byte_offset, x, y, z, n, gdim, lxyz, nelv)
     class(fld_file_t), intent(inout) :: this
     type(MPI_File), intent(inout) :: fh
@@ -436,7 +575,7 @@ subroutine fld_file_write_vector_field(this, fh, byte_offset, x, y, z, n, gdim,
     integer (kind=MPI_OFFSET_KIND), intent(in) :: byte_offset
     integer :: i, el, j, ierr
     type(MPI_Status) :: status
-     
+
     if (this%dp_precision) then
        i = 1
        do el = 1, nelv
@@ -481,7 +620,7 @@ subroutine fld_file_write_vector_field(this, fh, byte_offset, x, y, z, n, gdim,
 
 
   end subroutine fld_file_write_vector_field
-       
+
   !> Load a field from a NEKTON fld file
   subroutine fld_file_read(this, data)
     class(fld_file_t) :: this
@@ -497,234 +636,238 @@ subroutine fld_file_read(this, data)
     integer (kind=MPI_OFFSET_KIND) :: mpi_offset, byte_offset
     integer :: lx, ly, lz, glb_nelv, counter, lxyz
     integer :: FLD_DATA_SIZE, n_scalars, n, nd
-    real(kind=rp) ::  time 
+    real(kind=rp) ::  time
     real(kind=sp) :: temp
     type(linear_dist_t) :: dist
     real(kind=sp), parameter :: test_pattern = 6.54321
     character :: rdcode(10),temp_str(4)
+
     select type(data)
     type is (fld_file_data_t)
-      call filename_chsuffix(this%fname, meta_fname,'nek5000')
-
-      inquire(file=trim(meta_fname), exist=meta_file)
-      if (meta_file .and. data%meta_nsamples .eq. 0) then
-         if (pe_rank .eq. 0) then
-            open(unit=9, file=trim(meta_fname))
-            read(9, fmt='(A)') string
-            read(string(14:),fmt='(A)') string
-            string = trim(string)
-            data%fld_series_fname = string(:scan(trim(string), '%')-1)
-            data%fld_series_fname = trim(data%fld_series_fname)//'0'   
-            read(9, fmt='(A)') string
-            read(string(scan(string,':')+1:),*) data%meta_start_counter
-            read(9, fmt='(A)') string
-            read(string(scan(string,':')+1:),*) data%meta_nsamples
-
-            close(9)
-            write(*,*) 'Reading meta file for fld series'
-            write(*,*) 'Name: ', trim(data%fld_series_fname)
-            write(*,*) 'Start counter: ', data%meta_start_counter, 'Nsamples: ', data%meta_nsamples
-         end if
-         call MPI_Bcast(data%fld_series_fname, 1024, MPI_CHARACTER, 0, NEKO_COMM, ierr)
-         call MPI_Bcast(data%meta_start_counter, 1, MPI_INTEGER, 0, NEKO_COMM, ierr)
-         call MPI_Bcast(data%meta_nsamples, 1, MPI_INTEGER, 0, NEKO_COMM, ierr)
-         if(this%counter .eq. 0) this%counter = data%meta_start_counter
-      end if
-
-      if (meta_file) then
-         write(id_str, '(a,i5.5)') 'f', this%counter
-         fname = trim(data%fld_series_fname)//'.'//id_str
-         if (this%counter .ge. data%meta_nsamples+data%meta_start_counter) then
-            call neko_error('Trying to read more fld files than exist')
-         end if
-      else
-         suffix_pos = filename_suffix_pos(this%fname)
-         write(id_str, '(a,i5.5)') 'f', this%counter
-         fname = trim(this%fname(1:suffix_pos-1))//'.'//id_str
-      end if
-      call MPI_File_open(NEKO_COMM, trim(fname), &
+       call filename_chsuffix(this%fname, meta_fname,'nek5000')
+
+       inquire(file=trim(meta_fname), exist=meta_file)
+       if (meta_file .and. data%meta_nsamples .eq. 0) then
+          if (pe_rank .eq. 0) then
+             open(unit=9, file=trim(meta_fname))
+             read(9, fmt='(A)') string
+             read(string(14:),fmt='(A)') string
+             string = trim(string)
+             data%fld_series_fname = string(:scan(trim(string), '%')-1)
+             data%fld_series_fname = trim(data%fld_series_fname)//'0'
+             read(9, fmt='(A)') string
+             read(string(scan(string,':')+1:),*) data%meta_start_counter
+             read(9, fmt='(A)') string
+             read(string(scan(string,':')+1:),*) data%meta_nsamples
+
+             close(9)
+             write(*,*) 'Reading meta file for fld series'
+             write(*,*) 'Name: ', trim(data%fld_series_fname)
+             write(*,*) 'Start counter: ', data%meta_start_counter, 'Nsamples: ', data%meta_nsamples
+          end if
+          call MPI_Bcast(data%fld_series_fname, 1024, MPI_CHARACTER, 0, NEKO_COMM, ierr)
+          call MPI_Bcast(data%meta_start_counter, 1, MPI_INTEGER, 0, NEKO_COMM, ierr)
+          call MPI_Bcast(data%meta_nsamples, 1, MPI_INTEGER, 0, NEKO_COMM, ierr)
+          if(this%counter .eq. 0) this%counter = data%meta_start_counter
+       end if
+
+       if (meta_file) then
+          write(id_str, '(a,i5.5)') 'f', this%counter
+          fname = trim(data%fld_series_fname)//'.'//id_str
+          if (this%counter .ge. data%meta_nsamples+data%meta_start_counter) then
+             call neko_error('Trying to read more fld files than exist')
+          end if
+       else
+          suffix_pos = filename_suffix_pos(this%fname)
+          write(id_str, '(a,i5.5)') 'f', this%counter
+          fname = trim(this%fname(1:suffix_pos-1))//'.'//id_str
+       end if
+       call MPI_File_open(NEKO_COMM, trim(fname), &
            MPI_MODE_RDONLY, MPI_INFO_NULL, fh, ierr)
-      call MPI_File_read_all(fh, hdr, 132, MPI_CHARACTER,  status, ierr)
-      !This read can prorbably be done wihtout the temp variables, temp_str, i, j       
-      
-      read(hdr, 1) temp_str,FLD_DATA_SIZE, lx, ly, lz, glb_nelv, glb_nelv,&
+
+       if (ierr .ne. 0) call neko_error("Could not read "//trim(fname))
+
+       call MPI_File_read_all(fh, hdr, 132, MPI_CHARACTER,  status, ierr)
+       !This read can prorbably be done wihtout the temp variables, temp_str, i, j
+
+       read(hdr, 1) temp_str,FLD_DATA_SIZE, lx, ly, lz, glb_nelv, glb_nelv,&
           time, counter, i, j, (rdcode(i),i=1,10)
-1     format(4a,1x,i1,1x,i2,1x,i2,1x,i2,1x,i10,1x,i10,1x,e20.13,&
+1      format(4a,1x,i1,1x,i2,1x,i2,1x,i2,1x,i10,1x,i10,1x,e20.13,&
          1x,i9,1x,i6,1x,i6,1x,10a)
-      if (data%nelv .eq. 0) then
-         dist = linear_dist_t(glb_nelv, pe_rank, pe_size, NEKO_COMM)
-         data%nelv = dist%num_local()
-         data%offset_el = dist%start_idx()
-      end if
-      data%lx = lx
-      data%ly = ly
-      data%lz = lz
-      data%glb_nelv = glb_nelv
-      data%t_counter = counter
-      data%time = time
-      lxyz = lx * ly * lz
-      n = lxyz * data%nelv
-
-      if (lz .eq. 1) then
-         data%gdim = 2 
-      else 
-         data%gdim = 3
-      end if
-      
-
-      if (this%dp_precision) then
-         FLD_DATA_SIZE = MPI_DOUBLE_PRECISION_SIZE
-      else
-         FLD_DATA_SIZE = MPI_REAL_SIZE
-      end if
-      if (this%dp_precision) then
-         allocate(tmp_dp(data%gdim*n))
-      else
-         allocate(tmp_sp(data%gdim*n))
-      end if
-     
-
-      i = 1
-      read_mesh = .false.
-      read_velocity = .false.
-      read_pressure = .false.
-      read_temp = .false.
-      if (rdcode(i) .eq. 'X') then
-         read_mesh = .true.
-         if (data%x%n .ne. n) call data%x%init(n)
-         if (data%y%n .ne. n) call data%y%init(n)
-         if (data%z%n .ne. n) call data%z%init(n)
-         i = i + 1
-      end if
-      if (rdcode(i) .eq. 'U') then
-         read_velocity = .true.
-         if (data%u%n .ne. n) call data%u%init(n)
-         if (data%v%n .ne. n) call data%v%init(n)
-         if (data%w%n .ne. n) call data%w%init(n)
-         i = i + 1
-      end if
-      if (rdcode(i) .eq. 'P') then
-         read_pressure = .true.
-         if (data%p%n .ne. n) call data%p%init(n)
-         i = i + 1
-      end if
-      if (rdcode(i) .eq. 'T') then
-         read_temp = .true.
-         if (data%t%n .ne. n) call data%t%init(n)
-         i = i + 1
-      end if
-      n_scalars = 0
-      if (rdcode(i) .eq. 'S') then
-         i = i + 1
-         read(rdcode(i),*) n_scalars
-         n_scalars = n_scalars*10
-         i = i + 1
-         read(rdcode(i),*) j
-         n_scalars = n_scalars+j
-         i = i + 1
-         if (allocated(data%s)) then
-            if (data%n_scalars .ne. n_scalars) then
-               do j = 1, data%n_scalars
-                  call data%s(j)%free()
-               end do
-               deallocate(data%s)
-               data%n_scalars = n_scalars
-               allocate(data%s(n_scalars))
-               do j = 1, data%n_scalars
-                  call data%s(j)%init(n)
-               end do
-            end if
-         else
-            data%n_scalars = n_scalars
-            allocate(data%s(data%n_scalars))
-            do j = 1, data%n_scalars
-               call data%s(j)%init(n)
-            end do
-         end if
-         i = i + 1
-      end if
-
-      mpi_offset = 132 * MPI_CHARACTER_SIZE
-      call MPI_File_read_at_all(fh, mpi_offset, temp, 1, &
+       if (data%nelv .eq. 0) then
+          dist = linear_dist_t(glb_nelv, pe_rank, pe_size, NEKO_COMM)
+          data%nelv = dist%num_local()
+          data%offset_el = dist%start_idx()
+       end if
+       data%lx = lx
+       data%ly = ly
+       data%lz = lz
+       data%glb_nelv = glb_nelv
+       data%t_counter = counter
+       data%time = time
+       lxyz = lx * ly * lz
+       n = lxyz * data%nelv
+
+       if (lz .eq. 1) then
+          data%gdim = 2
+       else
+          data%gdim = 3
+       end if
+
+
+       if (FLD_DATA_SIZE .eq. MPI_DOUBLE_PRECISION_SIZE) then
+          this%dp_precision = .true.
+       else
+          this%dp_precision = .false.
+       end if
+       if (this%dp_precision) then
+          allocate(tmp_dp(data%gdim*n))
+       else
+          allocate(tmp_sp(data%gdim*n))
+       end if
+
+
+       i = 1
+       read_mesh = .false.
+       read_velocity = .false.
+       read_pressure = .false.
+       read_temp = .false.
+       if (rdcode(i) .eq. 'X') then
+          read_mesh = .true.
+          if (data%x%n .ne. n) call data%x%init(n)
+          if (data%y%n .ne. n) call data%y%init(n)
+          if (data%z%n .ne. n) call data%z%init(n)
+          i = i + 1
+       end if
+       if (rdcode(i) .eq. 'U') then
+          read_velocity = .true.
+          if (data%u%n .ne. n) call data%u%init(n)
+          if (data%v%n .ne. n) call data%v%init(n)
+          if (data%w%n .ne. n) call data%w%init(n)
+          i = i + 1
+       end if
+       if (rdcode(i) .eq. 'P') then
+          read_pressure = .true.
+          if (data%p%n .ne. n) call data%p%init(n)
+          i = i + 1
+       end if
+       if (rdcode(i) .eq. 'T') then
+          read_temp = .true.
+          if (data%t%n .ne. n) call data%t%init(n)
+          i = i + 1
+       end if
+       n_scalars = 0
+       if (rdcode(i) .eq. 'S') then
+          i = i + 1
+          read(rdcode(i),*) n_scalars
+          n_scalars = n_scalars*10
+          i = i + 1
+          read(rdcode(i),*) j
+          n_scalars = n_scalars+j
+          i = i + 1
+          if (allocated(data%s)) then
+             if (data%n_scalars .ne. n_scalars) then
+                do j = 1, data%n_scalars
+                   call data%s(j)%free()
+                end do
+                deallocate(data%s)
+                data%n_scalars = n_scalars
+                allocate(data%s(n_scalars))
+                do j = 1, data%n_scalars
+                   call data%s(j)%init(n)
+                end do
+             end if
+          else
+             data%n_scalars = n_scalars
+             allocate(data%s(data%n_scalars))
+             do j = 1, data%n_scalars
+                call data%s(j)%init(n)
+             end do
+          end if
+          i = i + 1
+       end if
+
+       mpi_offset = 132 * MPI_CHARACTER_SIZE
+       call MPI_File_read_at_all(fh, mpi_offset, temp, 1, &
            MPI_REAL, status, ierr)
-      if (temp .ne. test_pattern) then
-         call neko_error('Incorrect format for fld file, test pattern does not match.')
-      end if
-      mpi_offset = mpi_offset  + MPI_REAL_SIZE
-
-
-      if (allocated(data%idx)) then
-         if (size(data%idx) .ne. data%nelv) then
-            deallocate(data%idx)
-            allocate(data%idx(data%nelv))
-         end if
-      else
-         allocate(data%idx(data%nelv))
-      end if
-      
-      byte_offset = mpi_offset + &
+       if (temp .ne. test_pattern) then
+          call neko_error('Incorrect format for fld file, test pattern does not match.')
+       end if
+       mpi_offset = mpi_offset  + MPI_REAL_SIZE
+
+
+       if (allocated(data%idx)) then
+          if (size(data%idx) .ne. data%nelv) then
+             deallocate(data%idx)
+             allocate(data%idx(data%nelv))
+          end if
+       else
+          allocate(data%idx(data%nelv))
+       end if
+
+       byte_offset = mpi_offset + &
            int(data%offset_el, i8) * int(MPI_INTEGER_SIZE, i8)
 
-      call MPI_File_read_at_all(fh, byte_offset, data%idx, data%nelv, &
+       call MPI_File_read_at_all(fh, byte_offset, data%idx, data%nelv, &
            MPI_INTEGER, status, ierr)
-      
-      mpi_offset = mpi_offset + int(data%glb_nelv, i8) * int(MPI_INTEGER_SIZE, i8)
-      
-      if (read_mesh) then
-         byte_offset = mpi_offset + int(data%offset_el, i8) * &
+
+       mpi_offset = mpi_offset + int(data%glb_nelv, i8) * int(MPI_INTEGER_SIZE, i8)
+
+       if (read_mesh) then
+          byte_offset = mpi_offset + int(data%offset_el, i8) * &
              (int(data%gdim*lxyz, i8) * &
               int(FLD_DATA_SIZE, i8))
-         call fld_file_read_vector_field(this, fh, byte_offset, data%x, data%y, data%z, data)
-         mpi_offset = mpi_offset + int(data%glb_nelv, i8) * &
-             (int(data%gdim *lxyz, i8) * & 
+          call fld_file_read_vector_field(this, fh, byte_offset, data%x, data%y, data%z, data)
+          mpi_offset = mpi_offset + int(data%glb_nelv, i8) * &
+             (int(data%gdim *lxyz, i8) * &
               int(FLD_DATA_SIZE, i8))
-      end if
+       end if
 
-      if (read_velocity) then
-         byte_offset = mpi_offset + int(data%offset_el, i8) * &
+       if (read_velocity) then
+          byte_offset = mpi_offset + int(data%offset_el, i8) * &
              (int(data%gdim*lxyz, i8) * &
               int(FLD_DATA_SIZE, i8))
-         call fld_file_read_vector_field(this, fh, byte_offset, data%u, data%v, data%w, data)
-         mpi_offset = mpi_offset + int(data%glb_nelv, i8) * &
-             (int(data%gdim *lxyz, i8) * & 
+          call fld_file_read_vector_field(this, fh, byte_offset, data%u, data%v, data%w, data)
+          mpi_offset = mpi_offset + int(data%glb_nelv, i8) * &
+             (int(data%gdim *lxyz, i8) * &
               int(FLD_DATA_SIZE, i8))
-      end if
+       end if
 
-      if (read_pressure) then
-         byte_offset = mpi_offset + int(data%offset_el, i8) * &
+       if (read_pressure) then
+          byte_offset = mpi_offset + int(data%offset_el, i8) * &
              (int(lxyz, i8) * &
               int(FLD_DATA_SIZE, i8))
-         call fld_file_read_field(this, fh, byte_offset, data%p, data)
-         mpi_offset = mpi_offset + int(data%glb_nelv, i8) * &
-             (int(lxyz, i8) * & 
+          call fld_file_read_field(this, fh, byte_offset, data%p, data)
+          mpi_offset = mpi_offset + int(data%glb_nelv, i8) * &
+             (int(lxyz, i8) * &
               int(FLD_DATA_SIZE, i8))
-      end if
+       end if
 
-      if (read_temp) then
-         byte_offset = mpi_offset + int(data%offset_el, i8) * &
+       if (read_temp) then
+          byte_offset = mpi_offset + int(data%offset_el, i8) * &
              (int(lxyz, i8) * &
               int(FLD_DATA_SIZE, i8))
-         call fld_file_read_field(this, fh, byte_offset, data%t, data)
-         mpi_offset = mpi_offset + int(data%glb_nelv, i8) * &
-             (int(lxyz, i8) * & 
+          call fld_file_read_field(this, fh, byte_offset, data%t, data)
+          mpi_offset = mpi_offset + int(data%glb_nelv, i8) * &
+             (int(lxyz, i8) * &
               int(FLD_DATA_SIZE, i8))
-      end if
+       end if
 
-      do i = 1, n_scalars
-         byte_offset = mpi_offset + int(data%offset_el, i8) * &
+       do i = 1, n_scalars
+          byte_offset = mpi_offset + int(data%offset_el, i8) * &
              (int(lxyz, i8) * &
               int(FLD_DATA_SIZE, i8))
-         call fld_file_read_field(this, fh, byte_offset, data%s(i), data)
-         mpi_offset = mpi_offset + int(data%glb_nelv, i8) * &
-             (int(lxyz, i8) * & 
+          call fld_file_read_field(this, fh, byte_offset, data%s(i), data)
+          mpi_offset = mpi_offset + int(data%glb_nelv, i8) * &
+             (int(lxyz, i8) * &
               int(FLD_DATA_SIZE, i8))
-      end do
+       end do
 
-      this%counter = this%counter + 1
+       this%counter = this%counter + 1
 
-      if (allocated(tmp_dp)) deallocate(tmp_dp)
-      if (allocated(tmp_sp)) deallocate(tmp_sp)
-    class default 
+       if (allocated(tmp_dp)) deallocate(tmp_dp)
+       if (allocated(tmp_sp)) deallocate(tmp_sp)
+    class default
        call neko_error('Currently we only read into fld_file_data_t, please use that data structure instead.')
     end select
 
@@ -749,15 +892,15 @@ subroutine fld_file_read_field(this, fh, byte_offset, x, fld_data)
        call MPI_File_read_at_all(fh, byte_offset, tmp_sp, n, &
             MPI_REAL, status, ierr)
     end if
-  
+
     if (this%dp_precision) then
        do i = 1, n
-          x%x(i) = tmp_dp(i) 
-       end do     
+          x%x(i) = tmp_dp(i)
+       end do
     else
        do i = 1, n
-          x%x(i) = tmp_sp(i) 
-       end do     
+          x%x(i) = tmp_sp(i)
+       end do
     end if
 
 
@@ -785,21 +928,21 @@ subroutine fld_file_read_vector_field(this, fh, byte_offset, x, y, z, fld_data)
             MPI_REAL, status, ierr)
     end if
 
-  
+
     if (this%dp_precision) then
        i = 1
        do e = 1, fld_data%nelv
           do j = 1, lxyz
-             x%x((e-1)*lxyz+j) = tmp_dp(i) 
+             x%x((e-1)*lxyz+j) = tmp_dp(i)
              i = i +1
           end do
           do j = 1, lxyz
-             y%x((e-1)*lxyz+j) = tmp_dp(i) 
+             y%x((e-1)*lxyz+j) = tmp_dp(i)
              i = i +1
           end do
           if (fld_data%gdim .eq. 3) then
              do j = 1, lxyz
-                z%x((e-1)*lxyz+j) = tmp_dp(i) 
+                z%x((e-1)*lxyz+j) = tmp_dp(i)
                 i = i +1
              end do
           end if
@@ -808,27 +951,27 @@ subroutine fld_file_read_vector_field(this, fh, byte_offset, x, y, z, fld_data)
        i = 1
        do e = 1, fld_data%nelv
           do j = 1, lxyz
-             x%x((e-1)*lxyz+j) = tmp_sp(i) 
+             x%x((e-1)*lxyz+j) = tmp_sp(i)
              i = i +1
           end do
           do j = 1, lxyz
-             y%x((e-1)*lxyz+j) = tmp_sp(i) 
+             y%x((e-1)*lxyz+j) = tmp_sp(i)
              i = i +1
           end do
           if (fld_data%gdim .eq. 3) then
              do j = 1, lxyz
-                z%x((e-1)*lxyz+j) = tmp_sp(i) 
+                z%x((e-1)*lxyz+j) = tmp_sp(i)
                 i = i +1
              end do
           end if
-       end do     
+       end do
     end if
 
   end subroutine fld_file_read_vector_field
 
   subroutine fld_file_set_precision(this, precision)
     class(fld_file_t) :: this
-    integer, intent(inout) :: precision
+    integer, intent(in) :: precision
 
     if (precision .eq. dp) then
        this%dp_precision = .true.
@@ -837,8 +980,8 @@ subroutine fld_file_set_precision(this, precision)
     else
        call neko_error('Invalid precision')
     end if
-    
+
   end subroutine fld_file_set_precision
 
-  
+
 end module fld_file
diff --git a/src/io/fld_file_data.f90 b/src/io/fld_file_data.f90
index d6b70ed9432..093863048fc 100644
--- a/src/io/fld_file_data.f90
+++ b/src/io/fld_file_data.f90
@@ -10,7 +10,7 @@ module fld_file_data
   use vector, only : vector_t, vector_ptr_t
   implicit none
   private
-  
+
   type, public :: fld_file_data_t
      type(vector_t) :: x !< x-coords
      type(vector_t) :: y !< y-coords
@@ -46,7 +46,7 @@ module fld_file_data
   end type fld_file_data_t
 
 contains
-  
+
   !> Initialise a fld_file_data object with nelv elements with a offset_nel
   subroutine fld_file_data_init(this, nelv, offset_el)
     class(fld_file_data_t), intent(inout) :: this
@@ -54,24 +54,24 @@ subroutine fld_file_data_init(this, nelv, offset_el)
     call this%free()
     if (present(nelv)) this%nelv = nelv
     if (present(offset_el)) this%offset_el = offset_el
-    
+
   end subroutine fld_file_data_init
   !> Get number of fields in this fld file
   function fld_file_data_size(this) result(i)
     class(fld_file_data_t) :: this
     integer :: i
     i = 0
-    if(this%u%n .gt. 0) i = i + 1 
-    if(this%v%n .gt. 0) i = i + 1 
-    if(this%w%n .gt. 0) i = i + 1 
-    if(this%p%n .gt. 0) i = i + 1 
-    if(this%t%n .gt. 0) i = i + 1 
+    if(this%u%n .gt. 0) i = i + 1
+    if(this%v%n .gt. 0) i = i + 1
+    if(this%w%n .gt. 0) i = i + 1
+    if(this%p%n .gt. 0) i = i + 1
+    if(this%t%n .gt. 0) i = i + 1
     i = i + this%n_scalars
 
   end function fld_file_data_size
 
   !> Get a list with pointers to the fields in the fld file
-  subroutine fld_file_get_list(this, ptr_list, n) 
+  subroutine fld_file_get_list(this, ptr_list, n)
     class(fld_file_data_t), target, intent(in) :: this
     integer, intent(in) :: n
     integer :: i, j
@@ -80,23 +80,23 @@ subroutine fld_file_get_list(this, ptr_list, n)
     if(this%u%n .gt. 0) then
        ptr_list(i)%v => this%u
        i = i + 1
-    end if     
+    end if
     if(this%v%n .gt. 0) then
        ptr_list(i)%v => this%v
        i = i + 1
-    end if     
+    end if
     if(this%w%n .gt. 0) then
        ptr_list(i)%v => this%w
        i = i + 1
-    end if     
+    end if
     if(this%p%n .gt. 0) then
        ptr_list(i)%v => this%p
        i = i + 1
-    end if     
+    end if
     if(this%t%n .gt. 0) then
        ptr_list(i)%v => this%t
        i = i + 1
-    end if     
+    end if
     do j = 1, this%n_scalars
        ptr_list(i)%v => this%s(j)
        i = i +1
@@ -154,14 +154,14 @@ subroutine fld_file_data_free(this)
     call this%p%free()
     call this%t%free()
     if (allocated(this%s)) then
-        do i = 1, this%n_scalars
-           call this%s(i)%free()
-        end do
+       do i = 1, this%n_scalars
+          call this%s(i)%free()
+       end do
     end if
     this%n_scalars = 0
     this%time = 0.0
-    this%glb_nelv = 0 
-    this%nelv = 0 
+    this%glb_nelv = 0
+    this%nelv = 0
     this%offset_el = 0
     this%lx = 0
     this%ly = 0
diff --git a/src/io/fld_file_output.f90 b/src/io/fld_file_output.f90
new file mode 100644
index 00000000000..4a43fe8fb03
--- /dev/null
+++ b/src/io/fld_file_output.f90
@@ -0,0 +1,107 @@
+! Copyright (c) 2020-2023, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Implements `fld_file_output_t`.
+module fld_file_output
+  use num_types, only : rp
+  use field_list, only : field_list_t
+  use neko_config, only : NEKO_BCKND_DEVICE
+  use device, only : device_memcpy, DEVICE_TO_HOST
+  use output, only : output_t
+  implicit none
+  private
+
+  !> A simple output saving a list of fields to a .fld file.
+  type, public, extends(output_t) :: fld_file_output_t
+     ! The list of fields to save.
+     type(field_list_t) :: fields
+   contains
+     ! Constructor.
+     procedure, pass(this) :: init => fld_file_output_init
+     ! Writes the data.
+     procedure, pass(this) :: sample => fld_file_output_sample
+  end type fld_file_output_t
+
+contains
+
+  !> Constructor.
+  !! @param precision the precison of the reals in the file.
+  !! @param name The base name of the files.
+  !! @param name The number of field pointers to preallocate in the field list.
+  !! @param path Optional path to the write folder.
+  subroutine fld_file_output_init(this, precision, name, nfields, path)
+    class(fld_file_output_t), intent (inout) :: this
+    integer, intent(in) :: precision
+    character(len=*), intent(in) :: name
+    character(len=*), intent(in), optional :: path
+    integer, intent(in) :: nfields
+    character(len=1024) :: fname
+
+    if (present(path)) then
+       fname = trim(path) // trim(name) // '.fld'
+    else
+       fname = trim(name) // '.fld'
+    end if
+
+    call this%init_base(fname, precision)
+
+    if (allocated(this%fields%fields)) then
+       deallocate(this%fields%fields)
+    end if
+
+    allocate(this%fields%fields(nfields))
+
+   end subroutine fld_file_output_init
+
+  !> Writes the data.
+  !! @param t The time value.
+  subroutine fld_file_output_sample(this, t)
+    class(fld_file_output_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: t
+    integer :: i
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       associate(fields => this%fields%fields)
+         do i = 1, size(fields)
+            call device_memcpy(fields(i)%f%x, fields(i)%f%x_d, &
+                 fields(i)%f%dof%size(), DEVICE_TO_HOST, &
+                 sync=(i .eq. size(fields))) ! Sync on the last field
+         end do
+       end associate
+
+    end if
+
+    call this%file_%write(this%fields, t)
+
+  end subroutine fld_file_output_sample
+
+end module fld_file_output
diff --git a/src/io/fluid_output.f90 b/src/io/fluid_output.f90
index 8b130be5b0a..d08fc4bf8d5 100644
--- a/src/io/fluid_output.f90
+++ b/src/io/fluid_output.f90
@@ -32,12 +32,13 @@
 !
 !> Defines an output for a fluid
 module fluid_output
+  use num_types, only : rp
   use fluid_scheme, only : fluid_scheme_t
   use scalar_scheme, only : scalar_scheme_t
   use field_list, only : field_list_t
   use neko_config
   use device
-  use output
+  use output, only : output_t
   implicit none
 
   !> Fluid output
@@ -53,7 +54,8 @@ module fluid_output
 
 contains
 
-  function fluid_output_init(fluid, scalar, name, path) result(this)
+  function fluid_output_init(precision, fluid, scalar, name, path) result(this)
+    integer, intent(inout) :: precision
     class(fluid_scheme_t), intent(in), target :: fluid
     class(scalar_scheme_t), intent(in), optional, target :: scalar
     character(len=*), intent(in), optional :: name
@@ -67,11 +69,11 @@ function fluid_output_init(fluid, scalar, name, path) result(this)
        fname = trim(name) // '.fld'
     else if (present(path)) then
        fname = trim(path) // 'field.fld'
-    else       
+    else
        fname = 'field.fld'
     end if
 
-    call output_init(this, fname)
+    call this%init_base(fname, precision)
 
     if (allocated(this%fluid%fields)) then
        deallocate(this%fluid%fields)
@@ -91,7 +93,7 @@ function fluid_output_init(fluid, scalar, name, path) result(this)
     if (present(scalar)) then
        this%fluid%fields(5)%f => scalar%s
     end if
-    
+
   end function fluid_output_init
 
   !> Sample a fluid solution at time @a t
@@ -106,14 +108,14 @@ subroutine fluid_output_sample(this, t)
          do i = 1, size(fields)
             call device_memcpy(fields(i)%f%x, fields(i)%f%x_d, &
                  fields(i)%f%dof%size(), DEVICE_TO_HOST, &
-                 sync=(i .eq. size(fields)))
+                 sync=(i .eq. size(fields))) ! Sync on the last field
          end do
        end associate
 
     end if
-       
+
     call this%file_%write(this%fluid, t)
 
   end subroutine fluid_output_sample
-  
+
 end module fluid_output
diff --git a/src/io/fluid_stats_output.f90 b/src/io/fluid_stats_output.f90
index 222c4dc168d..46bf3fba3f8 100644
--- a/src/io/fluid_stats_output.f90
+++ b/src/io/fluid_stats_output.f90
@@ -52,7 +52,7 @@ module fluid_stats_output
   end interface fluid_stats_output_t
 
 contains
-  
+
   function fluid_stats_output_init(stats, T_begin, name, path) result(this)
     type(fluid_stats_t), intent(in), target :: stats
     real(kind=rp), intent(in) :: T_begin
@@ -71,7 +71,7 @@ function fluid_stats_output_init(stats, T_begin, name, path) result(this)
        fname = 'stats.fld'
     end if
 
-    call output_init(this, fname)
+    call this%init_base(fname)
     this%stats => stats
     this%T_begin = T_begin
   end function fluid_stats_output_init
@@ -82,21 +82,21 @@ subroutine fluid_stats_output_sample(this, t)
     real(kind=rp), intent(in) :: t
     integer :: i
     associate (out_fields => this%stats%stat_fields%fields)
-    if (t .ge. this%T_begin) then
-       call this%stats%make_strong_grad() 
-       if ( NEKO_BCKND_DEVICE .eq. 1) then
-          do i = 1, size(out_fields)
-             call device_memcpy(out_fields(i)%f%x, out_fields(i)%f%x_d,&
+      if (t .ge. this%T_begin) then
+         call this%stats%make_strong_grad()
+         if ( NEKO_BCKND_DEVICE .eq. 1) then
+            do i = 1, size(out_fields)
+               call device_memcpy(out_fields(i)%f%x, out_fields(i)%f%x_d,&
                   out_fields(i)%f%dof%size(), DEVICE_TO_HOST, &
-                  sync=(i .eq. size(out_fields)))
-          end do
-       end if
-       call this%file_%write(this%stats%stat_fields, t)
-       call this%stats%reset()
-    end if
+                  sync=(i .eq. size(out_fields))) ! Sync on last field
+            end do
+         end if
+         call this%file_%write(this%stats%stat_fields, t)
+         call this%stats%reset()
+      end if
     end associate
   end subroutine fluid_stats_output_sample
-  
+
 end module fluid_stats_output
 
 
diff --git a/src/io/format/map.f90 b/src/io/format/map.f90
index 973c6e9cab6..386b229a4a6 100644
--- a/src/io/format/map.f90
+++ b/src/io/format/map.f90
@@ -24,7 +24,7 @@ subroutine map_init_mesh(m, msh)
 
     m%nel = msh%nelv
     m%nlv = msh%npts
-    
+
     call map_init_common(m)
 
   end subroutine map_init_mesh
@@ -45,11 +45,11 @@ end subroutine map_init_nel_nelv
 
   subroutine map_init_common(m)
     type(map_t), intent(inout) :: m
-    
+
     allocate(m%imap(m%nel))
-    
+
     allocate(m%vertex(m%nlv, m%nel))
-    
+
   end subroutine map_init_common
 
   subroutine map_free(m)
@@ -63,5 +63,5 @@ subroutine map_free(m)
        deallocate(m%vertex)
     end if
   end subroutine map_free
-  
+
 end module map
diff --git a/src/io/format/nmsh.f90 b/src/io/format/nmsh.f90
index 068be507f1c..37e09bdd755 100644
--- a/src/io/format/nmsh.f90
+++ b/src/io/format/nmsh.f90
@@ -14,7 +14,7 @@ module nmsh
      integer :: v_idx                     !< Vertex id (global)
      real(kind=dp), dimension(3) :: v_xyz ! Vertex coordinates
   end type nmsh_vertex_t
-  
+
   !> Neko quad element data
   type, public, extends(nmsh_t) :: nmsh_quad_t
      type(nmsh_vertex_t), dimension(4) :: v !< List of vertices
@@ -29,19 +29,19 @@ module nmsh
   type, public :: nmsh_zone_t
      integer :: e                        !< Element id (global)
      integer :: f                        !< Facet number
-     integer :: p_e                      !< Perio dic connection (element)     
+     integer :: p_e                      !< Perio dic connection (element)
      integer :: p_f                      !< Periodic connection (facet)
      integer, dimension(4) :: glb_pt_ids !< Global point ids
      integer :: type                     !< Zone type
   end type nmsh_zone_t
-  
-   !> Neko curve data
+
+  !> Neko curve data
   type, public :: nmsh_curve_el_t
      integer :: e                                !< Element id (global)
      real(kind=dp), dimension(5,12) :: curve_data !< Save 6 values for each edge
      integer, dimension(12) :: type               !< type of curve for each edge
   end type nmsh_curve_el_t
-  
+
 
 end module nmsh
 
diff --git a/src/io/format/re2.f90 b/src/io/format/re2.f90
index 7aed51a8bd5..8bcce6151aa 100644
--- a/src/io/format/re2.f90
+++ b/src/io/format/re2.f90
@@ -15,14 +15,14 @@ module re2
   end type re2v1_t
 
   !> NEKTON re2 element data (3d) (version 1)
-  type, public, extends(re2v1_t) :: re2v1_xyz_t    
+  type, public, extends(re2v1_t) :: re2v1_xyz_t
      real(kind=sp), dimension(8) :: x
      real(kind=sp), dimension(8) :: y
      real(kind=sp), dimension(8) :: z
   end type re2v1_xyz_t
 
   !> NEKTON re2 element data (2d) (version 1)
-  type, public, extends(re2v1_t) :: re2v1_xy_t    
+  type, public, extends(re2v1_t) :: re2v1_xy_t
      real(kind=sp), dimension(4) :: x
      real(kind=sp), dimension(4) :: y
   end type re2v1_xy_t
@@ -34,7 +34,7 @@ module re2
      real(kind=sp), dimension(5) :: point
      character(len=4) :: type
   end type re2v1_curve_t
-  
+
   !> NEKTON re2 bc data (version 1)
   type, public :: re2v1_bc_t
      integer :: elem
@@ -49,14 +49,14 @@ module re2
   end type re2v2_t
 
   !> NEKTON re2 element data (3d) (version 2)
-  type, public, extends(re2v2_t) :: re2v2_xyz_t    
+  type, public, extends(re2v2_t) :: re2v2_xyz_t
      real(kind=dp), dimension(8) :: x
      real(kind=dp), dimension(8) :: y
      real(kind=dp), dimension(8) :: z
   end type re2v2_xyz_t
 
   !> NEKTON re2 element data (2d) (version 2)
-  type, public, extends(re2v1_t) :: re2v2_xy_t    
+  type, public, extends(re2v1_t) :: re2v2_xy_t
      real(kind=dp), dimension(4) :: x
      real(kind=dp), dimension(4) :: y
   end type re2v2_xy_t
@@ -68,7 +68,7 @@ module re2
      real(kind=dp), dimension(5) :: point
      character(len=8) :: type
   end type re2v2_curve_t
-  
+
   !> NEKTON re2 bc data (version 2)
   type, public :: re2v2_bc_t
      real(kind=dp) :: elem
diff --git a/src/io/format/rea.f90 b/src/io/format/rea.f90
index 28a6987be73..3aab159d0cd 100644
--- a/src/io/format/rea.f90
+++ b/src/io/format/rea.f90
@@ -16,17 +16,17 @@ module rea
      character(len=3), allocatable :: cbc(:,:)
   end type rea_t
 
-  
+
   public :: rea_free
 
 contains
-  
+
   !> Free a NEKTON session data
   subroutine rea_free(r)
     type(rea_t), intent(inout) :: r
 
     call r%msh%free()
-    
+
     if (allocated(r%params)) then
        deallocate(r%params)
     end if
diff --git a/src/io/format/stl.f90 b/src/io/format/stl.f90
index e7f63397a07..c709484dd62 100644
--- a/src/io/format/stl.f90
+++ b/src/io/format/stl.f90
@@ -9,14 +9,14 @@ module stl
      character(len=80) :: hdr
      integer :: ntri
   end type stl_hdr_t
-  
+
   !> Defines a STL triangle
   type, public :: stl_triangle_t
      real(kind=sp) :: n(3)
      real(kind=sp) :: v1(3)
      real(kind=sp) :: v2(3)
      real(kind=sp) :: v3(3)
-     integer(kind=i2) :: attrib     
+     integer(kind=i2) :: attrib
   end type stl_triangle_t
-  
+
 end module stl
diff --git a/src/io/generic_file.f90 b/src/io/generic_file.f90
index 374908048c4..da26a09581e 100644
--- a/src/io/generic_file.f90
+++ b/src/io/generic_file.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2019-2022, The Neko Authors
+! Copyright (c) 2019-2023, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -33,23 +33,27 @@
 module generic_file
   use num_types
   implicit none
-  
+
   !> A generic file handler.
   type, abstract :: generic_file_t
      character(len=1024) :: fname
      integer :: counter
      integer :: start_counter = 0
+     !> File format is serial
+     logical :: serial = .false.
    contains
      !> Generic file constructor.
      procedure :: init => generic_file_init
      !< Write method.
-     procedure(generic_file_write), deferred :: write 
+     procedure(generic_file_write), deferred :: write
      !< Read method.
-     procedure(generic_file_read), deferred :: read   
+     procedure(generic_file_read), deferred :: read
      !> Set the file counter to @a n.
      procedure :: set_counter => generic_file_set_counter
      !> Set the file start counter to @a n.
      procedure :: set_start_counter => generic_file_set_start_counter
+     !> Ensure the file exists
+     procedure :: check_exists => generic_file_check_exists
   end type generic_file_t
 
   abstract interface
@@ -61,7 +65,7 @@ subroutine generic_file_write(this, data, t)
        real(kind=rp), intent(in), optional :: t
      end subroutine generic_file_write
   end interface
-  
+
   abstract interface
      subroutine generic_file_read(this, data)
        import :: generic_file_t
@@ -71,31 +75,58 @@ end subroutine generic_file_read
   end interface
 
 contains
-  
+
   !> Generic file constructor.
   !! @param fname Filename.
   subroutine generic_file_init(this, fname)
     class(generic_file_t) :: this
     character(len=*) :: fname
-    
+
     this%fname = fname
     this%counter = 0
-    
+
   end subroutine generic_file_init
 
   !> Set the file counter to @a n.
   subroutine generic_file_set_counter(this, n)
     class(generic_file_t), intent(inout) :: this
     integer, intent(in) :: n
-    this%counter = n    
+    this%counter = n
   end subroutine generic_file_set_counter
 
   !> Set the file start counter to @a n.
   subroutine generic_file_set_start_counter(this, n)
     class(generic_file_t), intent(inout) :: this
     integer, intent(in) :: n
-    this%start_counter = n    
+    this%start_counter = n
   end subroutine generic_file_set_start_counter
 
+  !> check if the file exists
+  subroutine generic_file_check_exists(this)
+    use utils, only: neko_error
+    use comm, only: pe_rank, NEKO_COMM
+    use mpi_f08
+    implicit none
+
+    class(generic_file_t), intent(in) :: this
+    logical :: file_exists
+    integer :: neko_mpi_ierr
+
+    file_exists = .false.
+
+    if (pe_rank .eq. 0 .or. this%serial) then
+       ! Stop if the file does not exist
+       inquire(file=this%fname, exist=file_exists)
+    end if
+    if (.not. this%serial) then
+       call MPI_Bcast(file_exists, 1, MPI_LOGICAL, 0, NEKO_COMM, neko_mpi_ierr)
+    end if
+
+    if (.not. file_exists) then
+       call neko_error('File does not exist: '//trim(this%fname))
+    end if
+
+  end subroutine generic_file_check_exists
+
 
 end module generic_file
diff --git a/src/io/map_file.f90 b/src/io/map_file.f90
index 19f69128a87..8cdd8e1c120 100644
--- a/src/io/map_file.f90
+++ b/src/io/map_file.f90
@@ -55,7 +55,9 @@ subroutine map_file_read(this, data)
     class(*), target, intent(inout) :: data
     type(map_t), pointer :: nm
     integer :: j, k, neli, nnzi, ierr
-    
+
+    call this%check_exists()
+
     select type(data)
     type is (map_t)
        nm => data
@@ -71,13 +73,13 @@ subroutine map_file_read(this, data)
     read(10, *) neli, nnzi
 
     !> @todo Check if neli matches map%nel
-    
+
     do j = 1, nm%nel
        read(10, *) nm%imap(j),(nm%vertex(k, j), k=1,nm%nlv)
     end do
-    
+
     close(unit=10)
-    
+
   end subroutine map_file_read
 
   subroutine map_file_write(this, data, t)
@@ -86,5 +88,5 @@ subroutine map_file_write(this, data, t)
     real(kind=rp), intent(in), optional :: t
     call neko_error("Not implemented yet!")
   end subroutine map_file_write
-  
+
 end module map_file
diff --git a/src/io/mean_flow_output.f90 b/src/io/mean_flow_output.f90
index 97508241354..e3dd8f7f970 100644
--- a/src/io/mean_flow_output.f90
+++ b/src/io/mean_flow_output.f90
@@ -34,7 +34,7 @@
 module mean_flow_output
   use mean_flow, only : mean_flow_t
   use num_types
-  use device  
+  use device
   use output
   implicit none
   private
@@ -51,7 +51,7 @@ module mean_flow_output
   end interface mean_flow_output_t
 
 contains
-  
+
   function mean_flow_output_init(mf, T_begin, name, path) result(this)
     type(mean_flow_t), intent(in), target ::mf
     real(kind=rp), intent(in) :: T_begin
@@ -70,7 +70,7 @@ function mean_flow_output_init(mf, T_begin, name, path) result(this)
        fname = 'mean_field.fld'
     end if
 
-    call output_init(this, fname)
+    call this%init_base(fname)
     this%mf => mf
     this%T_begin = T_begin
   end function mean_flow_output_init
@@ -81,16 +81,20 @@ subroutine mean_flow_output_sample(this, t)
     real(kind=rp), intent(in) :: t
 
     if (t .ge. this%T_begin) then
-       call device_memcpy(this%mf%p%mf%x, this%mf%p%mf%x_d, this%mf%p%mf%dof%size(), DEVICE_TO_HOST)
-       call device_memcpy(this%mf%u%mf%x, this%mf%u%mf%x_d, this%mf%p%mf%dof%size(), DEVICE_TO_HOST)
-       call device_memcpy(this%mf%v%mf%x, this%mf%v%mf%x_d, this%mf%p%mf%dof%size(), DEVICE_TO_HOST)
-       call device_memcpy(this%mf%w%mf%x, this%mf%w%mf%x_d, this%mf%p%mf%dof%size(), DEVICE_TO_HOST, sync=.true.)
+       call device_memcpy(this%mf%p%mf%x, this%mf%p%mf%x_d, this%mf%p%mf%dof%size(), &
+                          DEVICE_TO_HOST, sync=.false.)
+       call device_memcpy(this%mf%u%mf%x, this%mf%u%mf%x_d, this%mf%p%mf%dof%size(), &
+                          DEVICE_TO_HOST, sync=.false.)
+       call device_memcpy(this%mf%v%mf%x, this%mf%v%mf%x_d, this%mf%p%mf%dof%size(), &
+                          DEVICE_TO_HOST, sync=.false.)
+       call device_memcpy(this%mf%w%mf%x, this%mf%w%mf%x_d, this%mf%p%mf%dof%size(), &
+                          DEVICE_TO_HOST, sync=.true.)
        call this%file_%write(this%mf, t)
        call this%mf%reset()
     end if
 
   end subroutine mean_flow_output_sample
-  
+
 end module mean_flow_output
 
 
diff --git a/src/io/mean_sqr_flow_output.f90 b/src/io/mean_sqr_flow_output.f90
index 8ea02626a95..04e9492529a 100644
--- a/src/io/mean_sqr_flow_output.f90
+++ b/src/io/mean_sqr_flow_output.f90
@@ -50,7 +50,7 @@ module mean_sqr_flow_output
   end interface mean_sqr_flow_output_t
 
 contains
-  
+
   function mean_sqr_flow_output_init(msqrf, T_begin, name, path) result(this)
     type(mean_sqr_flow_t), intent(in), target ::msqrf
     real(kind=rp), intent(in) :: T_begin
@@ -58,7 +58,7 @@ function mean_sqr_flow_output_init(msqrf, T_begin, name, path) result(this)
     character(len=*), intent(in), optional :: path
     type(mean_sqr_flow_output_t) :: this
     character(len=1024) :: fname
-    
+
     if (present(name) .and. present(path)) then
        fname = trim(path) // trim(name) // '.fld'
     else if (present(name)) then
@@ -69,7 +69,7 @@ function mean_sqr_flow_output_init(msqrf, T_begin, name, path) result(this)
        fname = 'mean_sqr_field.fld'
     end if
 
-    call output_init(this, fname)
+    call this%init_base(fname)
     this%msqrf => msqrf
     this%T_begin = T_begin
   end function mean_sqr_flow_output_init
@@ -84,7 +84,7 @@ subroutine mean_sqr_flow_output_sample(this, t)
     end if
 
   end subroutine mean_sqr_flow_output_sample
-  
+
 end module mean_sqr_flow_output
 
 
diff --git a/src/io/nek_adios2.cpp b/src/io/nek_adios2.cpp
index 9c5d0b2c92d..451ec534494 100644
--- a/src/io/nek_adios2.cpp
+++ b/src/io/nek_adios2.cpp
@@ -3,358 +3,91 @@
 #include <iostream>
 #include <ctime>
 
+// Global Adios2 variables
 adios2::ADIOS adios;
-adios2::IO io;
-adios2::IO ior;
-adios2::IO io_head;
 adios2::IO io_asynchronous;
-adios2::Engine writer;
-adios2::Engine writer_head;
-adios2::Engine readr; 
 adios2::Engine writer_st;
-adios2::Variable<double> x;
-adios2::Variable<double> y;
-adios2::Variable<double> z;
-adios2::Variable<int> lglelw;
-adios2::Variable<int> lglelw_st;
-adios2::Variable<double> p;
-adios2::Variable<double> vx;
-adios2::Variable<double> vy;
-adios2::Variable<double> vz;
-adios2::Variable<double> bm1;
-adios2::Variable<double> p_st;
-adios2::Variable<double> vx_st;
-adios2::Variable<double> vy_st;
-adios2::Variable<double> vz_st;
-adios2::Variable<double> bm1_st;
-adios2::Variable<double> vxr;
-adios2::Variable<double> vyr;
-adios2::Variable<double> vzr;
-adios2::Variable<double> prr;
-adios2::Variable<int> lglelr;
-std::vector<double> vVXr;
-std::vector<double> vVYr;
-std::vector<double> vVZr;
-std::vector<double> vVPrr;
-std::vector<int> vVLGLELr;
-// adios2::Variable<double> t;
-double dataTime = 0.0;
-std::clock_t startT;
-std::clock_t elapsedT;
+adios2::Engine reader_st;
+adios2::Variable<double> f2py_field;
+adios2::Variable<double> py2f_field;
+// Global C variables
 int rank, size;
-int ifile;
-int ifilew;
-int ifstream;
-int decide_stream_global;
-
-
-extern "C" void adios2_setup_(
-    const int *nval,
-    const int *nelvin,
-    const int *nelb,
-    const int *nelgv,
-    const int *nelgt,
-    const double *xml,
-    const double *yml,
-    const double *zml,
-    const int *if_asynchronous,
+int reader_start;
+int reader_count;
+
+extern "C" void adios2_initialize_(
+    const int *lxyz,
+    const int *nelv,
+    const int *offset_el,
+    const int *glb_nelv,
+    const int *gdim,
     const int *comm_int
 ){
-    std::string configFile="adios2_config/config.xml";
     MPI_Comm comm = MPI_Comm_f2c(*comm_int);
-    adios = adios2::ADIOS(configFile, comm);
+    adios = adios2::ADIOS(comm);
     MPI_Comm_rank(comm, &rank);
     MPI_Comm_size(comm, &size);
-    // Compressor writer.
-    io = adios.DeclareIO("writer");
-    // Mesh writer.
-    io_head = adios.DeclareIO("writer0");
-    // Asynchronous writer.
-    io_asynchronous = adios.DeclareIO("writerISMPI");
-    // Compressor reader
-    ior = adios.DeclareIO("inputReader");
+    // Asynchronous IO.
+    io_asynchronous = adios.DeclareIO("streamIO");
+    io_asynchronous.SetEngine("SST");
 
-    // Determine if asyncrhonous operation will be needed for this set up
-    unsigned int decide_stream = static_cast<unsigned int>((*if_asynchronous));
-    decide_stream_global = decide_stream;
-    
     // Number of elements in my rank.
-    unsigned int nelv = static_cast<unsigned int>((*nelvin));
-
+    unsigned int nel = static_cast<unsigned int>((*nelv));
     // Determine where my rank writes in the global array according to number of element in previous ranks
-    unsigned int start = static_cast<unsigned int>(*nelb);
-    start *= static_cast<unsigned int>(*nval);
-
+    unsigned int start = static_cast<unsigned int>(*offset_el);
+    start *= static_cast<unsigned int>(*lxyz);
     // n is count, i.e number of entries in the array in my rank
-    unsigned int n = static_cast<unsigned int> (*nval) * nelv;
+    unsigned int n = static_cast<unsigned int> (*lxyz) * nel;
     // gn is the total size of the arrays, not per io rank 
-    unsigned int gn = static_cast<unsigned int>((*nelgv)*(*nval));
-    std::cout << rank << ": " << gn << ", " << start << "," << n << std::endl;
-
-    // Create the adios2 variables for writer that depend on the current start and n
-    p = io.DefineVariable<double>("P_OUT", {gn}, {start}, {n});
-    vx = io.DefineVariable<double>("VX_OUT", {gn}, {start}, {n});
-    vy = io.DefineVariable<double>("VY_OUT", {gn}, {start}, {n});
-    vz = io.DefineVariable<double>("VZ_OUT", {gn}, {start}, {n});
-    bm1 = io.DefineVariable<double>("BM1_OUT", {gn}, {start}, {n});
-
-    // Create the adios2 variables for writer0
-    x = io_head.DefineVariable<double>("X", {gn}, {start}, {n});
-    y = io_head.DefineVariable<double>("Y", {gn}, {start}, {n});
-    z = io_head.DefineVariable<double>("Z", {gn}, {start}, {n});
+    unsigned int gn = static_cast<unsigned int>((*glb_nelv)*(*lxyz));
+ 
+    // Assign to global variables
+    reader_start = start;
+    reader_count = n;
 
     // If the process is asynchronous, define the relevant variables for writer_st
-    if (decide_stream == 1){
-	    p_st = io_asynchronous.DefineVariable<double>("P", {gn}, {start}, {n});
-	    vx_st = io_asynchronous.DefineVariable<double>("VX", {gn}, {start}, {n});
-	    vy_st = io_asynchronous.DefineVariable<double>("VY", {gn}, {start}, {n});
-	    vz_st = io_asynchronous.DefineVariable<double>("VZ", {gn}, {start}, {n});
-	    bm1_st = io_asynchronous.DefineVariable<double>("BM1", {gn}, {start}, {n});
-    }
-
-
-    // Do everything again for the global indices 
-    nelv = static_cast<unsigned int>((*nelvin));
-    start = static_cast<unsigned int>(*nelb);
-    n = static_cast<unsigned int> (nelv);
-    gn = static_cast<unsigned int>((*nelgv));
-    // Define variable for compression writer
-    lglelw = io.DefineVariable<int>("LGLEL_OUT", {gn}, {start}, {n});
-    // Define variable for asyncrhonous writet
-    if (decide_stream == 1){
-    	lglelw_st = io_asynchronous.DefineVariable<int>("LGLEL", {gn}, {start}, {n});
-    }
-
-    // Write the mesh information only once (Currently commented out).
-    //writer_head = io_head.Open("geo.bp", adios2::Mode::Write);
-    //writer_head.Put<double>(x, xml);
-    //writer_head.Put<double>(y, yml);
-    //writer_head.Put<double>(z, yml);
-    //writer_head.Close();
-    //if(!rank)
-	//std::cout << "geo.bp written" << std::endl;
+    f2py_field = io_asynchronous.DefineVariable<double>("f2py_field", {gn}, {start}, {n});
     
     // If asyncrhonous execution, open the global array
-    if (decide_stream == 1){
-	std::cout << "create global array" << std::endl;
-    	writer_st = io_asynchronous.Open("globalArray", adios2::Mode::Write);
-    }
+    std::cout << "create global array" << std::endl;
+    writer_st = io_asynchronous.Open("globalArray_f2py", adios2::Mode::Write);
+    reader_st = io_asynchronous.Open("globalArray_py2f", adios2::Mode::Read);
 
-    // Initialize global variables for writing. This could be done in global definition    
-    ifile = 0 ;
-    ifilew = 0 ;
+    // Put necesary information in a header stream
+    writer_st.BeginStep();
+    adios2::Variable<int> hdr_elems = io_asynchronous.DefineVariable<int>("global_elements");
+    adios2::Variable<int> hdr_lxyz = io_asynchronous.DefineVariable<int>("points_per_element");
+    adios2::Variable<int> hdr_gdim = io_asynchronous.DefineVariable<int>("problem_dimension");
+    if( rank == 0 )
+    {
+       writer_st.Put(hdr_elems, static_cast<int> (*glb_nelv));
+       writer_st.Put(hdr_lxyz,  static_cast<int> (*lxyz));
+       writer_st.Put(hdr_gdim,  static_cast<int> (*gdim));
+    }
+    writer_st.EndStep();
 }
 
-extern "C" void adios2_update_(
-    const int *lglel,
-    const double *pr,
-    const double *u,
-    const double *v,
-    const double *w,
-    const double *temp
-){
-    startT = std::clock();
-    ifilew=ifilew+1;
-    std:: string fileName= "out.f0000"+ std::to_string(ifilew) +".bp";
-    writer = io.Open(fileName, adios2::Mode::Write);
-    // Begin a step of the writer
-    writer.BeginStep();
-    writer.Put<double>(p, pr);
-    writer.Put<double>(vx, u);
-    writer.Put<double>(vy, v);
-    writer.Put<double>(vz, w);
-    writer.Put<int>(lglelw, lglel);
-    writer.EndStep();
-    writer.Close();
-    dataTime += (std::clock() - startT) / (double) CLOCKS_PER_SEC;
+extern "C" void adios2_finalize_(){
+    std::cout << "Close global arrays" << std::endl;
+    writer_st.Close();
+    reader_st.Close();
+
 }
 
 extern "C" void adios2_stream_(
-    const int *lglel,
-    const double *pr,
-    const double *u,
-    const double *v,
-    const double *w,
-    const double *mass1,
-    const double *temp
+    const double *field
 ){
-    startT = std::clock();
-    // Begin a step of the writer
     writer_st.BeginStep();
-    writer_st.Put<double>(p_st, pr);
-    writer_st.Put<double>(vx_st, u);
-    writer_st.Put<double>(vy_st, v);
-    writer_st.Put<double>(vz_st, w);
-    writer_st.Put<double>(bm1_st, mass1);
-    writer_st.Put<int>(lglelw_st, lglel);
+    writer_st.Put<double>(f2py_field, field);
     writer_st.EndStep();
-    dataTime += (std::clock() - startT) / (double) CLOCKS_PER_SEC;
-}
-extern "C" void adios2_read_(
-    int *lglelrr,
-    double *pr,
-    double *v,
-    double *u,
-    double *w,
-    const int *nval,
-    const int *nelvin,
-    const int *nelb,
-    const int *nelgv,
-    const int *nelgt,
-    const int *comm_int,
-    char *fname
-){
-    startT = std::clock();
-    std::string configFile="adios2_config/config.xml";
-    MPI_Comm comm = MPI_Comm_f2c(*comm_int);
-    MPI_Comm_rank(comm, &rank);
-    MPI_Comm_size(comm, &size);
-    
-    // See how much of the file my rank needs to read
-    unsigned int nelv = static_cast<unsigned int>((*nelvin));
-    unsigned int start = static_cast<unsigned int>(*nelb);
-    start *= static_cast<unsigned int>(*nval);
-    unsigned int n = static_cast<unsigned int> (*nval) * nelv;
-    unsigned int gn = static_cast<unsigned int>((*nelgv)*(*nval));
-
-    // See how much of the file my rank needs to read for the global indices
-    unsigned int nelv3 = static_cast<unsigned int>((*nelvin));
-    unsigned int start3 = static_cast<unsigned int>(*nelb);
-    unsigned int n3 = static_cast<unsigned int> (nelv);
-    unsigned int gn3 = static_cast<unsigned int>((*nelgv));
-   
-    // Write some debuffing info 
-    if (rank==0){
-    std::cout << "what adios is getting:"<< std::endl;
-    std::cout << "nvals:"<< static_cast<unsigned int> (*nval) << std::endl;
-    std::cout << "nelgv:"<< static_cast<unsigned int> (*nelgv) << std::endl;
-    std::cout << "nelgt:"<< static_cast<unsigned int> (*nelgt) << std::endl;
-
-    std::cout << "what adios is calculating:"<< std::endl;
-    std::cout << "nelv:"<< nelv << std::endl;
-    std::cout << "start:"<< start << std::endl;
-    std::cout << "count:"<< n << std::endl;
-    std::cout << "total number of entries:"<< gn << std::endl;
-    }
-    
-
-    // Read the file with factory name
-    ifile=ifile+1;
-    // Make a new name every time the reader is called
-    std:: string fileName= "out.f0000"+ std::to_string(ifile) +".bp";
-    startT = std::clock();
-    readr = ior.Open(fileName,adios2::Mode::Read); 
-
-    int step = 1;
-    bool firstStep = true;
-    std::cout << "CAREFULL, YOU ARE SAYING EVERY STEP IS THE FIRST, SO IT ALLOCATES EVERY TIME" << std::endl;
-
-    // Check if the variables that we are requesting are in the file
-        vxr = ior.InquireVariable<double>("VX_OUT");
-        if (!vxr)
-        {
-            std::cout << "Error: NO variable VX_OUT found. Unable to proceed. " << std::endl;
-        }
-
-        vyr = ior.InquireVariable<double>("VY_OUT");
-        if (!vyr)
-        {
-            std::cout << "Error: NO variable VY_OUT found. Unable to proceed. " << std::endl;
-        }
-
-        vzr = ior.InquireVariable<double>("VZ_OUT");
-        if (!vzr)
-        {
-            std::cout << "Error: NO variable VZ_OUT found. Unable to proceed. " << std::endl;
-        }
-
-        prr = ior.InquireVariable<double>("P_OUT");
-        if (!prr)
-        {
-            std::cout << "Error: NO variable P_OUT found. Unable to proceed. " << std::endl;
-        }
-        lglelr = ior.InquireVariable<int>("LGLEL_OUT");
-        if (!lglelr)
-        {
-            std::cout << "Error: NO variable LGLEL_OUT found. Unable to proceed. " << std::endl;
-        }
-
-    elapsedT = (std::clock() - startT) / (double) CLOCKS_PER_SEC;
-    std::cout <<  "rank: " << rank << " open and inquire: " << elapsedT << "s." << std::endl;
-    startT = std::clock();
-        // Allocate variables if it is the first step
-        if (firstStep)
-        {
-                readr.LockReaderSelections();
-		vVXr.resize(n);
-		vVYr.resize(n);
-		vVZr.resize(n);
-		vVPrr.resize(n);
-		vVLGLELr.resize(n3);
-		firstStep=false;
-        }
-
-    elapsedT = (std::clock() - startT) / (double) CLOCKS_PER_SEC;
-    std::cout <<  "rank: " << rank << " resize vector: " << elapsedT << "s." << std::endl;
-    startT = std::clock();
- 
-        // Select in the adios file the sections to read
-    	vxr.SetSelection({{start}, {n}});
-        vyr.SetSelection({{start}, {n}});
-        vzr.SetSelection({{start}, {n}});
-        prr.SetSelection({{start}, {n}});
-        lglelr.SetSelection({{start3}, {n3}});
-
-    elapsedT = (std::clock() - startT) / (double) CLOCKS_PER_SEC;
-    std::cout <<  "rank: " << rank << " set selection: " << elapsedT << "s." << std::endl;
-    startT = std::clock();
- 
-        // get the data that has been selected
-        readr.Get<double>(vxr,vVXr.data());
-        readr.Get<double>(vyr,vVYr.data());
-        readr.Get<double>(vzr,vVZr.data());
-        readr.Get<double>(prr,vVPrr.data());
-        readr.Get<int>(lglelr,vVLGLELr.data());
-    
-    elapsedT = (std::clock() - startT) / (double) CLOCKS_PER_SEC;
-    std::cout <<  "rank: " << rank << " get data: " << elapsedT << "s." << std::endl;
-    startT = std::clock();
-
-    // Close the reader
-    readr.Close();	
-    
-    elapsedT = (std::clock() - startT) / (double) CLOCKS_PER_SEC;
-    std::cout <<  "rank: " << rank << " Close: " << elapsedT << "s." << std::endl;
-    startT = std::clock();
-
-    //copy the data from the buffer to the array that is used in nek/neko
-    if (rank==0){
-    std::cout <<"Copying data into nek vector " << std::endl;
-    }
-    
-    for (int i=0; i<vVZr.size(); i++){
-    pr[i]=vVPrr[i];
-    v[i]=vVXr[i];
-    u[i]=vVYr[i];
-    w[i]=vVZr[i];
-    }
-    for (int i=0; i<vVLGLELr.size(); i++){
-    lglelrr[i]=vVLGLELr[i];
-    }
-    
-    elapsedT = (std::clock() - startT) / (double) CLOCKS_PER_SEC;
-    std::cout <<  "rank: " << rank << " copy into nek: " << elapsedT << "s." << std::endl;
-    startT = std::clock();
-
-    dataTime += (std::clock() - startT) / (double) CLOCKS_PER_SEC;
-
 }
 
-extern "C" void adios2_finalize_(){
-    if (decide_stream_global == 1){
-	std::cout << "Close global array" << std::endl;
-    	writer_st.Close();
-    	std::cout <<  "rank: " << rank << " in-situ time: " << dataTime << "s." << std::endl;
-    }
-
+extern "C" void adios2_recieve_(
+    double *field
+){
+    reader_st.BeginStep();
+    py2f_field = io_asynchronous.InquireVariable<double>("py2f_field");
+    py2f_field.SetSelection({{reader_start}, {reader_count}});
+    reader_st.Get<double>(py2f_field, field);
+    reader_st.EndStep();
 }
diff --git a/src/io/nmsh_file.f90 b/src/io/nmsh_file.f90
index cc83ba8a5e5..48e5f765f36 100644
--- a/src/io/nmsh_file.f90
+++ b/src/io/nmsh_file.f90
@@ -45,11 +45,11 @@ module nmsh_file
   use mpi_f08
   use logger
   implicit none
-  
+
   private
   !> Specifices the maximum number of elements any rank is allowed to write (for nmsh).
   !! Needed in order to generate large meshes where an individual write might exceed 2GB.
-  integer, parameter :: max_write_nel = 8000000 
+  integer, parameter :: max_write_nel = 8000000
   !> Interface for Neko nmsh files
   type, public, extends(generic_file_t) :: nmsh_file_t
    contains
@@ -62,7 +62,7 @@ module nmsh_file
   !> Load a mesh from a binary Neko nmsh file
   subroutine nmsh_file_read(this, data)
     class(nmsh_file_t) :: this
-    class(*), target, intent(inout) :: data    
+    class(*), target, intent(inout) :: data
     type(nmsh_hex_t), allocatable :: nmsh_hex(:)
     type(nmsh_quad_t), allocatable :: nmsh_quad(:)
     type(nmsh_zone_t), allocatable :: nmsh_zone(:)
@@ -75,10 +75,12 @@ subroutine nmsh_file_read(this, data)
     integer :: nmsh_quad_size, nmsh_hex_size, nmsh_zone_size
     integer :: nelv, gdim, nzones, ncurves
     integer :: el_idx
-    type(point_t) :: p(8)
+    type(point_t), target :: p(8)
     type(linear_dist_t) :: dist
     character(len=LOG_SIZE) :: log_buf
 
+    call this%check_exists()
+
     select type(data)
     type is(mesh_t)
        msh => data
@@ -96,155 +98,155 @@ subroutine nmsh_file_read(this, data)
 
     call MPI_File_open(NEKO_COMM, trim(this%fname), &
          MPI_MODE_RDONLY, MPI_INFO_NULL, fh, ierr)
-      
+
     if (ierr > 0) then
        call neko_error('Could not open the mesh file ' // this%fname  // &
                        'for reading!')
-    end if 
+    end if
     call MPI_File_read_all(fh, nelv, 1, MPI_INTEGER, status, ierr)
     call MPI_File_read_all(fh, gdim, 1, MPI_INTEGER, status, ierr)
 
     write(log_buf,1) gdim, nelv
-1      format('gdim = ', i1, ', nelements =', i9)
+1   format('gdim = ', i1, ', nelements =', i9)
     call neko_log%message(log_buf)
 
     if (gdim .eq. 2) then
        call MPI_File_close(fh, ierr)
        call nmsh_file_read_2d(this, msh)
     else
-    
-    dist = linear_dist_t(nelv, pe_rank, pe_size, NEKO_COMM)
-    nelv = dist%num_local()
-    element_offset = dist%start_idx()
-    
-    call msh%init(gdim, nelv)
-   
-    call neko_log%message('Reading elements')
 
-    if (msh%gdim .eq. 2) then
-       allocate(nmsh_quad(msh%nelv))
-       mpi_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(element_offset,i8) * int(nmsh_quad_size,i8)
-       call MPI_File_read_at_all(fh, mpi_offset, &
+       dist = linear_dist_t(nelv, pe_rank, pe_size, NEKO_COMM)
+       nelv = dist%num_local()
+       element_offset = dist%start_idx()
+
+       call msh%init(gdim, nelv)
+
+       call neko_log%message('Reading elements')
+
+       if (msh%gdim .eq. 2) then
+          allocate(nmsh_quad(msh%nelv))
+          mpi_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(element_offset,i8) * int(nmsh_quad_size,i8)
+          call MPI_File_read_at_all(fh, mpi_offset, &
             nmsh_quad, msh%nelv, MPI_NMSH_QUAD, status, ierr)
-       do i = 1, nelv
-          do j = 1, 4
-             p(j) = point_t(nmsh_quad(i)%v(j)%v_xyz, nmsh_quad(i)%v(j)%v_idx)
+          do i = 1, nelv
+             do j = 1, 4
+                p(j) = point_t(nmsh_quad(i)%v(j)%v_xyz, nmsh_quad(i)%v(j)%v_idx)
+             end do
+             ! swap vertices to keep symmetric vertex numbering in neko
+             call msh%add_element(i, p(1), p(2), p(4), p(3))
           end do
-          ! swap vertices to keep symmetric vertex numbering in neko
-          call msh%add_element(i, p(1), p(2), p(4), p(3))
-       end do
-       deallocate(nmsh_quad)
-       mpi_el_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(dist%num_global(),i8) * int(nmsh_quad_size,i8)
-    else if (msh%gdim .eq. 3) then
-       allocate(nmsh_hex(msh%nelv))
-       mpi_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(element_offset,i8) * int(nmsh_hex_size,i8)
-       call MPI_File_read_at_all(fh, mpi_offset, &
+          deallocate(nmsh_quad)
+          mpi_el_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(dist%num_global(),i8) * int(nmsh_quad_size,i8)
+       else if (msh%gdim .eq. 3) then
+          allocate(nmsh_hex(msh%nelv))
+          mpi_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(element_offset,i8) * int(nmsh_hex_size,i8)
+          call MPI_File_read_at_all(fh, mpi_offset, &
             nmsh_hex, msh%nelv, MPI_NMSH_HEX, status, ierr)
-       do i = 1, nelv
-          do j = 1, 8
-             p(j) = point_t(nmsh_hex(i)%v(j)%v_xyz, nmsh_hex(i)%v(j)%v_idx)
-          end do
-          ! swap vertices to keep symmetric vertex numbering in neko
-          call msh%add_element(i, &
+          do i = 1, nelv
+             do j = 1, 8
+                p(j) = point_t(nmsh_hex(i)%v(j)%v_xyz, nmsh_hex(i)%v(j)%v_idx)
+             end do
+             ! swap vertices to keep symmetric vertex numbering in neko
+             call msh%add_element(i, &
                p(1), p(2), p(4), p(3), p(5), p(6), p(8), p(7))
-       end do
-       deallocate(nmsh_hex)
-       mpi_el_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(dist%num_global(),i8) * int(nmsh_hex_size,i8)
-    else        
-       if (pe_rank .eq. 0) call neko_error('Invalid dimension of mesh')
-    end if
-    call neko_log%message('Reading BC/zone data')
-
-    mpi_offset = mpi_el_offset
-    call MPI_File_read_at_all(fh, mpi_offset, &
-         nzones, 1, MPI_INTEGER, status, ierr)
-    if (nzones .gt. 0) then
-       allocate(nmsh_zone(nzones))
-
-       !>
-       !!@todo Fix the parallel reading in this part, let each rank read
-       !!a piece and pass the pieces around, filtering out matching zones
-       !!in the local mesh.
-       !!       
-       mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8)
+          end do
+          deallocate(nmsh_hex)
+          mpi_el_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(dist%num_global(),i8) * int(nmsh_hex_size,i8)
+       else
+          if (pe_rank .eq. 0) call neko_error('Invalid dimension of mesh')
+       end if
+       call neko_log%message('Reading BC/zone data')
+
+       mpi_offset = mpi_el_offset
        call MPI_File_read_at_all(fh, mpi_offset, &
+         nzones, 1, MPI_INTEGER, status, ierr)
+       if (nzones .gt. 0) then
+          allocate(nmsh_zone(nzones))
+
+          !>
+          !!@todo Fix the parallel reading in this part, let each rank read
+          !!a piece and pass the pieces around, filtering out matching zones
+          !!in the local mesh.
+          !!
+          mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8)
+          call MPI_File_read_at_all(fh, mpi_offset, &
             nmsh_zone, nzones, MPI_NMSH_ZONE, status, ierr)
-       
-       do i = 1, nzones
-          el_idx = nmsh_zone(i)%e
-          if (el_idx .gt. msh%offset_el .and. &
-               el_idx .le. msh%offset_el + msh%nelv) then             
-             el_idx = el_idx - msh%offset_el
-             select case(nmsh_zone(i)%type)
-             case(1)
-                call msh%mark_wall_facet(nmsh_zone(i)%f, el_idx)
-             case(2)
-                call msh%mark_inlet_facet(nmsh_zone(i)%f, el_idx)
-             case(3)
-                call msh%mark_outlet_facet(nmsh_zone(i)%f, el_idx)
-             case(4)
-                call msh%mark_sympln_facet(nmsh_zone(i)%f, el_idx)
-             case(5)
-                call msh%mark_periodic_facet(nmsh_zone(i)%f, el_idx, &
+
+          do i = 1, nzones
+             el_idx = nmsh_zone(i)%e
+             if (el_idx .gt. msh%offset_el .and. &
+               el_idx .le. msh%offset_el + msh%nelv) then
+                el_idx = el_idx - msh%offset_el
+                select case(nmsh_zone(i)%type)
+                case(1)
+                   call msh%mark_wall_facet(nmsh_zone(i)%f, el_idx)
+                case(2)
+                   call msh%mark_inlet_facet(nmsh_zone(i)%f, el_idx)
+                case(3)
+                   call msh%mark_outlet_facet(nmsh_zone(i)%f, el_idx)
+                case(4)
+                   call msh%mark_sympln_facet(nmsh_zone(i)%f, el_idx)
+                case(5)
+                   call msh%mark_periodic_facet(nmsh_zone(i)%f, el_idx, &
                      nmsh_zone(i)%p_f, nmsh_zone(i)%p_e, nmsh_zone(i)%glb_pt_ids)
-             case(6)
-                call msh%mark_outlet_normal_facet(nmsh_zone(i)%f, el_idx)
-             case(7)
-                call msh%mark_labeled_facet(nmsh_zone(i)%f, el_idx,nmsh_zone(i)%p_f)
-             end select
-          end if
-       end do
-       !Apply facets, important that marking is finished
-       do i = 1, nzones
-          el_idx = nmsh_zone(i)%e
-          if (el_idx .gt. msh%offset_el .and. &
-               el_idx .le. msh%offset_el + msh%nelv) then             
-             el_idx = el_idx - msh%offset_el
-             select case(nmsh_zone(i)%type)
-             case(5)
-                call msh%apply_periodic_facet(nmsh_zone(i)%f, el_idx, &
+                case(6)
+                   call msh%mark_outlet_normal_facet(nmsh_zone(i)%f, el_idx)
+                case(7)
+                   call msh%mark_labeled_facet(nmsh_zone(i)%f, el_idx,nmsh_zone(i)%p_f)
+                end select
+             end if
+          end do
+          !Apply facets, important that marking is finished
+          do i = 1, nzones
+             el_idx = nmsh_zone(i)%e
+             if (el_idx .gt. msh%offset_el .and. &
+               el_idx .le. msh%offset_el + msh%nelv) then
+                el_idx = el_idx - msh%offset_el
+                select case(nmsh_zone(i)%type)
+                case(5)
+                   call msh%apply_periodic_facet(nmsh_zone(i)%f, el_idx, &
                      nmsh_zone(i)%p_f, nmsh_zone(i)%p_e, nmsh_zone(i)%glb_pt_ids)
-             end select
-          end if
-       end do
+                end select
+             end if
+          end do
 
-       deallocate(nmsh_zone)
-    end if
-    call neko_log%message('Reading deformation data')
+          deallocate(nmsh_zone)
+       end if
+       call neko_log%message('Reading deformation data')
 
-    mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8)
-    call MPI_File_read_at_all(fh, mpi_offset, &
+       mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8)
+       call MPI_File_read_at_all(fh, mpi_offset, &
          ncurves, 1, MPI_INTEGER, status, ierr)
 
-    if (ncurves .gt. 0) then
-       
-       allocate(nmsh_curve(ncurves))
-       mpi_offset = mpi_el_offset + int(2 * MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8)
-       call MPI_File_read_at_all(fh, mpi_offset, &
+       if (ncurves .gt. 0) then
+
+          allocate(nmsh_curve(ncurves))
+          mpi_offset = mpi_el_offset + int(2 * MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8)
+          call MPI_File_read_at_all(fh, mpi_offset, &
             nmsh_curve, ncurves, MPI_NMSH_CURVE, status, ierr)
-       
-       do i = 1, ncurves 
-          el_idx = nmsh_curve(i)%e - msh%offset_el
-          if (el_idx .gt. 0 .and. &
-              el_idx .le. msh%nelv) then             
-             call msh%mark_curve_element(el_idx, &
+
+          do i = 1, ncurves
+             el_idx = nmsh_curve(i)%e - msh%offset_el
+             if (el_idx .gt. 0 .and. &
+              el_idx .le. msh%nelv) then
+                call msh%mark_curve_element(el_idx, &
                   nmsh_curve(i)%curve_data, nmsh_curve(i)%type)
-          end if
-             
-       end do
-       
-       deallocate(nmsh_curve)
-    end if
+             end if
 
-    call MPI_File_close(fh, ierr)
-    call neko_log%message('Mesh read, setting up connectivity')
+          end do
 
-    call msh%finalize()
-    call neko_log%message('Done setting up mesh and connectivity')
-    
-    call neko_log%end_section()
+          deallocate(nmsh_curve)
+       end if
+
+       call MPI_File_close(fh, ierr)
+       call neko_log%message('Mesh read, setting up connectivity')
+
+       call msh%finalize()
+       call neko_log%message('Done setting up mesh and connectivity')
+
+       call neko_log%end_section()
     end if
-       
+
   end subroutine nmsh_file_read
 
   !> Load a mesh from a binary Neko nmsh file
@@ -280,16 +282,16 @@ subroutine nmsh_file_read_2d(this, msh)
     call MPI_File_read_all(fh, gdim, 1, MPI_INTEGER, status, ierr)
 
     write(log_buf,2) gdim
-2      format('gdim = ', i1, ', no full 2d support, creating thin slab')
+2   format('gdim = ', i1, ', no full 2d support, creating thin slab')
     call neko_log%message(log_buf)
     gdim = 3
-    
+
     dist = linear_dist_t(nelv, pe_rank, pe_size, NEKO_COMM)
     nelv = dist%num_local()
     element_offset = dist%start_idx()
-    
+
     call msh%init(gdim, nelv)
-   
+
     allocate(nmsh_quad(msh%nelv))
     mpi_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(element_offset,i8) * int(nmsh_quad_size,i8)
     call MPI_File_read_at_all(fh, mpi_offset, &
@@ -323,15 +325,15 @@ subroutine nmsh_file_read_2d(this, msh)
        !!@todo Fix the parallel reading in this part, let each rank read
        !!a piece and pass the pieces around, filtering out matching zones
        !!in the local mesh.
-       !!       
+       !!
        mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8)
        call MPI_File_read_at_all(fh, mpi_offset, &
             nmsh_zone, nzones, MPI_NMSH_ZONE, status, ierr)
-       
+
        do i = 1, nzones
           el_idx = nmsh_zone(i)%e
           if (el_idx .gt. msh%offset_el .and. &
-               el_idx .le. msh%offset_el + msh%nelv) then             
+               el_idx .le. msh%offset_el + msh%nelv) then
              el_idx = el_idx - msh%offset_el
              select case(nmsh_zone(i)%type)
              case(1)
@@ -370,7 +372,7 @@ subroutine nmsh_file_read_2d(this, msh)
        do i = 1, nzones
           el_idx = nmsh_zone(i)%e
           if (el_idx .gt. msh%offset_el .and. &
-               el_idx .le. msh%offset_el + msh%nelv) then             
+               el_idx .le. msh%offset_el + msh%nelv) then
              el_idx = el_idx - msh%offset_el
              select case(nmsh_zone(i)%type)
              case(5)
@@ -396,7 +398,7 @@ subroutine nmsh_file_read_2d(this, msh)
           call msh%apply_periodic_facet(5, el_idx, &
                6, el_idx, glb_pt_ids%x)
        end do
-       
+
        deallocate(nmsh_zone)
     end if
 
@@ -406,32 +408,32 @@ subroutine nmsh_file_read_2d(this, msh)
          ncurves, 1, MPI_INTEGER, status, ierr)
 
     if (ncurves .gt. 0) then
-       
+
        allocate(nmsh_curve(ncurves))
        mpi_offset = mpi_el_offset + &
             int(2*MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8)
        call MPI_File_read_at_all(fh, mpi_offset, &
             nmsh_curve, ncurves, MPI_NMSH_CURVE, status, ierr)
-       
-       do i = 1, ncurves 
+
+       do i = 1, ncurves
           el_idx = nmsh_curve(i)%e - msh%offset_el
           if (el_idx .gt. 0 .and. &
-              el_idx .le. msh%nelv) then             
+              el_idx .le. msh%nelv) then
              call msh%mark_curve_element(el_idx, &
                   nmsh_curve(i)%curve_data, nmsh_curve(i)%type)
           end if
-             
+
        end do
-       
+
        deallocate(nmsh_curve)
     end if
 
     call MPI_File_close(fh, ierr)
 
     call msh%finalize()
-    
+
     call neko_log%end_section()
-       
+
   end subroutine nmsh_file_read_2d
 
 
@@ -450,10 +452,10 @@ subroutine nmsh_file_write(this, data, t)
     integer (kind=MPI_OFFSET_KIND) :: mpi_offset, mpi_el_offset
     integer :: i, j, ierr, nelgv, element_offset, k
     integer :: nmsh_quad_size, nmsh_hex_size, nmsh_zone_size, nmsh_curve_size
-    integer :: nzones, ncurves 
+    integer :: nzones, ncurves
     class(element_t), pointer :: ep
     integer(i4),  dimension(8), parameter :: vcyc_to_sym = (/1, 2, 4, 3, 5, &
-         & 6, 8, 7/) ! cyclic to symmetric vertex mapping
+    & 6, 8, 7/) ! cyclic to symmetric vertex mapping
 
     select type(data)
     type is (mesh_t)
@@ -484,7 +486,7 @@ subroutine nmsh_file_write(this, data, t)
     call msh%reset_periodic_ids()
 
     if (msh%gdim .eq. 2) then
-       allocate(nmsh_quad(msh%nelv))       
+       allocate(nmsh_quad(msh%nelv))
        do i = 1, msh%nelv
           ep => msh%elements(i)%e
           nmsh_quad(i)%el_idx = ep%id()
@@ -499,7 +501,7 @@ subroutine nmsh_file_write(this, data, t)
        deallocate(nmsh_quad)
        mpi_el_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(nelgv,i8) * int(nmsh_quad_size,i8)
     else if (msh%gdim .eq. 3) then
-       allocate(nmsh_hex(msh%nelv))       
+       allocate(nmsh_hex(msh%nelv))
        do i = 1, msh%nelv
           ep => msh%elements(i)%e
           nmsh_hex(i)%el_idx = ep%id()
@@ -516,14 +518,14 @@ subroutine nmsh_file_write(this, data, t)
           call MPI_File_write_at_all(fh, mpi_offset, &
                nmsh_HEX(i*max_write_nel+1), min(msh%nelv-i*max_write_nel,max_write_nel), MPI_NMSH_HEX, status, ierr)
        end do
-       deallocate(nmsh_hex)       
+       deallocate(nmsh_hex)
        mpi_el_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(nelgv,i8) * int(nmsh_hex_size,i8)
-    else 
+    else
        call neko_error('Invalid dimension of mesh')
     end if
 
     nzones = msh%wall%size + msh%inlet%size + msh%outlet%size + &
-         msh%sympln%size + msh%periodic%size + msh%outlet_normal%size 
+         msh%sympln%size + msh%periodic%size + msh%outlet_normal%size
 
     do i = 1, NEKO_MSH_MAX_ZLBLS
        nzones = nzones + msh%labeled_zones(i)%size
@@ -534,9 +536,9 @@ subroutine nmsh_file_write(this, data, t)
 
     if (nzones .gt. 0) then
        allocate(nmsh_zone(nzones))
-       
+
        nmsh_zone(:)%type = 0
-       
+
        j = 1
        do i = 1, msh%wall%size
           nmsh_zone(j)%e = msh%wall%facet_el(i)%x(2) + msh%offset_el
@@ -590,16 +592,16 @@ subroutine nmsh_file_write(this, data, t)
              j = j + 1
           end do
        end do
-  
-       
+
+
        mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8)
        call MPI_File_write_at_all(fh, mpi_offset, &
             nmsh_zone, nzones, MPI_NMSH_ZONE, status, ierr)
-       
+
        deallocate(nmsh_zone)
     end if
- 
-    ncurves = msh%curve%size 
+
+    ncurves = msh%curve%size
     mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8)
 
     call MPI_File_write_at_all(fh, mpi_offset, &
@@ -610,7 +612,7 @@ subroutine nmsh_file_write(this, data, t)
        do i = 1, ncurves
           nmsh_curve(i)%type = 0
        end do
-       
+
        do i = 1, ncurves
           nmsh_curve(i)%e = msh%curve%curve_el(i)%el_idx + msh%offset_el
           nmsh_curve(i)%curve_data = msh%curve%curve_el(i)%curve_data
@@ -619,15 +621,15 @@ subroutine nmsh_file_write(this, data, t)
        mpi_offset = mpi_el_offset + int(2*MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8)
        call MPI_File_write_at_all(fh, mpi_offset, &
             nmsh_curve, ncurves, MPI_NMSH_CURVE, status, ierr)
-       
+
        deallocate(nmsh_curve)
     end if
-   
+
     call MPI_File_sync(fh, ierr)
     call MPI_File_close(fh, ierr)
     call neko_log%message('Done')
 
   end subroutine nmsh_file_write
-  
+
 end module nmsh_file
-  
+
diff --git a/src/io/output.f90 b/src/io/output.f90
index a0284131cd8..41f8d94a671 100644
--- a/src/io/output.f90
+++ b/src/io/output.f90
@@ -35,12 +35,13 @@ module output
   use num_types, only : rp
   use file, only : file_t
   implicit none
+  private
 
   !> Abstract type defining an output type
   type, public, abstract :: output_t
      type(file_t) :: file_
    contains
-     procedure, pass(this) :: init => output_init
+     procedure, pass(this) :: init_base => output_init
      procedure, pass(this) :: set_counter => output_set_counter
      procedure, pass(this) :: set_start_counter => output_set_start_counter
      procedure(output_sample), pass(this), deferred :: sample
@@ -58,13 +59,20 @@ end subroutine output_sample
 
 contains
 
-  !> Output constructor
-  subroutine output_init(this, fname)
+  !> Output constructor.
+  !! @param fname Name of the output file.
+  !! @param precision Output precision (sp or dp).
+  subroutine output_init(this, fname, precision)
     class(output_t), intent(inout) :: this
     character(len=*), intent(inout) :: fname
+    integer, intent(in), optional :: precision
+
+    if (present(precision)) then
+       this%file_ = file_t(fname, precision=precision)
+    else
+       this%file_ = file_t(fname)
+    end if
 
-    this%file_ = file_t(fname)
-    
   end subroutine output_init
 
   !> Update the output's file counter
@@ -73,13 +81,13 @@ subroutine output_set_counter(this, n)
     integer, intent(in) :: n
     call this%file_%set_counter(n)
   end subroutine output_set_counter
- 
+
   !> Update the start of output's file counter
   subroutine output_set_start_counter(this, n)
     class(output_t), intent(inout) :: this
     integer, intent(in) :: n
     call this%file_%set_start_counter(n)
   end subroutine output_set_start_counter
-  
- 
+
+
 end module output
diff --git a/src/io/re2_file.f90 b/src/io/re2_file.f90
index 3f7933582f0..8a0077d904c 100644
--- a/src/io/re2_file.f90
+++ b/src/io/re2_file.f90
@@ -49,7 +49,7 @@ module re2_file
   use logger
   implicit none
   private
-  
+
 
   !> Interface for NEKTON re2 files
   type, public, extends(generic_file_t) :: re2_file_t
@@ -86,7 +86,9 @@ subroutine re2_file_read(this, data)
     integer :: re2_data_bc_size
     logical :: v2_format
     character(len=LOG_SIZE) :: log_buf
-    
+
+    call this%check_exists()
+
     select type(data)
     type is (mesh_t)
        msh => data
@@ -120,7 +122,7 @@ subroutine re2_file_read(this, data)
        call MPI_Type_size(MPI_RE2V1_DATA_XY, re2_data_xy_size, ierr)
        call MPI_Type_size(MPI_RE2V1_DATA_XYZ, re2_data_xyz_size, ierr)
        call MPI_Type_size(MPI_RE2V1_DATA_CV, re2_data_cv_size, ierr)
-       call MPI_Type_size(MPI_RE2V1_DATA_BC, re2_data_bc_size, ierr)    
+       call MPI_Type_size(MPI_RE2V1_DATA_BC, re2_data_bc_size, ierr)
     end if
 
     write(log_buf,1) ndim, nelv
@@ -141,7 +143,7 @@ subroutine re2_file_read(this, data)
 
     call MPI_File_open(NEKO_COMM, trim(this%fname), &
          MPI_MODE_RDONLY, MPI_INFO_NULL, fh, ierr)
-    
+
     if (ierr .ne. 0) then
        call neko_log%error("Can't open binary NEKTON file ")
     end if
@@ -155,15 +157,15 @@ subroutine re2_file_read(this, data)
 
     call MPI_File_read_at_all(fh, mpi_offset, test, 1, MPI_REAL, status, ierr)
     mpi_offset = mpi_offset + MPI_REAL_SIZE
-    
+
     if (abs(RE2_ENDIAN_TEST - test) .gt. 1e-4) then
        call neko_error('Invalid endian of re2 file, byte swap not implemented yet')
     end if
 
     call re2_file_read_points(msh, ndim, nel, dist, fh, &
          mpi_offset, re2_data_xy_size, re2_data_xyz_size, v2_format)
-    
-          
+
+
     ! Set offset to start of curved side data
     mpi_offset = RE2_HDR_SIZE * MPI_CHARACTER_SIZE + MPI_REAL_SIZE
     if (ndim .eq. 2) then
@@ -185,7 +187,7 @@ subroutine re2_file_read(this, data)
        mpi_offset = mpi_offset + MPI_DOUBLE_PRECISION_SIZE
 
        call re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format)
-    else 
+    else
        call MPI_File_read_at_all(fh, mpi_offset, ncurv, 1, MPI_INTEGER, status, ierr)
        mpi_offset = mpi_offset + MPI_INTEGER_SIZE
        call re2_file_read_curve(msh, ncurv, dist, fh, mpi_offset, v2_format)
@@ -203,7 +205,7 @@ subroutine re2_file_read(this, data)
 
     call neko_log%message('Done')
 
-    
+
   end subroutine re2_file_read
 
   subroutine re2_file_write(this, data, t)
@@ -217,13 +219,13 @@ subroutine re2_file_write(this, data, t)
     character(len=54), parameter :: RE2_HDR_STR = 'RE2 exported by NEKO'
     integer :: i, j, ierr, nelgv
     type(MPI_Status) :: status
-    type(MPI_File) :: fh    
+    type(MPI_File) :: fh
     integer (kind=MPI_OFFSET_KIND) :: mpi_offset
     integer :: element_offset
     integer :: re2_data_xy_size
     integer :: re2_data_xyz_size
     character(len=LOG_SIZE) :: log_buf
-    
+
     select type(data)
     type is (mesh_t)
        msh => data
@@ -243,7 +245,7 @@ subroutine re2_file_write(this, data, t)
 
     if (pe_rank .eq. 0) then
        open(unit=9,file=trim(this%fname), status='new', iostat=ierr)
-              write(9, '(a5,i9,i3,i9,a54)') RE2_HDR_VER, nelgv, msh%gdim,&
+       write(9, '(a5,i9,i3,i9,a54)') RE2_HDR_VER, nelgv, msh%gdim,&
             nelgv, RE2_HDR_STR
        close(9)
     end if
@@ -252,7 +254,7 @@ subroutine re2_file_write(this, data, t)
     call MPI_File_open(NEKO_COMM, trim(this%fname), &
          MPI_MODE_WRONLY + MPI_MODE_CREATE, MPI_INFO_NULL, fh, ierr)
     mpi_offset = RE2_HDR_SIZE * MPI_CHARACTER_SIZE
-    
+
     call MPI_File_write_at(fh, mpi_offset, RE2_ENDIAN_TEST, 1, &
          MPI_REAL, status, ierr)
     mpi_offset = mpi_offset + MPI_REAL_SIZE
@@ -276,7 +278,7 @@ subroutine re2_file_write(this, data, t)
        allocate(re2_data_xyz(msh%nelv))
        do i = 1, msh%nelv
           re2_data_xyz(i)%rgroup = 1.0 ! Not used
-          do j = 1, 8 
+          do j = 1, 8
              re2_data_xyz(i)%x(j) = real(msh%elements(i)%e%pts(j)%p%x(1))
              re2_data_xyz(i)%y(j) = real(msh%elements(i)%e%pts(j)%p%x(2))
              re2_data_xyz(i)%z(j) = real(msh%elements(i)%e%pts(j)%p%x(3))
@@ -285,17 +287,17 @@ subroutine re2_file_write(this, data, t)
        mpi_offset = mpi_offset + element_offset * re2_data_xyz_size
        call MPI_File_write_at(fh, mpi_offset, &
             re2_data_xyz, msh%nelv, MPI_RE2V1_DATA_XYZ, status, ierr)
-       
+
        deallocate(re2_data_xyz)
     else
        call neko_error("Invalid dimension of mesh")
     end if
-        
+
     call MPI_FILE_close(fh, ierr)
     call neko_log%message('Done')
 
     !> @todo Add support for curved side data
-    
+
   end subroutine re2_file_write
 
   subroutine re2_file_read_points(msh, ndim, nel, dist, fh, &
@@ -320,20 +322,20 @@ subroutine re2_file_read_points(msh, ndim, nel, dist, fh, &
     integer :: pt_idx, nelv
     integer :: i, j, ierr
 
-    
+
     nelv = dist%num_local()
     element_offset = dist%start_idx()
 
     call htp%init(2*nel, ndim)
     pt_idx = 0
     if (ndim .eq. 2) then
-       mpi_offset = mpi_offset + element_offset * re2_data_xy_size          
+       mpi_offset = mpi_offset + element_offset * re2_data_xy_size
        if (.not. v2_format) then
           allocate(re2v1_data_xy(nelv))
           call MPI_File_read_at_all(fh, mpi_offset, &
                re2v1_data_xy, nelv, MPI_RE2V1_DATA_XY, status, ierr)
           do i = 1, nelv
-             do j = 1, 4             
+             do j = 1, 4
                 p(j) = point_t(real(re2v1_data_xy(i)%x(j),dp), &
                      real(re2v1_data_xy(i)%y(j),dp), 0.0d0)
                 call re2_file_add_point(htp, p(j), pt_idx)
@@ -350,7 +352,7 @@ subroutine re2_file_read_points(msh, ndim, nel, dist, fh, &
           call MPI_File_read_at_all(fh, mpi_offset, &
                re2v2_data_xy, nelv, MPI_RE2V2_DATA_XY, status, ierr)
           do i = 1, nelv
-             do j = 1, 4             
+             do j = 1, 4
                 p(j) = point_t(re2v2_data_xy(i)%x(j), &
                      re2v2_data_xy(i)%y(j), 0.0d0)
                 call re2_file_add_point(htp, p(j), pt_idx)
@@ -370,7 +372,7 @@ subroutine re2_file_read_points(msh, ndim, nel, dist, fh, &
           call MPI_File_read_at_all(fh, mpi_offset, &
                re2v1_data_xyz, nelv, MPI_RE2V1_DATA_XYZ, status, ierr)
           do i = 1, nelv
-             do j = 1, 8             
+             do j = 1, 8
                 p(j) = point_t(real(re2v1_data_xyz(i)%x(j),dp), &
                      real(re2v1_data_xyz(i)%y(j),dp),&
                      real(re2v1_data_xyz(i)%z(j),dp))
@@ -381,7 +383,7 @@ subroutine re2_file_read_points(msh, ndim, nel, dist, fh, &
              end if
              ! swap vertices to keep symmetric vertex numbering in neko
              call msh%add_element(i, &
-                  p(1), p(2), p(4), p(3), p(5), p(6), p(8), p(7))          
+                  p(1), p(2), p(4), p(3), p(5), p(6), p(8), p(7))
           end do
           deallocate(re2v1_data_xyz)
        else
@@ -389,7 +391,7 @@ subroutine re2_file_read_points(msh, ndim, nel, dist, fh, &
           call MPI_File_read_at_all(fh, mpi_offset, &
                re2v2_data_xyz, nelv, MPI_RE2V2_DATA_XYZ, status, ierr)
           do i = 1, nelv
-             do j = 1, 8             
+             do j = 1, 8
                 p(j) = point_t(re2v2_data_xyz(i)%x(j), &
                      re2v2_data_xyz(i)%y(j),&
                      re2v2_data_xyz(i)%z(j))
@@ -400,7 +402,7 @@ subroutine re2_file_read_points(msh, ndim, nel, dist, fh, &
              end if
              ! swap vertices to keep symmetric vertex numbering in neko
              call msh%add_element(i, &
-                  p(1), p(2), p(4), p(3), p(5), p(6), p(8), p(7))          
+                  p(1), p(2), p(4), p(3), p(5), p(6), p(8), p(7))
           end do
           deallocate(re2v2_data_xyz)
        end if
@@ -419,13 +421,13 @@ subroutine re2_file_read_curve(msh, ncurve, dist, fh, mpi_offset, v2_format)
     type(MPI_Status) :: status
     integer :: i, j, l, ierr, el_idx, id
     type(re2v1_curve_t), allocatable :: re2v1_data_curve(:)
-    type(re2v2_curve_t), allocatable :: re2v2_data_curve(:)    
+    type(re2v2_curve_t), allocatable :: re2v2_data_curve(:)
     real(kind=dp), allocatable :: curve_data(:,:,:)
     integer, allocatable :: curve_type(:,:)
     logical, allocatable :: curve_element(:)
     character(len=1) :: chtemp
     logical :: curve_skip = .false.
- 
+
     allocate(curve_data(5,12,msh%nelv))
     allocate(curve_element(msh%nelv))
     allocate(curve_type(12,msh%nelv))
@@ -438,7 +440,7 @@ subroutine re2_file_read_curve(msh, ncurve, dist, fh, mpi_offset, v2_format)
           end do
        end do
     end do
-    write(*,*) 'reading curved data' 
+    write(*,*) 'reading curved data'
     if (.not. v2_format) then
        allocate(re2v1_data_curve(ncurve))
        call MPI_File_read_at_all(fh, mpi_offset, re2v1_data_curve, ncurve, &
@@ -454,39 +456,39 @@ subroutine re2_file_read_curve(msh, ncurve, dist, fh, mpi_offset, v2_format)
           el_idx = re2v2_data_curve(i)%elem - dist%start_idx()
           id = re2v2_data_curve(i)%zone
           chtemp = re2v2_data_curve(i)%type
-          do j = 1, 5 
+          do j = 1, 5
              curve_data(j,id, el_idx) = re2v2_data_curve(i)%point(j)
           enddo
-       else 
+       else
           el_idx = re2v1_data_curve(i)%elem - dist%start_idx()
           id = re2v1_data_curve(i)%zone
           chtemp = re2v1_data_curve(i)%type
-          do j = 1, 5 
-             curve_data(j,id, el_idx) = real(re2v1_data_curve(i)%point(j),dp) 
+          do j = 1, 5
+             curve_data(j,id, el_idx) = real(re2v1_data_curve(i)%point(j),dp)
           enddo
        end if
-       
-       curve_element(el_idx) = .true. 
+
+       curve_element(el_idx) = .true.
        !This might need to be extended
        select case(trim(chtemp))
        case ('s')
-         curve_type(id,el_idx) = 1
-         call neko_log%warning('curve type s not supported, treating mesh as non-curved')
-         curve_skip = .true.
-         exit
+          curve_type(id,el_idx) = 1
+          call neko_log%warning('curve type s not supported, treating mesh as non-curved')
+          curve_skip = .true.
+          exit
        case ('e')
-         curve_type(id,el_idx) = 2
-         call neko_log%warning('curve type e not supported, treating mesh as non-curved')
-         curve_skip = .true.
-         exit
+          curve_type(id,el_idx) = 2
+          call neko_log%warning('curve type e not supported, treating mesh as non-curved')
+          curve_skip = .true.
+          exit
        case ('C')
-         curve_type(id,el_idx) = 3
+          curve_type(id,el_idx) = 3
        case ('m')
-         curve_type(id,el_idx) = 4
+          curve_type(id,el_idx) = 4
        case default
           write(*,*) chtemp, 'curve type not supported yet, treating mesh as non-curved',id, el_idx
 
-         curve_skip = .true.
+          curve_skip = .true.
        end select
     end do
 
@@ -501,8 +503,8 @@ subroutine re2_file_read_curve(msh, ncurve, dist, fh, mpi_offset, v2_format)
              call msh%mark_curve_element(el_idx, &
                   curve_data(1,1,el_idx), curve_type(1,el_idx))
           end if
-       end do 
-     end if
+       end do
+    end if
 
     deallocate(curve_data)
     deallocate(curve_element)
@@ -525,7 +527,7 @@ subroutine re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format)
     integer, parameter, dimension(6) :: facet_map = (/3, 2, 4, 1, 5, 6/)
     logical :: periodic
     type(re2v1_bc_t), allocatable :: re2v1_data_bc(:)
-    type(re2v2_bc_t), allocatable :: re2v2_data_bc(:)    
+    type(re2v2_bc_t), allocatable :: re2v2_data_bc(:)
 
     if (.not. v2_format) then
        allocate(re2v1_data_bc(nbcs))
@@ -538,7 +540,7 @@ subroutine re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format)
     end if
 
     periodic = .false.
-    
+
     !> @todo Use element offset in parallel
     if (v2_format) then ! V2 format
        do i = 1, nbcs
@@ -570,11 +572,11 @@ subroutine re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format)
              end if
              call msh%mark_labeled_facet(sym_facet, el_idx, label)
           case default
-              write (*,*) re2v2_data_bc(i)%type, 'bc type not supported yet'
+             write (*,*) re2v2_data_bc(i)%type, 'bc type not supported yet'
              write (*,*) re2v2_data_bc(i)%bc_data
           end select
        end do
-    
+
        !
        ! Fix periodic condition for shared nodes
        !
@@ -588,13 +590,13 @@ subroutine re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format)
                    p_el_idx = int(re2v2_data_bc(i)%bc_data(1))
                    p_facet = facet_map(int(re2v2_data_bc(i)%bc_data(2)))
                    call msh%create_periodic_ids(sym_facet, el_idx, &
-                        p_facet, p_el_idx) 
+                        p_facet, p_el_idx)
                 end select
              end do
           end do
        end if
        deallocate(re2v2_data_bc)
-          
+
     else ! V! format
        do i = 1, nbcs
           el_idx = re2v1_data_bc(i)%elem - dist%start_idx()
@@ -619,12 +621,12 @@ subroutine re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format)
              label = int(re2v1_data_bc(i)%bc_data(5))
              call msh%mark_labeled_facet(sym_facet, el_idx, label)
           case default
-              write (*,*) re2v1_data_bc(i)%type, 'bc type not supported yet'
+             write (*,*) re2v1_data_bc(i)%type, 'bc type not supported yet'
              write (*,*) re2v1_data_bc(i)%bc_data
 
           end select
        end do
-       
+
        !
        ! Fix periodic condition for shared nodes
        !
@@ -638,15 +640,15 @@ subroutine re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format)
                    p_el_idx = int(re2v1_data_bc(i)%bc_data(1))
                    p_facet = facet_map(int(re2v1_data_bc(i)%bc_data(2)))
                    call msh%create_periodic_ids(sym_facet, el_idx, &
-                        p_facet, p_el_idx) 
+                        p_facet, p_el_idx)
                 end select
              end do
           end do
        end if
-          
+
        deallocate(re2v1_data_bc)
     end if
-    
+
   end subroutine re2_file_read_bcs
 
 
@@ -655,7 +657,7 @@ subroutine re2_file_add_point(htp, p, idx)
     type(point_t), intent(inout) :: p
     integer, intent(inout) :: idx
     integer :: tmp
-    
+
     if (htp%get(p, tmp) .gt. 0) then
        idx = idx + 1
        call htp%set(p, idx)
@@ -663,7 +665,7 @@ subroutine re2_file_add_point(htp, p, idx)
     else
        call p%set_id(tmp)
     end if
-    
+
   end subroutine re2_file_add_point
-  
+
 end module re2_file
diff --git a/src/io/rea_file.f90 b/src/io/rea_file.f90
index 23f2c44cdf0..5c617345298 100644
--- a/src/io/rea_file.f90
+++ b/src/io/rea_file.f90
@@ -37,7 +37,7 @@ module rea_file
   use num_types
   use utils
   use mesh
-  use point 
+  use point
   use map
   use rea
   use re2_file
@@ -88,16 +88,18 @@ subroutine rea_file_read(this, data)
     integer, parameter, dimension(6) :: facet_map = (/3, 2, 4, 1, 5, 6/)
     logical :: curve_skip = .false.
     character(len=LOG_SIZE) :: log_buf
+    
+    call this%check_exists()
 
     select type(data)
     type is (rea_t)
-       call rea_free(data)       
+       call rea_free(data)
        msh => data%msh
        params => data%params
        cbc => data%cbc
        read_param = .true.
        read_bcs = .true.
-    type is (mesh_t)    
+    type is (mesh_t)
        msh => data
        read_param = .false.
        read_bcs = .false.
@@ -108,50 +110,50 @@ subroutine rea_file_read(this, data)
     if (read_param .and. read_bcs .and. pe_size .gt. 1) then
        call neko_error('Reading NEKTON session data only implemented in serial')
     end if
-          
-    
+
+
     open(unit=9,file=trim(this%fname), status='old', iostat=ierr)
     call neko_log%message('Reading NEKTON file ' // this%fname)
-    
+
     read(9, *)
     read(9, *)
     read(9, *) ndim
     read(9, *) nparam
-    
+
     if (.not. read_param) then
        ! Skip parameters
        do i = 1, nparam
           read(9, *)
        end do
-    else       
+    else
        allocate(params(nparam))
        do i = 1, nparam
           read(9, *) params(i)
        end do
     end if
-    
+
     ! Skip passive scalars
     read(9, *) nskip
     do i = 1, nskip
        read(9, *)
     end do
-    
+
     ! Skip logic switches
     read(9, *) nlogic
     do i = 1, nlogic
        read(9, *)
     end do
-    
+
     ! Read mesh info
     read(9, *)
     read(9, *)
     read(9, *) nelgs,ndim, nelgv
     if (nelgs .lt. 0) then
        re2_fname = trim(this%fname(1:scan(trim(this%fname), &
-            '.', back=.true.)))//'re2' 
+            '.', back=.true.)))//'re2'
        call re2_file%init(re2_fname)
        call re2_file%read(msh)
-    else       
+    else
        write(log_buf,1) ndim, nelgv
 1      format('gdim = ', i1, ', nelements =', i7)
        call neko_log%message(log_Buf)
@@ -214,8 +216,8 @@ subroutine rea_file_read(this, data)
        end do
 
        call htp%free()
-       
-       read(9, *) 
+
+       read(9, *)
        read(9, *) ncurve
        allocate(curve_data(5,8,nelgv))
        allocate(curve_element(nelgv))
@@ -230,20 +232,22 @@ subroutine rea_file_read(this, data)
           end do
        end do
        do i = 1, ncurve
-          read(9, *) edge, el_idx, (curve(j),j=1,5), chtemp       
+          read(9, *) edge, el_idx, (curve(j),j=1,5), chtemp
           do j = 1, 5
              curve_data(j,edge,el_idx) = curve(j)
           end do
-          curve_element(el_idx) = .true. 
+          curve_element(el_idx) = .true.
           select case(trim(chtemp))
           case ('s')
-            curve_type(edge,el_idx) = 1
-            curve_skip = .true.
+             curve_type(edge,el_idx) = 1
+             curve_skip = .true.
           case ('e')
-            curve_type(edge,el_idx) = 2
-            curve_skip = .true.
+             curve_type(edge,el_idx) = 2
+             curve_skip = .true.
           case ('C')
-            curve_type(edge,el_idx) = 3
+             curve_type(edge,el_idx) = 3
+          case ('m')
+             curve_type(edge,el_idx) = 4
           end select
        end do
        if (curve_skip) then
@@ -254,15 +258,15 @@ subroutine rea_file_read(this, data)
                 call msh%mark_curve_element(el_idx, &
                      curve_data(1,1,el_idx), curve_type(1,el_idx))
              end if
-          end do 
+          end do
        end if
        deallocate(curve_data)
        deallocate(curve_element)
        deallocate(curve_type)
 
        ! Read fluid boundary conditions
-       read(9,*) 
-       read(9,*) 
+       read(9,*)
+       read(9,*)
        if (.not. read_bcs) then ! Mark zones in the mesh
           allocate(cbc(6,nelgv))
           allocate(bc_data(6,2*ndim,nelgv))
@@ -306,7 +310,7 @@ subroutine rea_file_read(this, data)
                       p_el_idx = int(bc_data(2+off,j,i))
                       p_facet = facet_map(int(bc_data(3+off,j,i)))
                       call msh%create_periodic_ids(sym_facet, el_idx, &
-                                                   p_facet, p_el_idx) 
+                                                   p_facet, p_el_idx)
                    end select
                 end do
              end if
@@ -321,7 +325,7 @@ subroutine rea_file_read(this, data)
                       p_el_idx = int(bc_data(2+off,j,i))
                       p_facet = facet_map(int(bc_data(3+off,j,i)))
                       call msh%create_periodic_ids(sym_facet, el_idx, &
-                                                   p_facet, p_el_idx) 
+                                                   p_facet, p_el_idx)
                    end select
                 end do
              end if
@@ -336,7 +340,7 @@ subroutine rea_file_read(this, data)
                       p_el_idx = int(bc_data(2+off,j,i))
                       p_facet = facet_map(int(bc_data(3+off,j,i)))
                       call msh%create_periodic_ids(sym_facet, el_idx, &
-                                                   p_facet, p_el_idx) 
+                                                   p_facet, p_el_idx)
                    end select
                 end do
              end if
@@ -353,11 +357,11 @@ subroutine rea_file_read(this, data)
        end if
 
        call msh%finalize()
-       
+
        call neko_log%message('Done')
        close(9)
     endif
-    
+
   end subroutine rea_file_read
 
   subroutine rea_file_write(this, data, t)
@@ -371,7 +375,7 @@ subroutine rea_file_add_point(htp, p, idx)
     type(point_t), intent(inout) :: p
     integer, intent(inout) :: idx
     integer :: tmp
-    
+
     if (htp%get(p, tmp) .gt. 0) then
        idx = idx + 1
        call htp%set(p, idx)
@@ -379,7 +383,7 @@ subroutine rea_file_add_point(htp, p, idx)
     else
        call p%set_id(tmp)
     end if
-    
+
   end subroutine rea_file_add_point
 
 end module rea_file
diff --git a/src/io/stl_file.f90 b/src/io/stl_file.f90
index 519811dbdbf..c1d4d658d49 100644
--- a/src/io/stl_file.f90
+++ b/src/io/stl_file.f90
@@ -30,17 +30,18 @@
 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 ! POSSIBILITY OF SUCH DAMAGE.
 !
-!>  Stereolithography (STL) file 
+!>  Stereolithography (STL) file
 module stl_file
   use num_types
   use generic_file
-  use tri_mesh  
+  use tri_mesh
   use logger
   use point
   use neko_mpi_types
   use mpi_f08
+  use utils, only: neko_error
   use comm
-  use stl    
+  use stl
   implicit none
   private
 
@@ -59,11 +60,11 @@ subroutine stl_file_write(this, data, t)
     real(kind=rp), intent(in), optional :: t
     call neko_log%error('Not implemented')
   end subroutine stl_file_write
-  
+
   subroutine stl_file_read(this, data)
     class(stl_file_t) :: this
     class(*), target, intent(inout) :: data
-    type(tri_mesh_t), pointer :: tri_msh => null()    
+    type(tri_mesh_t), pointer :: tri_msh => null()
     type(MPI_Status) :: status
     type(MPI_File) :: fh
     type(point_t) :: p(3)
@@ -71,6 +72,8 @@ subroutine stl_file_read(this, data)
     type(stl_triangle_t), allocatable :: stl_tri(:)
     integer :: i, p_idx, ierr
 
+    call this%check_exists()
+
     select type(data)
     type is(tri_mesh_t)
        tri_msh => data
@@ -85,7 +88,7 @@ subroutine stl_file_read(this, data)
     if (stl_hdr%hdr(1:6) .eq. 'solid') then
        call neko_log%error('Invalid STL file (ASCII)')
     end if
-    
+
     call tri_msh%init(stl_hdr%ntri)
     allocate(stl_tri(stl_hdr%ntri))
 
@@ -106,7 +109,7 @@ subroutine stl_file_read(this, data)
     deallocate(stl_tri)
 
     call MPI_File_close(fh, ierr)
-     
+
   end subroutine stl_file_read
-  
+
 end module stl_file
diff --git a/src/io/vtk_file.f90 b/src/io/vtk_file.f90
index 3f64cda7ace..baae5f41c5a 100644
--- a/src/io/vtk_file.f90
+++ b/src/io/vtk_file.f90
@@ -46,7 +46,7 @@ module vtk_file
   use comm
   implicit none
   private
-  
+
   !> Interface for legacy VTK files
   type, public, extends(generic_file_t) :: vtk_file_t
    contains
@@ -105,7 +105,7 @@ subroutine vtk_file_write(this, data, t)
     if (associated(msh)) then
        write(9, fmt='(A)') 'DATASET UNSTRUCTURED_GRID'
 
-       call vtk_file_write_mesh(9, msh)       
+       call vtk_file_write_mesh(9, msh)
 
        if (associated(mfld)) then
           call vtk_file_write_cell_data(9, mfld)
@@ -127,13 +127,15 @@ subroutine vtk_file_write(this, data, t)
     else
        call neko_error('Invalid data')
     end if
-    
+
     close(9)
   end subroutine vtk_file_write
 
   subroutine vtk_file_read(this, data)
     class(vtk_file_t) :: this
     class(*), target, intent(inout) :: data
+
+    call neko_error('VTK file read not implemented')
   end subroutine vtk_file_read
 
   !> Write a mesh in legacy VTK format
@@ -177,15 +179,15 @@ subroutine vtk_file_write_cell_data(unit, mfld)
     write(unit, fmt='(A,I8)') 'CELL_DATA', mfld%msh%nelv
     write(unit, fmt='(A,A,A,I8)') 'SCALARS ', trim(mfld%name), ' int', 1
     write(unit, fmt='(A)') 'LOOKUP_TABLE default'
-    
+
     do i = 1, mfld%msh%nelv
        write(unit, fmt='(I8)') mfld%data(i)
     end do
-    
+
   end subroutine vtk_file_write_cell_data
 
   !> Write a field @a fld as point data
-  !! @note High-order fields will be interpolated down 
+  !! @note High-order fields will be interpolated down
   !! to the low-order mesh
   subroutine vtk_file_write_point_data(unit, fld)
     integer :: unit
@@ -202,12 +204,12 @@ subroutine vtk_file_write_point_data(unit, fld)
     write(unit, fmt='(A,I8)') 'POINT_DATA', fld%msh%mpts
     write(unit, fmt='(A,A,A,I8)') 'SCALARS ', trim(fld%name), ' double', 1
     write(unit, fmt='(A)') 'LOOKUP_TABLE default'
-    
+
     lx = fld%Xh%lx
     ly = fld%Xh%ly
     lz = fld%Xh%lz
     allocate(point_data(fld%msh%mpts))
-    
+
     do i = 1, fld%msh%nelv
        do j = 1, fld%msh%npts
           id(j) = fld%msh%get_local(fld%msh%elements(i)%e%pts(j)%p)
@@ -216,21 +218,21 @@ subroutine vtk_file_write_point_data(unit, fld)
        point_data(id(1)) = real(fld%x(1,1,1,i),dp)
        point_data(id(2)) = real(fld%x(lx,1,1,i),dp)
        point_data(id(3)) = real(fld%x(1,ly,1,i),dp)
-       point_data(id(4)) = real(fld%x(lx,ly,1,i),dp)       
+       point_data(id(4)) = real(fld%x(lx,ly,1,i),dp)
 
        if (fld%msh%gdim .eq. 3) then
           point_data(id(5)) = real(fld%x(1,1,lz,i),dp)
           point_data(id(6)) = real(fld%x(lx,1,lz,i),dp)
           point_data(id(7)) = real(fld%x(1,ly,lz,i),dp)
-          point_data(id(8)) = real(fld%x(lx,ly,lz,i),dp)    
+          point_data(id(8)) = real(fld%x(lx,ly,lz,i),dp)
        end if
 
     end do
 
     write(unit, *) point_data
-    
-    deallocate(point_data)       
-    
+
+    deallocate(point_data)
+
   end subroutine vtk_file_write_point_data
 
   !> Write xyz-coordinates of a dofmap @a dm as points
@@ -258,30 +260,30 @@ subroutine vtk_file_write_dofmap_coordinates(unit, dm)
     do i = 1, size(dm%x)
        write(unit, fmt='(I8,I8)') 1,i-1
     end do
-    
-    
+
+
   end subroutine vtk_file_write_dofmap_coordinates
 
   !> Write a dofmap @a dm data as point data
-  subroutine vtk_file_write_dofmap_data(unit, dm)    
+  subroutine vtk_file_write_dofmap_data(unit, dm)
     integer :: unit
     type(dofmap_t), intent(inout) :: dm
     integer :: i, j, k, l
 
-     write(unit, fmt='(A,I8)') 'POINT_DATA', size(dm%dof)
-     write(unit, fmt='(A,A,A,I8)') 'SCALARS ', 'dof_id', ' integer', 1
-     write(unit, fmt='(A)') 'LOOKUP_TABLE default'
-
-     do i = 1, dm%msh%nelv
-        do l = 1, dm%Xh%lz
-           do k = 1, dm%Xh%ly
-              do j = 1, dm%Xh%lx
-                 write(unit, fmt='(I8)') real(dm%dof(j,k,l,i),dp)
-              end do
-           end do
-        end do
-     end do
-    
+    write(unit, fmt='(A,I8)') 'POINT_DATA', size(dm%dof)
+    write(unit, fmt='(A,A,A,I8)') 'SCALARS ', 'dof_id', ' integer', 1
+    write(unit, fmt='(A)') 'LOOKUP_TABLE default'
+
+    do i = 1, dm%msh%nelv
+       do l = 1, dm%Xh%lz
+          do k = 1, dm%Xh%ly
+             do j = 1, dm%Xh%lx
+                write(unit, fmt='(I8)') real(dm%dof(j,k,l,i),dp)
+             end do
+          end do
+       end do
+    end do
+
     write(unit, fmt='(A,A,A,I8)') 'SCALARS ', 'shared_dof', ' integer', 1
     write(unit, fmt='(A)') 'LOOKUP_TABLE default'
 
@@ -298,7 +300,7 @@ subroutine vtk_file_write_dofmap_data(unit, dm)
           end do
        end do
     end do
-    
+
   end subroutine vtk_file_write_dofmap_data
 
   !> Write a tetrahedral mesh in legacy VTK format
diff --git a/src/krylov/bcknd/cpu/bicgstab.f90 b/src/krylov/bcknd/cpu/bicgstab.f90
index ce6f691a557..a843d0bffb2 100644
--- a/src/krylov/bcknd/cpu/bicgstab.f90
+++ b/src/krylov/bcknd/cpu/bicgstab.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2021, The Neko Authors
+! Copyright (c) 2021-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -41,7 +41,7 @@ module bicgstab
   use gather_scatter, only : gs_t, GS_OP_ADD
   use bc, only : bc_list_t, bc_list_apply
   use math, only : glsc3, rzero, copy, NEKO_EPS, add2s2, x_update, &
-                   p_update
+                   p_update, abscmp
   use utils, only : neko_error
   implicit none
   private
@@ -56,24 +56,28 @@ module bicgstab
      real(kind=rp), allocatable :: t(:)
      real(kind=rp), allocatable :: v(:)
    contains
+     !> Constructor.
      procedure, pass(this) :: init => bicgstab_init
+     !> Destructor.
      procedure, pass(this) :: free => bicgstab_free
+     !> Solve the system.
      procedure, pass(this) :: solve => bicgstab_solve
   end type bicgstab_t
 
 contains
 
-  !> Initialise a standard BiCGSTAB solver
-  subroutine bicgstab_init(this, n, M, rel_tol, abs_tol)
+  !> Constructor.
+  subroutine bicgstab_init(this, n, max_iter, M, rel_tol, abs_tol)
     class(bicgstab_t), intent(inout) :: this
     class(pc_t), optional, intent(inout), target :: M
     integer, intent(in) :: n
+    integer, intent(in) :: max_iter
     real(kind=rp), optional, intent(inout) :: rel_tol
     real(kind=rp), optional, intent(inout) :: abs_tol
 
-        
+
     call this%free()
-    
+
     allocate(this%p(n))
     allocate(this%p_hat(n))
     allocate(this%r(n))
@@ -81,20 +85,20 @@ subroutine bicgstab_init(this, n, M, rel_tol, abs_tol)
     allocate(this%s_hat(n))
     allocate(this%t(n))
     allocate(this%v(n))
-    if (present(M)) then 
+    if (present(M)) then
        this%M => M
     end if
 
     if (present(rel_tol) .and. present(abs_tol)) then
-       call this%ksp_init(rel_tol, abs_tol)
+       call this%ksp_init(max_iter, rel_tol, abs_tol)
     else if (present(rel_tol)) then
-       call this%ksp_init(rel_tol=rel_tol)
+       call this%ksp_init(max_iter, rel_tol=rel_tol)
     else if (present(abs_tol)) then
-       call this%ksp_init(abs_tol=abs_tol)
+       call this%ksp_init(max_iter, abs_tol=abs_tol)
     else
-       call this%ksp_init()
+       call this%ksp_init(max_iter)
     end if
-          
+
   end subroutine bicgstab_init
 
   !> Deallocate a standard BiCGSTAB solver
@@ -110,7 +114,7 @@ subroutine bicgstab_free(this)
     if (allocated(this%r)) then
        deallocate(this%r)
     end if
-    
+
     if (allocated(this%t)) then
        deallocate(this%t)
     end if
@@ -118,15 +122,15 @@ subroutine bicgstab_free(this)
     if (allocated(this%p)) then
        deallocate(this%p)
     end if
- 
+
     if (allocated(this%p_hat)) then
        deallocate(this%p_hat)
     end if
-    
+
     if (allocated(this%s)) then
        deallocate(this%s)
     end if
- 
+
     if (allocated(this%s_hat)) then
        deallocate(this%s_hat)
     end if
@@ -135,7 +139,7 @@ subroutine bicgstab_free(this)
 
 
   end subroutine bicgstab_free
-  
+
   !> Bi-Conjugate Gradient Stabilized method solve
   function bicgstab_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
     class(bicgstab_t), intent(inout) :: this
@@ -151,17 +155,17 @@ function bicgstab_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
     integer :: iter, max_iter
     real(kind=rp) :: rnorm, rtr, norm_fac, gamma
     real(kind=rp) :: beta, alpha, omega, rho_1, rho_2
-    
+
     if (present(niter)) then
        max_iter = niter
     else
-       max_iter = KSP_MAX_ITER
+       max_iter = this%max_iter
     end if
     norm_fac = 1.0_rp / sqrt(coef%volume)
 
     associate(r => this%r, t => this%t, s => this%s, v => this%v, p => this%p, &
          s_hat => this%s_hat, p_hat => this%p_hat)
-    
+
       call rzero(x%x, n)
       call copy(r, f, n)
 
@@ -171,22 +175,22 @@ function bicgstab_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
       ksp_results%res_start = rnorm
       ksp_results%res_final = rnorm
       ksp_results%iter = 0
-      if(rnorm .eq. 0.0_rp) return
+      if(abscmp(rnorm, 0.0_rp)) return
       do iter = 1, max_iter
-         
+
          rho_1 = glsc3(r, coef%mult, f ,n)
-         
+
          if (abs(rho_1) .lt. NEKO_EPS) then
             call neko_error('Bi-CGStab rho failure')
          end if
-   
+
          if (iter .eq. 1) then
-            call copy(p, r, n) 
+            call copy(p, r, n)
          else
             beta = (rho_1 / rho_2) * (alpha / omega)
             call p_update(p, r, v, beta, omega, n)
          end if
-       
+
          call this%M%solve(p_hat, p, n)
          call Ax%compute(v, p_hat, coef, x%msh, x%Xh)
          call gs_h%op(v, n, GS_OP_ADD)
@@ -200,7 +204,7 @@ function bicgstab_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
             call add2s2(x%x, p_hat, alpha,n)
             exit
          end if
-       
+
          call this%M%solve(s_hat, s, n)
          call Ax%compute(t, s_hat, coef, x%msh, x%Xh)
          call gs_h%op(t, n, GS_OP_ADD)
@@ -210,18 +214,18 @@ function bicgstab_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
          call x_update(x%x, p_hat, s_hat, alpha, omega, n)
          call copy(r, s, n)
          call add2s2(r, t, -omega, n)
-      
+
          rtr = glsc3(r, coef%mult, r, n)
          rnorm = sqrt(rtr) * norm_fac
          if (rnorm .lt. this%abs_tol .or. rnorm .lt. gamma) then
             exit
          end if
-    
+
          if (omega .lt. NEKO_EPS) then
             call neko_error('Bi-CGstab omega failure')
          end if
          rho_2 = rho_1
-    
+
       end do
     end associate
     ksp_results%res_final = rnorm
@@ -229,5 +233,5 @@ function bicgstab_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
   end function bicgstab_solve
 
 end module bicgstab
-  
+
 
diff --git a/src/krylov/bcknd/cpu/cacg.f90 b/src/krylov/bcknd/cpu/cacg.f90
index c1ba3ae99ee..2f15d501626 100644
--- a/src/krylov/bcknd/cpu/cacg.f90
+++ b/src/krylov/bcknd/cpu/cacg.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2021, The Neko Authors
+! Copyright (c) 2021-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -40,13 +40,13 @@ module cacg
   use coefs, only : coef_t
   use gather_scatter, only : gs_t, GS_OP_ADD
   use bc, only : bc_list_t, bc_list_apply, bc_list_apply_scalar
-  use math, only : glsc3, rzero, copy, x_update
+  use math, only : glsc3, rzero, copy, x_update, abscmp
   use utils, only : neko_warning
   use comm
   use mxm_wrapper
   implicit none
   private
-  
+
   !> S-step communication avoiding  preconditioned conjugate gradient method
   type, public, extends(ksp_t) :: cacg_t
      real(kind=rp), allocatable :: r(:)
@@ -62,10 +62,11 @@ module cacg
 contains
 
   !> Initialise a s-step CA  PCG solver
-  subroutine cacg_init(this, n, M, s, rel_tol, abs_tol)
+  subroutine cacg_init(this, n, max_iter, M, s, rel_tol, abs_tol)
     class(cacg_t), intent(inout) :: this
     class(pc_t), optional, intent(inout), target :: M
     integer, intent(in) :: n
+    integer, intent(in) :: max_iter
     real(kind=rp), optional, intent(inout) :: rel_tol
     real(kind=rp), optional, intent(inout) :: abs_tol
     integer, optional, intent(inout) :: s
@@ -73,30 +74,30 @@ subroutine cacg_init(this, n, M, s, rel_tol, abs_tol)
 
     if (present(s)) then
        this%s = s
-    else 
+    else
        this%s = 4
     end if
     if (pe_rank .eq. 0) then
        call neko_warning("Communication Avoiding CG chosen, be aware of potential instabilities")
     end if
-    
+
     allocate(this%r(n))
     allocate(this%p(n))
     allocate(this%PR(n,4*this%s+1))
-    if (present(M)) then 
+    if (present(M)) then
        this%M => M
     end if
 
     if (present(rel_tol) .and. present(abs_tol)) then
-       call this%ksp_init(rel_tol, abs_tol)
+       call this%ksp_init(max_iter, rel_tol, abs_tol)
     else if (present(rel_tol)) then
-       call this%ksp_init(rel_tol=rel_tol)
+       call this%ksp_init(max_iter, rel_tol=rel_tol)
     else if (present(abs_tol)) then
-       call this%ksp_init(abs_tol=abs_tol)
+       call this%ksp_init(max_iter, abs_tol=abs_tol)
     else
-       call this%ksp_init()
+       call this%ksp_init(max_iter)
     end if
-          
+
   end subroutine cacg_init
 
   !> Deallocate a s-step CA PCG solver
@@ -116,12 +117,12 @@ subroutine cacg_free(this)
     if (allocated(this%p)) then
        deallocate(this%p)
     end if
-    
+
     nullify(this%M)
 
 
   end subroutine cacg_free
-  
+
   !> S-step CA PCG solve
   function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
     class(cacg_t), intent(inout) :: this
@@ -142,34 +143,34 @@ function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resul
     real(kind=rp) :: r_c(4*this%s+1,this%s+1)
     real(kind=rp) :: z_c(4*this%s+1,this%s+1)
     real(kind=rp) :: x_c(4*this%s+1,this%s+1)
-    
+
     associate(PR => this%PR, r => this%r, p => this%p)
       s = this%s
       if (present(niter)) then
          max_iter = niter
       else
-         max_iter = KSP_MAX_ITER
+         max_iter = this%max_iter
       end if
       norm_fac = 1.0_rp / sqrt(coef%volume)
-      
+
       rtz1 = 1.0_rp
       call rzero(x%x, n)
       call copy(r, f, n)
       call this%M%solve(p, r, n)
-      
+
       rtr = glsc3(r, coef%mult, r, n)
       rnorm = sqrt(rtr)*norm_fac
       ksp_results%res_start = rnorm
       ksp_results%res_final = rnorm
       ksp_results%iter = 0
       iter = 0
-      if(rnorm .eq. 0.0_rp) return
+      if(abscmp(rnorm, 0.0_rp)) return
       do while (iter < max_iter)
 
          call copy(PR,p, n)
          call copy(PR(1,2*s+2), r, n)
 
-         !Here we have hardcoded a monomial basis atm. 
+         !Here we have hardcoded a monomial basis atm.
          do i = 2, 2*s + 1
             if (mod(i,2) .eq. 0) then
                call Ax%compute(PR(1,i), PR(1,i-1), coef, x%msh, x%Xh)
@@ -192,7 +193,7 @@ function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resul
 
          call construct_basis_matrix(Tt, s)
          call rzero(p_c, (4*s+1) * (s+1))
-         p_c(1,1) = 1.0_rp 
+         p_c(1,1) = 1.0_rp
          call rzero(r_c, (4*s+1) * (s+1))
          r_c(2*s+2,1) = 1.0_rp
          call mxm(Tt, 4*s+1, r_c, 4*s+1, z_c,s+1)
@@ -234,12 +235,12 @@ function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resul
                G(k,j) = temp2(it,1)
             end do
          end do
-         
+
          call mxm(G,4*s+1, Tt, 4*s+1,GTt,4*s+1)
-         
+
          do j = 1, s
             iter = iter + 1
-          
+
             call mxm(G, 4*s+1, r_c(1,j), 4*s+1,temp, 1)
             call mxm(GTt, 4*s+1, p_c(1,j), 4*s+1,temp2, 1)
             alpha1 = 0.0_rp
@@ -291,7 +292,7 @@ function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resul
                do k = 1, NEKO_BLK_SIZE
                   rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1)
                end do
-            else 
+            else
                do j = 1,4*s+1
                   do k = 1, n-i
                      x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + PR(i+k,j) * x_c(j,s+1)
@@ -309,7 +310,7 @@ function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resul
          call MPI_Allreduce(rtr, tmp, 1, &
               MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
          rnorm = norm_fac*sqrt(tmp)
-         if( rnorm <= this%abs_tol) exit 
+         if( rnorm <= this%abs_tol) exit
       end do
 
       ksp_results%res_final = rnorm
@@ -321,19 +322,19 @@ end function cacg_solve
 
   !> Monomial matrix constuction, not sparse
   subroutine construct_basis_matrix(Tt, s)
-     integer, intent(in) :: s
-     real(kind=rp), intent(inout) :: Tt(4*s+1,4*s+1)
-     integer :: mlen, i
-     mlen = (4*s+1)*(4*s+1)
-     call rzero(Tt,mlen)
-     do i = 1, 2*s 
-        Tt(i+1,i) = 1.0_rp
-     end do
-     do i = 1, (2*s-1)
-        Tt(2*s+2+i,2*s+1+i) = 1.0_rp
-     end do
+    integer, intent(in) :: s
+    real(kind=rp), intent(inout) :: Tt(4*s+1,4*s+1)
+    integer :: mlen, i
+    mlen = (4*s+1)*(4*s+1)
+    call rzero(Tt,mlen)
+    do i = 1, 2*s
+       Tt(i+1,i) = 1.0_rp
+    end do
+    do i = 1, (2*s-1)
+       Tt(2*s+2+i,2*s+1+i) = 1.0_rp
+    end do
   end subroutine construct_basis_matrix
 
 end module cacg
-  
+
 
diff --git a/src/krylov/bcknd/cpu/cg.f90 b/src/krylov/bcknd/cpu/cg.f90
index 4461a4e76ef..5d74ce50074 100644
--- a/src/krylov/bcknd/cpu/cg.f90
+++ b/src/krylov/bcknd/cpu/cg.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2020-2021, The Neko Authors
+! Copyright (c) 2020-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -40,13 +40,13 @@ module cg
   use coefs, only : coef_t
   use gather_scatter, only : gs_t, GS_OP_ADD
   use bc, only : bc_list_t, bc_list_apply
-  use math, only : glsc3, rzero, copy
+  use math, only : glsc3, rzero, copy, abscmp
   use comm
   implicit none
   private
 
   integer, parameter :: CG_P_SPACE = 7
-  
+
   !> Standard preconditioned conjugate gradient method
   type, public, extends(ksp_t) :: cg_t
      real(kind=rp), allocatable :: w(:)
@@ -63,13 +63,14 @@ module cg
 contains
 
   !> Initialise a standard PCG solver
-  subroutine cg_init(this, n, M, rel_tol, abs_tol)
+  subroutine cg_init(this, n, max_iter, M, rel_tol, abs_tol)
     class(cg_t), intent(inout), target :: this
+    integer, intent(in) :: max_iter
     class(pc_t), optional, intent(inout), target :: M
     integer, intent(in) :: n
     real(kind=rp), optional, intent(inout) :: rel_tol
     real(kind=rp), optional, intent(inout) :: abs_tol
-        
+
     call this%free()
 
     allocate(this%w(n))
@@ -77,21 +78,21 @@ subroutine cg_init(this, n, M, rel_tol, abs_tol)
     allocate(this%p(n,CG_P_SPACE))
     allocate(this%z(n))
     allocate(this%alpha(CG_P_SPACE))
-    
-    if (present(M)) then 
+
+    if (present(M)) then
        this%M => M
     end if
 
     if (present(rel_tol) .and. present(abs_tol)) then
-       call this%ksp_init(rel_tol, abs_tol)
+       call this%ksp_init(max_iter, rel_tol, abs_tol)
     else if (present(rel_tol)) then
-       call this%ksp_init(rel_tol=rel_tol)
+       call this%ksp_init(max_iter, rel_tol=rel_tol)
     else if (present(abs_tol)) then
-       call this%ksp_init(abs_tol=abs_tol)
+       call this%ksp_init(max_iter, abs_tol=abs_tol)
     else
-       call this%ksp_init()
+       call this%ksp_init(max_iter)
     end if
-          
+
   end subroutine cg_init
 
   !> Deallocate a standard PCG solver
@@ -111,11 +112,11 @@ subroutine cg_free(this)
     if (allocated(this%p)) then
        deallocate(this%p)
     end if
-    
+
     if (allocated(this%z)) then
        deallocate(this%z)
     end if
-    
+
     if (allocated(this%alpha)) then
        deallocate(this%alpha)
     end if
@@ -123,7 +124,7 @@ subroutine cg_free(this)
     nullify(this%M)
 
   end subroutine cg_free
-  
+
   !> Standard PCG solve
   function cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
     class(cg_t), intent(inout) :: this
@@ -139,11 +140,11 @@ function cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results
     integer :: iter, max_iter, i, j, k, p_cur, p_prev
     real(kind=rp) :: rnorm, rtr, rtz2, rtz1, x_plus(NEKO_BLK_SIZE)
     real(kind=rp) :: beta, pap, norm_fac
-    
+
     if (present(niter)) then
        max_iter = niter
     else
-       max_iter = KSP_MAX_ITER
+       max_iter = this%max_iter
     end if
     norm_fac = 1.0_rp / sqrt(coef%volume)
 
@@ -162,24 +163,24 @@ function cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results
       ksp_results%iter = 0
       p_prev = CG_P_SPACE
       p_cur = 1
-      if(rnorm .eq. 0.0_rp) return
+      if(abscmp(rnorm, 0.0_rp)) return
       do iter = 1, max_iter
          call this%M%solve(z, r, n)
          rtz2 = rtz1
          rtz1 = glsc3(r, coef%mult, z, n)
-      
+
          beta = rtz1 / rtz2
          if (iter .eq. 1) beta = 0.0_rp
          do i = 1, n
             p(i,p_cur) = z(i) + beta * p(i,p_prev)
          end do
-       
+
          call Ax%compute(w, p(1,p_cur), coef, x%msh, x%Xh)
          call gs_h%op(w, n, GS_OP_ADD)
          call bc_list_apply(blst, w, n)
-         
+
          pap = glsc3(w, coef%mult, p(1,p_cur), n)
-         
+
          alpha(p_cur) = rtz1 / pap
          call second_cg_part(rtr, r, coef%mult, w, alpha(p_cur), n)
          rnorm = sqrt(rtr) * norm_fac
@@ -199,7 +200,7 @@ function cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results
                   do k = 1, NEKO_BLK_SIZE
                      x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
                   end do
-               else 
+               else
                   do k = 1, n-i
                      x_plus(1) = 0.0_rp
                      do j = 1, p_cur
@@ -227,7 +228,7 @@ end function cg_solve
   subroutine second_cg_part(rtr, r, mult, w, alpha, n)
     integer, intent(in) :: n
     real(kind=rp), intent(inout) :: r(n), rtr
-    real(kind=rp), intent(in) ::mult(n), w(n), alpha 
+    real(kind=rp), intent(in) ::mult(n), w(n), alpha
     integer :: i, ierr
 
     rtr = 0.0_rp
@@ -238,8 +239,8 @@ subroutine second_cg_part(rtr, r, mult, w, alpha, n)
     call MPI_Allreduce(MPI_IN_PLACE, rtr, 1, &
          MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
 
-  end subroutine second_cg_part 
+  end subroutine second_cg_part
 
 end module cg
-  
+
 
diff --git a/src/krylov/bcknd/cpu/gmres.f90 b/src/krylov/bcknd/cpu/gmres.f90
index 98bf9f24cc0..0832eabfbb7 100644
--- a/src/krylov/bcknd/cpu/gmres.f90
+++ b/src/krylov/bcknd/cpu/gmres.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2020-2021, The Neko Authors
+! Copyright (c) 2020-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -40,12 +40,12 @@ module gmres
   use coefs, only : coef_t
   use gather_scatter, only : gs_t, GS_OP_ADD
   use bc, only : bc_list_t, bc_list_apply
-  use math, only : glsc3, rzero, rone, copy, sub2, cmult2
+  use math, only : glsc3, rzero, rone, copy, sub2, cmult2, abscmp
   use comm
   implicit none
   private
 
-  !> Standard preconditioned generalized minimal residual method 
+  !> Standard preconditioned generalized minimal residual method
   type, public, extends(ksp_t) :: gmres_t
      integer :: lgmres
      real(kind=rp), allocatable :: w(:)
@@ -66,9 +66,10 @@ module gmres
 contains
 
   !> Initialise a standard GMRES solver
-  subroutine gmres_init(this, n, M, lgmres, rel_tol, abs_tol)
+  subroutine gmres_init(this, n, max_iter, M, lgmres, rel_tol, abs_tol)
     class(gmres_t), intent(inout) :: this
     integer, intent(in) :: n
+    integer, intent(in) :: max_iter
     class(pc_t), optional, intent(inout), target :: M
     integer, optional, intent(inout) :: lgmres
     real(kind=rp), optional, intent(inout) :: rel_tol
@@ -79,38 +80,38 @@ subroutine gmres_init(this, n, M, lgmres, rel_tol, abs_tol)
     else
        this%lgmres = 30
     end if
-    
+
 
     call this%free()
-    
-    if (present(M)) then 
+
+    if (present(M)) then
        this%M => M
     end if
 
     allocate(this%w(n))
     allocate(this%r(n))
     allocate(this%wk1(n))
-    
+
     allocate(this%c(this%lgmres))
     allocate(this%s(this%lgmres))
     allocate(this%gam(this%lgmres + 1))
-    
+
     allocate(this%z(n,this%lgmres))
     allocate(this%v(n,this%lgmres))
-    
+
     allocate(this%h(this%lgmres,this%lgmres))
-    
-       
+
+
     if (present(rel_tol) .and. present(abs_tol)) then
-       call this%ksp_init(rel_tol, abs_tol)
+       call this%ksp_init(max_iter, rel_tol, abs_tol)
     else if (present(rel_tol)) then
-       call this%ksp_init(rel_tol=rel_tol)
+       call this%ksp_init(max_iter, rel_tol=rel_tol)
     else if (present(abs_tol)) then
-       call this%ksp_init(abs_tol=abs_tol)
+       call this%ksp_init(max_iter, abs_tol=abs_tol)
     else
-       call this%ksp_init(abs_tol)
+       call this%ksp_init(max_iter)
     end if
-          
+
   end subroutine gmres_init
 
   !> Deallocate a standard GMRES solver
@@ -130,7 +131,7 @@ subroutine gmres_free(this)
     if (allocated(this%r)) then
        deallocate(this%r)
     end if
- 
+
     if (allocated(this%z)) then
        deallocate(this%z)
     end if
@@ -138,28 +139,28 @@ subroutine gmres_free(this)
     if (allocated(this%h)) then
        deallocate(this%h)
     end if
-    
+
     if (allocated(this%v)) then
        deallocate(this%v)
     end if
-    
+
     if (allocated(this%s)) then
        deallocate(this%s)
     end if
-    
-    
+
+
     if (allocated(this%gam)) then
        deallocate(this%gam)
     end if
-    
+
     if (allocated(this%wk1)) then
        deallocate(this%wk1)
     end if
-    
+
     nullify(this%M)
-    
+
   end subroutine gmres_free
- 
+
   !> Standard GMRES solve
   function gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
     class(gmres_t), intent(inout) :: this
@@ -172,8 +173,8 @@ function gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resu
     type(gs_t), intent(inout) :: gs_h
     type(ksp_monitor_t) :: ksp_results
     integer, optional, intent(in) :: niter
-    integer :: iter 
-    integer :: i, j, k, l, ierr 
+    integer :: iter, max_iter
+    integer :: i, j, k, l, ierr
     real(kind=rp) :: w_plus(NEKO_BLK_SIZE), x_plus(NEKO_BLK_SIZE)
     real(kind=rp) :: rnorm, alpha, temp, lr, alpha2, norm_fac
     logical :: conv
@@ -181,179 +182,185 @@ function gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resu
     conv = .false.
     iter = 0
 
-     associate(w => this%w, c => this%c, r => this%r, z => this%z, h => this%h, &
+    if (present(niter)) then
+       max_iter = niter
+    else
+       max_iter = this%max_iter
+    end if
+
+    associate(w => this%w, c => this%c, r => this%r, z => this%z, h => this%h, &
           v => this%v, s => this%s, gam => this%gam, wk1 =>this%wk1)
 
-       norm_fac = 1.0_rp / sqrt(coef%volume)
-       call rzero(x%x, n)
-       call rzero(gam, this%lgmres + 1)
-       call rone(s, this%lgmres)
-       call rone(c, this%lgmres)
-       call rzero(h, this%lgmres * this%lgmres)
-       do while (.not. conv .and. iter .lt. niter)
-
-          if(iter.eq.0) then               
-             call copy(r, f, n) 
-          else
-             call copy(r, f, n)      
-             call Ax%compute(w, x%x, coef, x%msh, x%Xh)
-             call gs_h%op(w, n, GS_OP_ADD)
-             call bc_list_apply(blst, w, n)
-             call sub2(r, w, n) 
-          end if
-          
-          gam(1) = sqrt(glsc3(r, r, coef%mult, n))
-          if(iter.eq.0) then
-             ksp_results%res_start = gam(1) * norm_fac
-          end if
-
-          if (gam(1) .eq. 0) return
-
-          rnorm = 0.0_rp
-          temp = 1.0_rp / gam(1)
-          call cmult2(v(1,1), r, temp, n) 
-          do j = 1, this%lgmres
-             iter = iter+1
-          
-             call this%M%solve(z(1,j), v(1,j), n)
-
-             call Ax%compute(w, z(1,j), coef, x%msh, x%Xh)
-             call gs_h%op(w, n, GS_OP_ADD)
-             call bc_list_apply(blst, w, n)
-             
-             do l = 1, j
-                h(l,j) = 0.0_rp
-             enddo
-
-             do i = 0, n, NEKO_BLK_SIZE
-                if (i + NEKO_BLK_SIZE .le. n) then
-                   do l = 1, j
-                      do k = 1, NEKO_BLK_SIZE
-                         h(l,j) = h(l,j) + &
+      norm_fac = 1.0_rp / sqrt(coef%volume)
+      call rzero(x%x, n)
+      call rzero(gam, this%lgmres + 1)
+      call rone(s, this%lgmres)
+      call rone(c, this%lgmres)
+      call rzero(h, this%lgmres * this%lgmres)
+      do while (.not. conv .and. iter .lt. max_iter)
+
+         if(iter.eq.0) then
+            call copy(r, f, n)
+         else
+            call copy(r, f, n)
+            call Ax%compute(w, x%x, coef, x%msh, x%Xh)
+            call gs_h%op(w, n, GS_OP_ADD)
+            call bc_list_apply(blst, w, n)
+            call sub2(r, w, n)
+         end if
+
+         gam(1) = sqrt(glsc3(r, r, coef%mult, n))
+         if(iter.eq.0) then
+            ksp_results%res_start = gam(1) * norm_fac
+         end if
+
+         if (abscmp(gam(1), 0.0_rp)) return
+
+         rnorm = 0.0_rp
+         temp = 1.0_rp / gam(1)
+         call cmult2(v(1,1), r, temp, n)
+         do j = 1, this%lgmres
+            iter = iter+1
+
+            call this%M%solve(z(1,j), v(1,j), n)
+
+            call Ax%compute(w, z(1,j), coef, x%msh, x%Xh)
+            call gs_h%op(w, n, GS_OP_ADD)
+            call bc_list_apply(blst, w, n)
+
+            do l = 1, j
+               h(l,j) = 0.0_rp
+            enddo
+
+            do i = 0, n, NEKO_BLK_SIZE
+               if (i + NEKO_BLK_SIZE .le. n) then
+                  do l = 1, j
+                     do k = 1, NEKO_BLK_SIZE
+                        h(l,j) = h(l,j) + &
                               w(i+k) * v(i+k,l) * coef%mult(i+k,1,1,1)
-                      end do
-                   end do
-                else 
-                   do k = 1, n-i
-                      do l = 1, j
-                         h(l,j) = h(l,j) + &
+                     end do
+                  end do
+               else
+                  do k = 1, n-i
+                     do l = 1, j
+                        h(l,j) = h(l,j) + &
                               w(i+k) * v(i+k,l) * coef%mult(i+k,1,1,1)
-                      end do
-                   end do
-                end if
-             end do
-             
-             call MPI_Allreduce(h(1,j), wk1, j, &
+                     end do
+                  end do
+               end if
+            end do
+
+            call MPI_Allreduce(h(1,j), wk1, j, &
                   MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
-             call copy(h(1,j), wk1, j) 
-
-             alpha2 = 0.0_rp
-             do i = 0,n,NEKO_BLK_SIZE
-                if (i + NEKO_BLK_SIZE .le. n) then
-                   do k = 1, NEKO_BLK_SIZE
-                      w_plus(k) = 0.0_rp
-                   end do
-                   do l = 1,j
-                      do k = 1, NEKO_BLK_SIZE
-                         w_plus(k) = w_plus(k) - h(l,j) * v(i+k,l)
-                      end do
-                   end do
-                   do k = 1, NEKO_BLK_SIZE
-                      w(i+k) = w(i+k) + w_plus(k)
-                      alpha2 = alpha2 + w(i+k)**2 * coef%mult(i+k,1,1,1)
-                   end do
-                else 
-                   do k = 1, n-i
-                      w_plus(1) = 0.0_rp
-                      do l = 1, j
-                         w_plus(1) = w_plus(1) - h(l,j) * v(i+k,l)
-                      end do
-                      w(i+k) = w(i+k) + w_plus(1)
-                      alpha2 = alpha2 + (w(i+k)**2) * coef%mult(i+k,1,1,1)
-                   end do
-                end if
-             end do
-             
-             call MPI_Allreduce(alpha2, temp, 1, &
+            call copy(h(1,j), wk1, j)
+
+            alpha2 = 0.0_rp
+            do i = 0,n,NEKO_BLK_SIZE
+               if (i + NEKO_BLK_SIZE .le. n) then
+                  do k = 1, NEKO_BLK_SIZE
+                     w_plus(k) = 0.0_rp
+                  end do
+                  do l = 1,j
+                     do k = 1, NEKO_BLK_SIZE
+                        w_plus(k) = w_plus(k) - h(l,j) * v(i+k,l)
+                     end do
+                  end do
+                  do k = 1, NEKO_BLK_SIZE
+                     w(i+k) = w(i+k) + w_plus(k)
+                     alpha2 = alpha2 + w(i+k)**2 * coef%mult(i+k,1,1,1)
+                  end do
+               else
+                  do k = 1, n-i
+                     w_plus(1) = 0.0_rp
+                     do l = 1, j
+                        w_plus(1) = w_plus(1) - h(l,j) * v(i+k,l)
+                     end do
+                     w(i+k) = w(i+k) + w_plus(1)
+                     alpha2 = alpha2 + (w(i+k)**2) * coef%mult(i+k,1,1,1)
+                  end do
+               end if
+            end do
+
+            call MPI_Allreduce(alpha2, temp, 1, &
                   MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
-             alpha2 = temp
-             alpha = sqrt(alpha2)
-             do i=1,j-1
-                temp = h(i,j)                   
-                h(i  ,j) =  c(i)*temp + s(i) * h(i+1,j)  
-                h(i+1,j) = -s(i)*temp + c(i) * h(i+1,j)
-             end do
-             
-             rnorm = 0.0_rp
-             if(alpha .eq. 0.0_rp) then 
-                conv = .true.
-                exit
-             end if
-             
-             lr = sqrt(h(j,j) * h(j,j) + alpha**2)
-             temp = 1.0_rp / lr
-             c(j) = h(j,j) * temp
-             s(j) = alpha  * temp
-             h(j,j) = lr
-             gam(j+1) = -s(j) * gam(j)
-             gam(j)   =  c(j) * gam(j)
-             
-             rnorm = abs(gam(j+1)) * norm_fac
-             if (rnorm .lt. this%abs_tol) then 
-                conv = .true.
-                exit
-             end if
-             
-             if (iter + 1 .gt. niter) exit
-             
-             if( j .lt. this%lgmres) then
-                temp = 1.0_rp / alpha
-                call cmult2(v(1,j+1), w, temp, n)
-             end if
-             
-          end do
-
-          j = min(j, this%lgmres)
-          do k = j, 1, -1
-             temp = gam(k)
-             do i = j, k+1, -1
-                temp = temp - h(k,i) * c(i)
-             end do
-             c(k) = temp / h(k,k)
-          end do
-
-          do i = 0, n, NEKO_BLK_SIZE
-             if (i + NEKO_BLK_SIZE .le. n) then
-                do k = 1, NEKO_BLK_SIZE
-                   x_plus(k) = 0.0_rp
-                end do
-                do l = 1,j
-                   do k = 1, NEKO_BLK_SIZE
-                      x_plus(k) = x_plus(k) + c(l) * z(i+k,l)
-                   end do
-                end do
-                do k = 1, NEKO_BLK_SIZE
-                   x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
-                end do
-             else 
-                do k = 1, n-i
-                   x_plus(1) = 0.0_rp
-                   do l = 1, j
-                      x_plus(1) = x_plus(1) + c(l) * z(i+k,l)
-                   end do
-                   x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(1)
-                end do
-             end if
-          end do
-       end do
-
-     end associate
-
-       ksp_results%res_final = rnorm
-       ksp_results%iter = iter
+            alpha2 = temp
+            alpha = sqrt(alpha2)
+            do i=1,j-1
+               temp = h(i,j)
+               h(i  ,j) =  c(i)*temp + s(i) * h(i+1,j)
+               h(i+1,j) = -s(i)*temp + c(i) * h(i+1,j)
+            end do
+
+            rnorm = 0.0_rp
+            if(abscmp(alpha, 0.0_rp)) then
+               conv = .true.
+               exit
+            end if
+
+            lr = sqrt(h(j,j) * h(j,j) + alpha**2)
+            temp = 1.0_rp / lr
+            c(j) = h(j,j) * temp
+            s(j) = alpha  * temp
+            h(j,j) = lr
+            gam(j+1) = -s(j) * gam(j)
+            gam(j)   =  c(j) * gam(j)
+
+            rnorm = abs(gam(j+1)) * norm_fac
+            if (rnorm .lt. this%abs_tol) then
+               conv = .true.
+               exit
+            end if
+
+            if (iter + 1 .gt. max_iter) exit
+
+            if( j .lt. this%lgmres) then
+               temp = 1.0_rp / alpha
+               call cmult2(v(1,j+1), w, temp, n)
+            end if
+
+         end do
+
+         j = min(j, this%lgmres)
+         do k = j, 1, -1
+            temp = gam(k)
+            do i = j, k+1, -1
+               temp = temp - h(k,i) * c(i)
+            end do
+            c(k) = temp / h(k,k)
+         end do
+
+         do i = 0, n, NEKO_BLK_SIZE
+            if (i + NEKO_BLK_SIZE .le. n) then
+               do k = 1, NEKO_BLK_SIZE
+                  x_plus(k) = 0.0_rp
+               end do
+               do l = 1,j
+                  do k = 1, NEKO_BLK_SIZE
+                     x_plus(k) = x_plus(k) + c(l) * z(i+k,l)
+                  end do
+               end do
+               do k = 1, NEKO_BLK_SIZE
+                  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
+               end do
+            else
+               do k = 1, n-i
+                  x_plus(1) = 0.0_rp
+                  do l = 1, j
+                     x_plus(1) = x_plus(1) + c(l) * z(i+k,l)
+                  end do
+                  x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(1)
+               end do
+            end if
+         end do
+      end do
+
+    end associate
+
+    ksp_results%res_final = rnorm
+    ksp_results%iter = iter
 
   end function gmres_solve
 
 end module gmres
-  
+
 
diff --git a/src/krylov/bcknd/cpu/pc_jacobi.f90 b/src/krylov/bcknd/cpu/pc_jacobi.f90
index c2075b8ef0d..8a6a266381c 100644
--- a/src/krylov/bcknd/cpu/pc_jacobi.f90
+++ b/src/krylov/bcknd/cpu/pc_jacobi.f90
@@ -55,7 +55,7 @@ module jacobi
   end type jacobi_t
 
 contains
-  
+
   subroutine jacobi_init(this, coef, dof, gs_h)
     class(jacobi_t), intent(inout) :: this
     type(coef_t), intent(inout), target :: coef
@@ -91,83 +91,71 @@ subroutine jacobi_solve(this, z, r, n)
     call col3(z,r,this%d,n)
   end subroutine jacobi_solve
 
+  !> Update Jacobi preconditioner if the geometry G has changed
   subroutine jacobi_update(this)
     class(jacobi_t), intent(inout) :: this
-    integer :: i, j, k, l, e, lz, ly, lx
     associate(dof => this%dof, coef => this%coef, gs_h => this%gs_h)
 
-      lx = dof%Xh%lx
-      ly = dof%Xh%ly
-      lz = dof%Xh%lz
-
-      this%d = 0d0
-
-      do e = 1,dof%msh%nelv
-         do l = 1,lx
-            do k = 1,lz
-               do j = 1,ly
-                  do i = 1,lx
-                     this%d(i,j,k,e) = this%d(i,j,k,e) + &
-                          coef%G11(l,j,k,e) * dof%Xh%dxt(i,l)**2
-                  end do
-               end do
-            end do
-         end do
-         do l = 1,ly
-            do k = 1,lz
-               do j = 1,ly
-                  do i = 1,lx
-                     this%d(i,j,k,e) = this%d(i,j,k,e) + &
-                          coef%G22(i,l,k,e) * dof%Xh%dyt(j,l)**2
-                  end do
-               end do
-            end do
-         end do
-         do l = 1,lz
-            do k = 1,lz
-               do j = 1,ly
-                  do i = 1,lx
-                     this%d(i,j,k,e) = this%d(i,j,k,e) + &
-                          coef%G33(i,j,l,e) * dof%Xh%dzt(k,l)**2
-                  end do
-               end do
-            end do
-         end do
-
-         if (dof%msh%dfrmd_el(e)) then
-            do j = 1,ly,ly-1
-               do k = 1,lz,lz-1
-                  this%d(1,j,k,e) = this%d(1,j,k,e) &
-                       + coef%G12(1,j,k,e) * dof%Xh%dxt(1,1)*dof%Xh%dyt(j,j) &
-                       + coef%G13(1,j,k,e) * dof%Xh%dxt(1,1)*dof%Xh%dzt(k,k)
-                  this%d(lx,j,k,e) = this%d(lx,j,k,e) &
-                       + coef%G12(lx,j,k,e) * dof%Xh%dxt(lx,lx)*dof%Xh%dyt(j,j) &
-                       + coef%G13(lx,j,k,e) * dof%Xh%dxt(lx,lx)*dof%Xh%dzt(k,k)
-               end do
-            end do
-
-            do i = 1,lx,lx-1
-               do k = 1,lz,lz-1
-                  this%d(i,1,k,e) = this%d(i,1,k,e) &
-                       + coef%G12(i,1,k,e) * dof%Xh%dyt(1,1)*dof%Xh%dxt(i,i) &
-                       + coef%G23(i,1,k,e) * dof%Xh%dyt(1,1)*dof%Xh%dzt(k,k)
-                  this%d(i,ly,k,e) = this%d(i,ly,k,e) &
-                       + coef%G12(i,ly,k,e) * dof%Xh%dyt(ly,ly)*dof%Xh%dxt(i,i) &
-                       + coef%G23(i,ly,k,e) * dof%Xh%dyt(ly,ly)*dof%Xh%dzt(k,k)
-               end do
-            end do
-            do i = 1,lx,lx-1
-               do j = 1,ly,ly-1
-                  this%d(i,j,1,e) = this%d(i,j,1,e) &
-                       + coef%G13(i,j,1,e) * dof%Xh%dzt(1,1)*dof%Xh%dxt(i,i) &
-                       + coef%G23(i,j,1,e) * dof%Xh%dzt(1,1)*dof%Xh%dyt(j,j)
-                  this%d(i,j,lz,e) = this%d(i,j,lz,e) &
-                       + coef%G13(i,j,lz,e) * dof%Xh%dzt(lz,lz)*dof%Xh%dxt(i,i) &
-                       + coef%G23(i,j,lz,e) * dof%Xh%dzt(lz,lz)*dof%Xh%dyt(j,j)
-               end do
-            end do
-         end if
-      end do
+
+      select case(dof%Xh%lx)
+      case (14)
+         call jacobi_update_lx14(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv)
+      case (13)
+         call jacobi_update_lx13(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv)
+      case (12)
+         call jacobi_update_lx12(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv)
+      case (11)
+         call jacobi_update_lx11(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv)
+      case (10)
+         call jacobi_update_lx10(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv)
+      case (9)
+         call jacobi_update_lx9(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv)
+      case (8)
+         call jacobi_update_lx8(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv)
+      case (7)
+         call jacobi_update_lx7(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv)
+      case (6)
+         call jacobi_update_lx6(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv)
+      case (5)
+         call jacobi_update_lx5(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv)
+      case (4)
+         call jacobi_update_lx4(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv)
+      case (3)
+         call jacobi_update_lx3(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv)
+      case (2)
+         call jacobi_update_lx2(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv)
+      case default
+         call jacobi_update_lx(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, &
+              coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, &
+              dof%msh%dfrmd_el, dof%msh%nelv, dof%Xh%lx)
+      end select
+
       call col2(this%d,coef%h1,coef%dof%size())
       if (coef%ifh2) call addcol3(this%d,coef%h2,coef%B,coef%dof%size())
       call gs_h%op(this%d, dof%size(), GS_OP_ADD)
@@ -175,4 +163,1222 @@ subroutine jacobi_update(this)
     end associate
   end subroutine jacobi_update
 
+  !> Generic CPU kernel for updating the Jacobi preconditioner
+  subroutine jacobi_update_lx(d, dxt, dyt, dzt, G11, G22, G33, &
+                              G12, G13, G23, dfrmd_el, n, lx)
+    integer, intent(in) :: n, lx
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+      
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx
+
+  subroutine jacobi_update_lx14(d, dxt, dyt, dzt, G11, G22, G33, &
+                                G12, G13, G23, dfrmd_el, n)
+    integer, parameter :: lx = 14
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+      
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx14
+  
+  subroutine jacobi_update_lx13(d, dxt, dyt, dzt, G11, G22, G33, &
+                              G12, G13, G23, dfrmd_el, n)
+    integer, parameter :: lx = 13
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+    
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx13
+  
+  subroutine jacobi_update_lx12(d, dxt, dyt, dzt, G11, G22, G33, &
+                                G12, G13, G23, dfrmd_el, n)
+    integer, parameter :: lx = 12
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+      
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx12
+  
+  subroutine jacobi_update_lx11(d, dxt, dyt, dzt, G11, G22, G33, &
+                                G12, G13, G23, dfrmd_el, n)
+    integer, parameter :: lx = 11
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+      
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx11
+  
+  subroutine jacobi_update_lx10(d, dxt, dyt, dzt, G11, G22, G33, &
+                                G12, G13, G23, dfrmd_el, n)
+    integer, parameter :: lx = 10
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+      
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx10
+  
+  subroutine jacobi_update_lx9(d, dxt, dyt, dzt, G11, G22, G33, &
+                              G12, G13, G23, dfrmd_el, n)
+    integer, parameter :: lx = 9
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+      
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx9
+  
+  subroutine jacobi_update_lx8(d, dxt, dyt, dzt, G11, G22, G33, &
+                               G12, G13, G23, dfrmd_el, n)
+    integer, parameter :: lx = 8
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+      
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx8
+  
+  subroutine jacobi_update_lx7(d, dxt, dyt, dzt, G11, G22, G33, &
+                              G12, G13, G23, dfrmd_el, n)
+    integer, parameter :: lx = 7
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+      
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx7
+  
+  subroutine jacobi_update_lx6(d, dxt, dyt, dzt, G11, G22, G33, &
+                               G12, G13, G23, dfrmd_el, n)
+    integer, parameter :: lx = 6
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+      
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx6
+  
+  subroutine jacobi_update_lx5(d, dxt, dyt, dzt, G11, G22, G33, &
+                               G12, G13, G23, dfrmd_el, n)
+    integer, parameter :: lx = 5
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+      
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx5
+  
+  subroutine jacobi_update_lx4(d, dxt, dyt, dzt, G11, G22, G33, &
+                               G12, G13, G23, dfrmd_el, n)
+    integer, parameter :: lx = 4
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+      
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx4
+  
+  subroutine jacobi_update_lx3(d, dxt, dyt, dzt, G11, G22, G33, &
+                               G12, G13, G23, dfrmd_el, n)
+    integer, parameter :: lx = 3
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+      
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx3
+  
+  subroutine jacobi_update_lx2(d, dxt, dyt, dzt, G11, G22, G33, &
+                               G12, G13, G23, dfrmd_el, n)
+    integer, parameter :: lx = 2
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) :: d(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G11(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G22(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G33(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G12(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G13(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: G23(lx, lx, lx, n)
+    real(kind=rp), intent(in) :: dxt(lx, lx)
+    real(kind=rp), intent(in) :: dyt(lx, lx)
+    real(kind=rp), intent(in) :: dzt(lx, lx)
+    logical, intent(in) :: dfrmd_el(n)
+    integer :: i, j, k, l, e
+      
+    d = 0d0
+
+    do e = 1,n
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G11(l,j,k,e) * dxt(i,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G22(i,l,k,e) * dyt(j,l)**2
+                end do
+             end do
+          end do
+       end do
+       do l = 1,lx
+          do k = 1,lx
+             do j = 1,lx
+                do i = 1,lx
+                   d(i,j,k,e) = d(i,j,k,e) + &
+                        G33(i,j,l,e) * dzt(k,l)**2
+                end do
+             end do
+          end do
+       end do
+       
+       if (dfrmd_el(e)) then
+          do j = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(1,j,k,e) = d(1,j,k,e) &
+                     + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) &
+                     + G13(1,j,k,e) * dxt(1,1)*dzt(k,k)
+                d(lx,j,k,e) = d(lx,j,k,e) &
+                     + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) &
+                     + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          
+          do i = 1,lx,lx-1
+             do k = 1,lx,lx-1
+                d(i,1,k,e) = d(i,1,k,e) &
+                     + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) &
+                     + G23(i,1,k,e) * dyt(1,1)*dzt(k,k)
+                d(i,lx,k,e) = d(i,lx,k,e) &
+                     + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) &
+                     + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k)
+             end do
+          end do
+          do i = 1,lx,lx-1
+             do j = 1,lx,lx-1
+                d(i,j,1,e) = d(i,j,1,e) &
+                     + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) &
+                     + G23(i,j,1,e) * dzt(1,1)*dyt(j,j)
+                d(i,j,lx,e) = d(i,j,lx,e) &
+                     + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) &
+                     + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j)
+             end do
+          end do
+       end if
+    end do
+  end subroutine jacobi_update_lx2
+
 end module jacobi
diff --git a/src/krylov/bcknd/cpu/pipecg.f90 b/src/krylov/bcknd/cpu/pipecg.f90
index 32f5c2053ca..f9270947c77 100644
--- a/src/krylov/bcknd/cpu/pipecg.f90
+++ b/src/krylov/bcknd/cpu/pipecg.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2021, The Neko Authors
+! Copyright (c) 2021-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,7 @@ module pipecg
   use coefs, only : coef_t
   use gather_scatter, only : gs_t, GS_OP_ADD
   use bc, only : bc_list_t, bc_list_apply
-  use math, only : glsc3, rzero, copy
+  use math, only : glsc3, rzero, copy, abscmp
   use comm
   implicit none
   private
@@ -66,19 +66,20 @@ module pipecg
      !> Solve the linear system.
      procedure, pass(this) :: solve => pipecg_solve
   end type pipecg_t
-  
+
 contains
-  
+
   !> Initialise a pipelined PCG solver
-  subroutine pipecg_init(this, n, M, rel_tol, abs_tol)
+  subroutine pipecg_init(this, n, max_iter, M, rel_tol, abs_tol)
     class(pipecg_t), intent(inout) :: this
+    integer, intent(in) :: max_iter
     class(pc_t), optional, intent(inout), target :: M
     integer, intent(in) :: n
     real(kind=rp), optional, intent(inout) :: rel_tol
     real(kind=rp), optional, intent(inout) :: abs_tol
-    
+
     call this%free()
-        
+
     allocate(this%p(n))
     allocate(this%q(n))
     allocate(this%r(n))
@@ -88,28 +89,28 @@ subroutine pipecg_init(this, n, M, rel_tol, abs_tol)
     allocate(this%z(n))
     allocate(this%mi(n))
     allocate(this%ni(n))
-    if (present(M)) then 
+    if (present(M)) then
        this%M => M
     end if
-    
+
     if (present(rel_tol) .and. present(abs_tol)) then
-       call this%ksp_init(rel_tol, abs_tol)
+       call this%ksp_init(max_iter, rel_tol, abs_tol)
     else if (present(rel_tol)) then
-       call this%ksp_init(rel_tol=rel_tol)
+       call this%ksp_init(max_iter, rel_tol=rel_tol)
     else if (present(abs_tol)) then
-       call this%ksp_init(abs_tol=abs_tol)
+       call this%ksp_init(max_iter, abs_tol=abs_tol)
     else
-       call this%ksp_init()
+       call this%ksp_init(max_iter)
     end if
-    
+
   end subroutine pipecg_init
-  
+
   !> Deallocate a pipelined PCG solver
   subroutine pipecg_free(this)
     class(pipecg_t), intent(inout) :: this
-    
+
     call this%ksp_free()
-    
+
     if (allocated(this%p)) then
        deallocate(this%p)
     end if
@@ -137,12 +138,12 @@ subroutine pipecg_free(this)
     if (allocated(this%ni)) then
        deallocate(this%ni)
     end if
-    
+
     nullify(this%M)
 
 
   end subroutine pipecg_free
-  
+
   !> Pipelined PCG solve
   function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
     class(pipecg_t), intent(inout) :: this
@@ -162,17 +163,17 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res
     real(kind=rp) :: tmp1, tmp2, tmp3, x_plus(NEKO_BLK_SIZE)
     type(MPI_Request) :: request
     type(MPI_Status) :: status
-    
+
     if (present(niter)) then
        max_iter = niter
     else
-       max_iter = KSP_MAX_ITER
+       max_iter = this%max_iter
     end if
     norm_fac = 1.0_rp / sqrt(coef%volume)
-    
+
     associate(p => this%p, q => this%q, r => this%r, s => this%s, &
          u => this%u, w => this%w, z => this%z, mi => this%mi, ni => this%ni)
-      
+
       p_prev = PIPECG_P_SPACE
       u_prev = PIPECG_P_SPACE+1
       p_cur = 1
@@ -186,14 +187,14 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res
       call Ax%compute(w, u(1,u_prev), coef, x%msh, x%Xh)
       call gs_h%op(w, n, GS_OP_ADD)
       call bc_list_apply(blst, w, n)
-      
+
       rtr = glsc3(r, coef%mult, r, n)
       rnorm = sqrt(rtr)*norm_fac
       ksp_results%res_start = rnorm
       ksp_results%res_final = rnorm
       ksp_results%iter = 0
-      if(rnorm .eq. 0.0_rp) return
-      
+      if(abscmp(rnorm, 0.0_rp)) return
+
       gamma1 = 0.0_rp
       tmp1 = 0.0_rp
       tmp2 = 0.0_rp
@@ -206,33 +207,33 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res
       reduction(1) = tmp1
       reduction(2) = tmp2
       reduction(3) = tmp3
-      
+
       do iter = 1, max_iter
          call MPI_Iallreduce(MPI_IN_PLACE, reduction, 3, &
               MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, request, ierr)
-         
+
          call this%M%solve(mi, w, n)
          call Ax%compute(ni, mi, coef, x%msh, x%Xh)
          call gs_h%op(ni, n, GS_OP_ADD)
          call bc_list_apply(blst, ni, n)
-         
+
          call MPI_Wait(request, status, ierr)
-         gamma2 = gamma1       
+         gamma2 = gamma1
          gamma1 = reduction(1)
          delta = reduction(2)
          rtr = reduction(3)
-         
+
          rnorm = sqrt(rtr)*norm_fac
          if (rnorm .lt. this%abs_tol) exit
 
          if (iter .gt. 1) then
             beta(p_cur) = gamma1 / gamma2
             alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev)))
-         else 
+         else
             beta(p_cur) = 0.0_rp
             alpha(p_cur) = gamma1/delta
          end if
-         
+
          tmp1 = 0.0_rp
          tmp2 = 0.0_rp
          tmp3 = 0.0_rp
@@ -263,11 +264,11 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res
                end do
             end if
          end do
-         
+
          reduction(1) = tmp1
          reduction(2) = tmp2
          reduction(3) = tmp3
-         
+
          if (p_cur .eq. PIPECG_P_SPACE) then
             do i = 0, n, NEKO_BLK_SIZE
                if (i + NEKO_BLK_SIZE .le. n) then
@@ -286,7 +287,7 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res
                      x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
                      u(i+k,PIPECG_P_SPACE+1) = u(i+k,PIPECG_P_SPACE)
                   end do
-               else 
+               else
                   do k = 1, n-i
                      x_plus(1) = 0.0_rp
                      p_prev = PIPECG_P_SPACE + 1
@@ -302,7 +303,7 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res
             end do
             p_prev = p_cur
             u_prev = PIPECG_P_SPACE+1
-            alpha(1) = alpha(p_cur) 
+            alpha(1) = alpha(p_cur)
             beta(1) = beta(p_cur)
             p_cur = 1
          else
@@ -311,7 +312,7 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res
             p_cur = p_cur + 1
          end if
       end do
-      
+
       if ( p_cur .ne. 1) then
          do i = 0, n, NEKO_BLK_SIZE
             if (i + NEKO_BLK_SIZE .le. n) then
@@ -330,7 +331,7 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res
                   x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k)
                   u(i+k,PIPECG_P_SPACE+1) = u(i+k,PIPECG_P_SPACE)
                end do
-            else 
+            else
                do k = 1, n-i
                   x_plus(1) = 0.0_rp
                   p_prev = PIPECG_P_SPACE + 1
@@ -345,14 +346,14 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res
             end if
          end do
       end if
-      
+
       ksp_results%res_final = rnorm
       ksp_results%iter = iter
-      
+
     end associate
-    
+
   end function pipecg_solve
-   
+
 end module pipecg
-  
+
 
diff --git a/src/krylov/bcknd/device/cg_device.f90 b/src/krylov/bcknd/device/cg_device.f90
index 58e1b350abb..baa39bde4d8 100644
--- a/src/krylov/bcknd/device/cg_device.f90
+++ b/src/krylov/bcknd/device/cg_device.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2021-2022, The Neko Authors
+! Copyright (c) 2021-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,8 @@ module cg_device
   use coefs, only : coef_t
   use gather_scatter, only : gs_t, GS_OP_ADD
   use bc, only : bc_list_t, bc_list_apply
-  use device  
+  use math, only : abscmp
+  use device
   use device_math, only : device_rzero, device_copy, device_glsc3, &
                           device_add2s2, device_add2s1
   implicit none
@@ -65,15 +66,16 @@ module cg_device
 contains
 
   !> Initialise a device based PCG solver
-  subroutine cg_device_init(this, n, M, rel_tol, abs_tol)
+  subroutine cg_device_init(this, n, max_iter, M, rel_tol, abs_tol)
     class(cg_device_t), intent(inout) :: this
     class(pc_t), optional, intent(inout), target :: M
     integer, intent(in) :: n
+    integer, intent(in) :: max_iter
     real(kind=rp), optional, intent(inout) :: rel_tol
     real(kind=rp), optional, intent(inout) :: abs_tol
-    
+
     call this%free()
-    
+
     allocate(this%w(n))
     allocate(this%r(n))
     allocate(this%p(n))
@@ -83,20 +85,20 @@ subroutine cg_device_init(this, n, M, rel_tol, abs_tol)
     call device_map(this%p, this%p_d, n)
     call device_map(this%r, this%r_d, n)
     call device_map(this%w, this%w_d, n)
-    
-    if (present(M)) then 
+
+    if (present(M)) then
        this%M => M
     end if
 
 
     if (present(rel_tol) .and. present(abs_tol)) then
-       call this%ksp_init(rel_tol, abs_tol)
+       call this%ksp_init(max_iter, rel_tol, abs_tol)
     else if (present(rel_tol)) then
-       call this%ksp_init(rel_tol=rel_tol)
+       call this%ksp_init(max_iter, rel_tol=rel_tol)
     else if (present(abs_tol)) then
-       call this%ksp_init(abs_tol=abs_tol)
+       call this%ksp_init(max_iter, abs_tol=abs_tol)
     else
-       call this%ksp_init()
+       call this%ksp_init(max_iter)
     end if
 
     call device_event_create(this%gs_event, 2)
@@ -119,7 +121,7 @@ subroutine cg_device_free(this)
     if (allocated(this%p)) then
        deallocate(this%p)
     end if
-    
+
     if (allocated(this%z)) then
        deallocate(this%z)
     end if
@@ -145,9 +147,9 @@ subroutine cg_device_free(this)
     if (c_associated(this%gs_event)) then
        call device_event_destroy(this%gs_event)
     end if
-    
+
   end subroutine cg_device_free
-  
+
   !> Standard PCG solve
   function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
     class(cg_device_t), intent(inout) :: this
@@ -166,13 +168,13 @@ function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_
     real(kind=rp) :: rnorm, rtr, rtr0, rtz2, rtz1
     real(kind=rp) :: beta, pap, alpha, alphm, norm_fac
     type(c_ptr) :: f_d
-    
+
     f_d = device_get_ptr(f)
 
     if (present(niter)) then
        max_iter = niter
     else
-       max_iter = KSP_MAX_ITER
+       max_iter = this%max_iter
     end if
     norm_fac = one/sqrt(coef%volume)
 
@@ -186,7 +188,7 @@ function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_
     ksp_results%res_start = rnorm
     ksp_results%res_final = rnorm
     ksp_results%iter = 0
-    if(rnorm .eq. zero) return
+    if(abscmp(rnorm, zero)) return
     do iter = 1, max_iter
        call this%M%solve(this%z, this%r, n)
        rtz2 = rtz1
@@ -195,10 +197,10 @@ function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_
        if (iter .eq. 1) beta = zero
        call device_add2s1(this%p_d, this%z_d, beta, n)
 
-       call Ax%compute(this%w, this%p, coef, x%msh, x%Xh)       
+       call Ax%compute(this%w, this%p, coef, x%msh, x%Xh)
        call gs_h%op(this%w, n, GS_OP_ADD, this%gs_event)
        call device_event_sync(this%gs_event)
-       call bc_list_apply(blst, this%w, n)       
+       call bc_list_apply(blst, this%w, n)
 
        pap = device_glsc3(this%w_d, coef%mult_d, this%p_d, n)
 
@@ -220,5 +222,5 @@ function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_
   end function cg_device_solve
 
 end module cg_device
-  
+
 
diff --git a/src/krylov/bcknd/device/cuda/fusedcg_aux.cu b/src/krylov/bcknd/device/cuda/fusedcg_aux.cu
new file mode 100644
index 00000000000..43558734cb3
--- /dev/null
+++ b/src/krylov/bcknd/device/cuda/fusedcg_aux.cu
@@ -0,0 +1,130 @@
+/*
+ Copyright (c) 2021-2024, The Neko Authors
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+   * Redistributions in binary form must reproduce the above
+     copyright notice, this list of conditions and the following
+     disclaimer in the documentation and/or other materials provided
+     with the distribution.
+
+   * Neither the name of the authors nor the names of its
+     contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "fusedcg_kernel.h"
+#include <device/device_config.h>
+#include <device/cuda/check.h>
+
+/**
+ * @todo Make sure that this gets deleted at some point...
+ */
+real *fusedcg_buf = NULL;
+real *fusedcg_buf_d = NULL;
+int fusedcg_buf_len = 0;
+
+extern "C" {
+
+#include <math/bcknd/device/device_mpi_reduce.h>
+#include <math/bcknd/device/device_mpi_op.h>
+
+
+  
+  void cuda_fusedcg_update_p(void *p, void *z, void *po, real *beta, int *n) {
+        
+    const dim3 nthrds(1024, 1, 1);
+    const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
+    const cudaStream_t stream = (cudaStream_t) glb_cmd_queue;
+    
+    fusedcg_update_p_kernel<real>
+      <<<nblcks, nthrds, 0, stream>>>((real *) p, (real *) z,
+                                      (real *) po, *beta, *n);
+    CUDA_CHECK(cudaGetLastError());
+
+  }
+
+  void cuda_fusedcg_update_x(void *x, void *p, void *alpha, int *p_cur, int *n) {
+
+    const dim3 nthrds(1024, 1, 1);
+    const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
+    const cudaStream_t stream = (cudaStream_t) glb_cmd_queue;
+    
+    fusedcg_update_x_kernel<real>
+      <<<nblcks, nthrds, 0, stream>>>((real *) x, (const real **) p,
+                                      (const real *) alpha, *p_cur, *n);
+    CUDA_CHECK(cudaGetLastError());
+  }
+
+
+  real cuda_fusedcg_part2(void *a, void *b, void *c,
+                          void *alpha_d , real *alpha, int *p_cur, int * n) {
+    
+    const dim3 nthrds(1024, 1, 1);
+    const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
+    const int nb = ((*n) + 1024 - 1)/ 1024;
+    const cudaStream_t stream = (cudaStream_t) glb_cmd_queue;
+
+    if (fusedcg_buf != NULL && fusedcg_buf_len < nb) {
+      CUDA_CHECK(cudaFreeHost(fusedcg_buf));
+      CUDA_CHECK(cudaFree(fusedcg_buf_d));
+      fusedcg_buf = NULL;
+    }
+    
+    if (fusedcg_buf == NULL){
+      CUDA_CHECK(cudaMallocHost(&fusedcg_buf, 2*sizeof(real)));
+      CUDA_CHECK(cudaMalloc(&fusedcg_buf_d, nb*sizeof(real)));
+      fusedcg_buf_len = nb;
+    }
+
+    /* Store alpha(p_cur) in pinned memory */
+    fusedcg_buf[1] = (*alpha);
+
+    /* Update alpha_d(p_cur) = alpha(p_cur) */
+    real *alpha_d_p_cur = ((real *) alpha_d) + ((*p_cur - 1));   
+    CUDA_CHECK(cudaMemcpyAsync(alpha_d_p_cur, &fusedcg_buf[1], sizeof(real),
+                               cudaMemcpyHostToDevice, stream));
+
+   
+    fusedcg_part2_kernel<real>
+      <<<nblcks, nthrds, 0, stream>>>((real *) a, (real *) b,
+                                      (real *) c, *alpha,
+                                      fusedcg_buf_d, * n);
+    CUDA_CHECK(cudaGetLastError());
+
+    reduce_kernel<real><<<1, 1024, 0, stream>>>(fusedcg_buf_d, nb);
+    CUDA_CHECK(cudaGetLastError());
+
+#ifdef HAVE_DEVICE_MPI
+    cudaStreamSynchronize(stream);
+    device_mpi_allreduce(fusedcg_buf_d, fusedcg_buf, 1,
+                         sizeof(real), DEVICE_MPI_SUM);
+#else
+    CUDA_CHECK(cudaMemcpyAsync(fusedcg_buf, fusedcg_buf_d, sizeof(real),
+                               cudaMemcpyDeviceToHost, stream));
+    cudaStreamSynchronize(stream);
+#endif
+
+    return fusedcg_buf[0];
+  }
+}
+
diff --git a/src/krylov/bcknd/device/cuda/fusedcg_kernel.h b/src/krylov/bcknd/device/cuda/fusedcg_kernel.h
new file mode 100644
index 00000000000..aaef81fe524
--- /dev/null
+++ b/src/krylov/bcknd/device/cuda/fusedcg_kernel.h
@@ -0,0 +1,124 @@
+#ifndef __KRYLOV_FUSEDCG_KERNEL_H__
+#define __KRYLOV_FUSEDCG_KERNEL_H__
+/*
+ Copyright (c) 2021-2024, The Neko Authors
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+   * Redistributions in binary form must reproduce the above
+     copyright notice, this list of conditions and the following
+     disclaimer in the documentation and/or other materials provided
+     with the distribution.
+
+   * Neither the name of the authors nor the names of its
+     contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <math/bcknd/device/cuda/math_kernel.h>
+
+/**
+ * Kernel for update of p
+ */
+template< typename T >
+__global__ void fusedcg_update_p_kernel(T * __restrict__  p,
+                                    const T * __restrict__ z,
+                                    const T * __restrict__ po,
+                                    const T beta,
+                                    const int n) {
+
+  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const int str = blockDim.x * gridDim.x;
+
+  for (int i = idx; i < n; i+= str) {
+    p[i] = beta*po[i] + z[i];
+  }
+  
+}
+
+/**
+ * Kernel for update of x
+ */
+template< typename T >
+__global__ void fusedcg_update_x_kernel(T * __restrict__  x,
+                                        const T ** p,
+                                        const T * __restrict__ alpha,
+                                        const int p_cur,
+                                        const int n) {
+
+  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const int str = blockDim.x * gridDim.x;
+
+  for (int i = idx; i < n; i+= str) {
+    T tmp = 0.0;
+    for (int j = 0; j < p_cur; j ++) {
+      tmp += p[j][i] * alpha[j];
+    }
+    x[i] += tmp;
+  }
+  
+}
+
+/**
+ * Device kernel for fusedcg_part2
+ */
+template< typename T>
+__global__ void fusedcg_part2_kernel(T * __restrict__ a,
+                                     const T * __restrict__ b,
+                                     const T * __restrict__ c,
+                                     const T alpha,
+                                     T * buf_h,
+                                     const int n) {
+
+  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const int str = blockDim.x * gridDim.x;
+
+  const unsigned int lane = threadIdx.x % warpSize;
+  const unsigned int wid = threadIdx.x / warpSize;
+  
+  __shared__ T buf[32];
+  T tmp = 0.0;
+
+  for (int i = idx; i < n; i+= str) {
+    T rt = a[i] - alpha * c[i];
+    tmp = tmp + rt * b[i] * rt;
+    a[i] = rt;
+  }
+
+  tmp = reduce_warp<T>(tmp);
+  if (lane == 0) {
+    buf[wid] = tmp;
+  }
+  __syncthreads();
+  
+  tmp = (threadIdx.x < blockDim.x / warpSize) ? buf[lane] : 0;
+  if (wid == 0) {
+    tmp = reduce_warp<T>(tmp);
+  }
+
+  if (threadIdx.x == 0) {
+    buf_h[blockIdx.x] = tmp;
+  }
+}
+                                     
+
+#endif // __KRYLOV_FUSEDCG_KERNEL_H__
diff --git a/src/krylov/bcknd/device/cuda/gmres_kernel.h b/src/krylov/bcknd/device/cuda/gmres_kernel.h
index 381dcbd603f..cc2f0de2148 100644
--- a/src/krylov/bcknd/device/cuda/gmres_kernel.h
+++ b/src/krylov/bcknd/device/cuda/gmres_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __KRYLOV_GMRES_KERNEL_H__
+#define __KRYLOV_GMRES_KERNEL_H__
 /*
  Copyright (c) 2022, The Neko Authors
  All rights reserved.
@@ -79,3 +81,5 @@ __global__ void gmres_part2_kernel(T  * __restrict__  w,
 }
 
 
+
+#endif // __KRYLOV_GMRES_KERNEL_H__
diff --git a/src/krylov/bcknd/device/cuda/pipecg_kernel.h b/src/krylov/bcknd/device/cuda/pipecg_kernel.h
index b7280150d86..9f51dbadeb2 100644
--- a/src/krylov/bcknd/device/cuda/pipecg_kernel.h
+++ b/src/krylov/bcknd/device/cuda/pipecg_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __KRYLOV_PIPECG_KERNEL_H__
+#define __KRYLOV_PIPECG_KERNEL_H__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -137,3 +139,5 @@ __global__ void pipecg_vecops_kernel(T  * __restrict__  p,
   }
 
 }
+
+#endif // __KRYLOV_PIPECG_KERNEL_H__
diff --git a/src/krylov/bcknd/device/fusedcg_device.F90 b/src/krylov/bcknd/device/fusedcg_device.F90
new file mode 100644
index 00000000000..db91a99df7a
--- /dev/null
+++ b/src/krylov/bcknd/device/fusedcg_device.F90
@@ -0,0 +1,392 @@
+! Copyright (c) 2021-2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Defines a fused Conjugate Gradient method for accelerators
+module fusedcg_device
+  use krylov, only : ksp_t, ksp_monitor_t, KSP_MAX_ITER
+  use precon,  only : pc_t
+  use ax_product, only : ax_t
+  use num_types, only: rp, c_rp
+  use field, only : field_t
+  use coefs, only : coef_t
+  use gather_scatter, only : gs_t, GS_OP_ADD
+  use bc, only : bc_list_t, bc_list_apply
+  use math, only : glsc3, rzero, copy, abscmp
+  use device_math, only : device_rzero, device_copy, device_glsc3
+  use device
+  use comm
+  implicit none
+  private
+
+  integer, parameter :: DEVICE_FUSEDCG_P_SPACE = 10
+
+  !> Fused preconditioned conjugate gradient method
+  type, public, extends(ksp_t) :: fusedcg_device_t
+     real(kind=rp), allocatable :: w(:)
+     real(kind=rp), allocatable :: r(:)
+     real(kind=rp), allocatable :: z(:)
+     real(kind=rp), allocatable :: p(:,:)
+     real(kind=rp), allocatable :: alpha(:)
+     type(c_ptr) :: w_d = C_NULL_PTR
+     type(c_ptr) :: r_d = C_NULL_PTR
+     type(c_ptr) :: z_d = C_NULL_PTR
+     type(c_ptr) :: alpha_d = C_NULL_PTR
+     type(c_ptr) :: p_d_d = C_NULL_PTR
+     type(c_ptr), allocatable :: p_d(:)
+     type(c_ptr) :: gs_event = C_NULL_PTR
+   contains
+     procedure, pass(this) :: init => fusedcg_device_init
+     procedure, pass(this) :: free => fusedcg_device_free
+     procedure, pass(this) :: solve => fusedcg_device_solve
+  end type fusedcg_device_t
+
+#ifdef HAVE_CUDA
+  interface
+     subroutine cuda_fusedcg_update_p(p_d, z_d, po_d, beta, n) &
+          bind(c, name='cuda_fusedcg_update_p')
+       use, intrinsic :: iso_c_binding
+       import c_rp
+       implicit none
+       type(c_ptr), value :: p_d, z_d, po_d
+       real(c_rp) :: beta
+       integer(c_int) :: n
+     end subroutine cuda_fusedcg_update_p
+  end interface
+
+  interface
+     subroutine cuda_fusedcg_update_x(x_d, p_d, alpha, p_cur, n) &
+          bind(c, name='cuda_fusedcg_update_x')
+       use, intrinsic :: iso_c_binding
+       implicit none
+       type(c_ptr), value :: x_d, p_d, alpha
+       integer(c_int) :: p_cur, n
+     end subroutine cuda_fusedcg_update_x
+  end interface
+
+  interface
+     real(c_rp) function cuda_fusedcg_part2(a_d, b_d, c_d, alpha_d, alpha, &
+          p_cur, n) bind(c, name='cuda_fusedcg_part2')
+       use, intrinsic :: iso_c_binding
+       import c_rp
+       implicit none
+       type(c_ptr), value :: a_d, b_d, c_d, alpha_d
+       real(c_rp) :: alpha
+       integer(c_int) :: n, p_cur
+     end function cuda_fusedcg_part2
+  end interface
+#elif HAVE_HIP
+  interface
+     subroutine hip_fusedcg_update_p(p_d, z_d, po_d, beta, n) &
+          bind(c, name='hip_fusedcg_update_p')
+       use, intrinsic :: iso_c_binding
+       import c_rp
+       implicit none
+       type(c_ptr), value :: p_d, z_d, po_d
+       real(c_rp) :: beta
+       integer(c_int) :: n
+     end subroutine hip_fusedcg_update_p
+  end interface
+
+  interface
+     subroutine hip_fusedcg_update_x(x_d, p_d, alpha, p_cur, n) &
+          bind(c, name='hip_fusedcg_update_x')
+       use, intrinsic :: iso_c_binding
+       implicit none
+       type(c_ptr), value :: x_d, p_d, alpha
+       integer(c_int) :: p_cur, n
+     end subroutine hip_fusedcg_update_x
+  end interface
+
+  interface
+     real(c_rp) function hip_fusedcg_part2(a_d, b_d, c_d, alpha_d, alpha, &
+          p_cur, n) bind(c, name='hip_fusedcg_part2')
+       use, intrinsic :: iso_c_binding
+       import c_rp
+       implicit none
+       type(c_ptr), value :: a_d, b_d, c_d, alpha_d
+       real(c_rp) :: alpha
+       integer(c_int) :: n, p_cur
+     end function hip_fusedcg_part2
+  end interface
+#endif
+
+contains
+
+  subroutine device_fusedcg_update_p(p_d, z_d, po_d, beta, n)
+    type(c_ptr), value :: p_d, z_d, po_d
+    real(c_rp) :: beta
+    integer(c_int) :: n
+#ifdef HAVE_HIP
+    call hip_fusedcg_update_p(p_d, z_d, po_d, beta, n)
+#elif HAVE_CUDA
+    call cuda_fusedcg_update_p(p_d, z_d, po_d, beta, n)
+#else
+    call neko_error('No device backend configured')
+#endif
+  end subroutine device_fusedcg_update_p
+
+  subroutine device_fusedcg_update_x(x_d, p_d, alpha, p_cur, n)
+    type(c_ptr), value :: x_d, p_d, alpha
+    integer(c_int) :: p_cur, n
+#ifdef HAVE_HIP
+    call hip_fusedcg_update_x(x_d, p_d, alpha, p_cur, n)
+#elif HAVE_CUDA
+    call cuda_fusedcg_update_x(x_d, p_d, alpha, p_cur, n)
+#else
+    call neko_error('No device backend configured')
+#endif
+  end subroutine device_fusedcg_update_x
+
+  function device_fusedcg_part2(a_d, b_d, c_d, alpha_d, alpha, &
+                                p_cur, n) result(res)
+    type(c_ptr), value :: a_d, b_d, c_d, alpha_d
+    real(c_rp) :: alpha
+    integer :: n, p_cur
+    real(kind=rp) :: res
+    integer :: ierr
+#ifdef HAVE_HIP
+    res = hip_fusedcg_part2(a_d, b_d, c_d, alpha_d, alpha, p_cur, n)
+#elif HAVE_CUDA
+    res = cuda_fusedcg_part2(a_d, b_d, c_d, alpha_d, alpha, p_cur, n)
+#else
+    call neko_error('No device backend configured')
+#endif
+
+#ifndef HAVE_DEVICE_MPI
+    if (pe_size .gt. 1) then
+       call MPI_Allreduce(MPI_IN_PLACE, res, 1, &
+            MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+    end if
+#endif
+
+  end function device_fusedcg_part2
+
+  !> Initialise a fused PCG solver
+  subroutine fusedcg_device_init(this, n, max_iter, M, rel_tol, abs_tol)
+    class(fusedcg_device_t), target, intent(inout) :: this
+    class(pc_t), optional, intent(inout), target :: M
+    integer, intent(in) :: n
+    integer, intent(in) :: max_iter
+    real(kind=rp), optional, intent(inout) :: rel_tol
+    real(kind=rp), optional, intent(inout) :: abs_tol
+    type(c_ptr) :: ptr
+    integer(c_size_t) :: p_size
+    integer :: i
+
+    call this%free()
+
+    allocate(this%w(n))
+    allocate(this%r(n))
+    allocate(this%z(n))
+    allocate(this%p(n, DEVICE_FUSEDCG_P_SPACE))
+    allocate(this%p_d(DEVICE_FUSEDCG_P_SPACE))
+    allocate(this%alpha(DEVICE_FUSEDCG_P_SPACE))
+
+    if (present(M)) then
+       this%M => M
+    end if
+
+    call device_map(this%w, this%w_d, n)
+    call device_map(this%r, this%r_d, n)
+    call device_map(this%z, this%z_d, n)
+    call device_map(this%alpha, this%alpha_d, DEVICE_FUSEDCG_P_SPACE)
+    do i = 1, DEVICE_FUSEDCG_P_SPACE+1
+       this%p_d(i) = C_NULL_PTR
+       call device_map(this%p(:,i), this%p_d(i), n)
+    end do
+
+    p_size = c_sizeof(C_NULL_PTR) * (DEVICE_FUSEDCG_P_SPACE)
+    call device_alloc(this%p_d_d, p_size)
+    ptr = c_loc(this%p_d)
+    call device_memcpy(ptr, this%p_d_d, p_size, &
+                       HOST_TO_DEVICE, sync=.false.)
+    if (present(rel_tol) .and. present(abs_tol)) then
+       call this%ksp_init(max_iter, rel_tol, abs_tol)
+    else if (present(rel_tol)) then
+       call this%ksp_init(max_iter, rel_tol=rel_tol)
+    else if (present(abs_tol)) then
+       call this%ksp_init(max_iter, abs_tol=abs_tol)
+    else
+       call this%ksp_init(max_iter)
+    end if
+
+    call device_event_create(this%gs_event, 2)
+
+  end subroutine fusedcg_device_init
+
+  !> Deallocate a pipelined PCG solver
+  subroutine fusedcg_device_free(this)
+    class(fusedcg_device_t), intent(inout) :: this
+    integer :: i
+
+    call this%ksp_free()
+
+    if (allocated(this%w)) then
+       deallocate(this%w)
+    end if
+
+    if (allocated(this%r)) then
+       deallocate(this%r)
+    end if
+
+
+    if (allocated(this%z)) then
+       deallocate(this%z)
+    end if
+
+
+    if (allocated(this%alpha)) then
+       deallocate(this%alpha)
+    end if
+
+    if (allocated(this%p)) then
+       deallocate(this%p)
+    end if
+
+    if (c_associated(this%w_d)) then
+       call device_free(this%w_d)
+    end if
+
+    if (c_associated(this%r_d)) then
+       call device_free(this%r_d)
+    end if
+
+    if (c_associated(this%z_d)) then
+       call device_free(this%z_d)
+    end if
+
+    if (c_associated(this%alpha_d)) then
+       call device_free(this%alpha_d)
+    end if
+
+    if (allocated(this%p_d)) then
+       do i = 1, DEVICE_FUSEDCG_P_SPACE
+          if (c_associated(this%p_d(i))) then
+             call device_free(this%p_d(i))
+          end if
+       end do
+    end if
+
+    nullify(this%M)
+
+    if (c_associated(this%gs_event)) then
+       call device_event_destroy(this%gs_event)
+    end if
+
+  end subroutine fusedcg_device_free
+
+  !> Pipelined PCG solve
+  function fusedcg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
+    class(fusedcg_device_t), intent(inout) :: this
+    class(ax_t), intent(inout) :: Ax
+    type(field_t), intent(inout) :: x
+    integer, intent(in) :: n
+    real(kind=rp), dimension(n), intent(inout) :: f
+    type(coef_t), intent(inout) :: coef
+    type(bc_list_t), intent(inout) :: blst
+    type(gs_t), intent(inout) :: gs_h
+    type(ksp_monitor_t) :: ksp_results
+    integer, optional, intent(in) :: niter
+    integer :: iter, max_iter, ierr, i, p_cur, p_prev
+    real(kind=rp) :: rnorm, rtr, norm_fac,  rtz1, rtz2
+    real(kind=rp) :: pap, beta
+    type(c_ptr) :: f_d
+    f_d = device_get_ptr(f)
+
+    if (present(niter)) then
+       max_iter = niter
+    else
+       max_iter = KSP_MAX_ITER
+    end if
+    norm_fac = 1.0_rp / sqrt(coef%volume)
+
+    associate(w => this%w, r => this%r, p => this%p, z => this%z, &
+         alpha => this%alpha, alpha_d => this%alpha_d, &
+         w_d => this%w_d, r_d => this%r_d, z_d => this%z_d, &
+         p_d => this%p_d, p_d_d => this%p_d_d)
+
+      rtz1 = 1.0_rp
+      p_prev = DEVICE_FUSEDCG_P_SPACE
+      p_cur = 1
+      call device_rzero(x%x_d, n)
+      call device_rzero(p_d(1), n)
+      call device_copy(r_d, f_d, n)
+
+      rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
+      rnorm = sqrt(rtr)*norm_fac
+      ksp_results%res_start = rnorm
+      ksp_results%res_final = rnorm
+      ksp_results%iter = 0
+      if(abscmp(rnorm, 0.0_rp)) return
+
+      do iter = 1, max_iter
+         call this%M%solve(z, r, n)
+         rtz2 = rtz1
+         rtz1 = device_glsc3(r_d, coef%mult_d, z_d, n)
+         beta = rtz1 / rtz2
+         if (iter .eq. 1) beta = 0.0_rp
+         call device_fusedcg_update_p(p_d(p_cur), z_d, p_d(p_prev), beta, n)
+
+         call Ax%compute(w, p(1, p_cur), coef, x%msh, x%Xh)
+         call gs_h%op(w, n, GS_OP_ADD, this%gs_event)
+         call device_event_sync(this%gs_event)
+         call bc_list_apply(blst, w, n)
+
+         pap = device_glsc3(w_d, coef%mult_d, this%p_d(p_cur), n)
+
+         alpha(p_cur) = rtz1 / pap
+         rtr = device_fusedcg_part2(r_d, coef%mult_d, w_d, &
+                                    alpha_d, alpha(p_cur), p_cur, n)
+         rnorm = sqrt(rtr)*norm_fac
+
+         if ((p_cur .eq. DEVICE_FUSEDCG_P_SPACE) .or. &
+              (rnorm .lt. this%abs_tol) .or. iter .eq. max_iter) then
+            call device_fusedcg_update_x(x%x_d, p_d_d, alpha_d, p_cur, n)
+            p_prev = p_cur
+            p_cur = 1
+            if (rnorm .lt. this%abs_tol) exit
+         else
+            p_prev = p_cur
+            p_cur = p_cur + 1
+         end if
+      end do
+
+      ksp_results%res_final = rnorm
+      ksp_results%iter = iter
+
+    end associate
+
+  end function fusedcg_device_solve
+
+end module fusedcg_device
+
+
diff --git a/src/krylov/bcknd/device/gmres_device.F90 b/src/krylov/bcknd/device/gmres_device.F90
index 2a19fda4fe9..35ef3e6b268 100644
--- a/src/krylov/bcknd/device/gmres_device.F90
+++ b/src/krylov/bcknd/device/gmres_device.F90
@@ -1,4 +1,4 @@
-! Copyright (c) 2022, The Neko Authors
+! Copyright (c) 2022-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -41,7 +41,7 @@ module gmres_device
   use gather_scatter, only : gs_t, GS_OP_ADD
   use bc, only : bc_list_t, bc_list_apply
   use device_identity, only : device_ident_t
-  use math, only : rone, rzero
+  use math, only : rone, rzero, abscmp
   use device_math, only : device_rzero, device_copy, device_glsc3, &
                           device_add2s2, device_add2s1, device_rone, &
                           device_cmult2, device_add2s2_many, device_glsc3_many,&
@@ -52,7 +52,7 @@ module gmres_device
   implicit none
   private
 
-  !> Standard preconditioned generalized minimal residual method 
+  !> Standard preconditioned generalized minimal residual method
   type, public, extends(ksp_t) :: gmres_device_t
      integer :: m_restart
      real(kind=rp), allocatable :: w(:)
@@ -91,7 +91,7 @@ real(c_rp) function hip_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) &
      end function hip_gmres_part2
   end interface
 #elif HAVE_CUDA
-  
+
   interface
      real(c_rp) function cuda_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) &
           bind(c, name='cuda_gmres_part2')
@@ -106,21 +106,21 @@ end function cuda_gmres_part2
 
 contains
 
-   function device_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) result(alpha)
-     type(c_ptr), value :: h_d, w_d, v_d_d, mult_d
-     integer(c_int) :: j, n
-     real(c_rp) :: alpha
-     integer :: ierr
+  function device_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) result(alpha)
+    type(c_ptr), value :: h_d, w_d, v_d_d, mult_d
+    integer(c_int) :: j, n
+    real(c_rp) :: alpha
+    integer :: ierr
 #ifdef HAVE_HIP
-     alpha = hip_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n)
+    alpha = hip_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n)
 #elif HAVE_CUDA
-     alpha = cuda_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n)
+    alpha = cuda_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n)
 #else
-     call neko_error('No device backend configured')
+    call neko_error('No device backend configured')
 #endif
 
 #ifndef HAVE_DEVICE_MPI
-     if (pe_size .gt. 1) then
+    if (pe_size .gt. 1) then
        call MPI_Allreduce(MPI_IN_PLACE, alpha, 1, &
             MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
     end if
@@ -129,9 +129,10 @@ function device_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) result(alpha)
   end function device_gmres_part2
 
   !> Initialise a standard GMRES solver
-  subroutine gmres_device_init(this, n, M, m_restart, rel_tol, abs_tol)
+  subroutine gmres_device_init(this, n, max_iter, M, m_restart, rel_tol, abs_tol)
     class(gmres_device_t), target,  intent(inout) :: this
     integer, intent(in) :: n
+    integer, intent(in) :: max_iter
     class(pc_t), optional, intent(inout), target :: M
     integer, optional, intent(inout) :: m_restart
     real(kind=rp), optional, intent(inout) :: rel_tol
@@ -146,13 +147,13 @@ subroutine gmres_device_init(this, n, M, m_restart, rel_tol, abs_tol)
     else
        this%m_restart = 30
     end if
-    
+
 
     call this%free()
-    
-    if (present(M)) then 
+
+    if (present(M)) then
        this%M => M
-    else 
+    else
        this%M => M_ident
     end if
 
@@ -160,14 +161,14 @@ subroutine gmres_device_init(this, n, M, m_restart, rel_tol, abs_tol)
     allocate(this%r(n))
     call device_map(this%w, this%w_d, n)
     call device_map(this%r, this%r_d, n)
-    
+
     allocate(this%c(this%m_restart))
     allocate(this%s(this%m_restart))
     allocate(this%gam(this%m_restart + 1))
     call device_map(this%c, this%c_d, this%m_restart)
     call device_map(this%s, this%s_d, this%m_restart)
     call device_map(this%gam, this%gam_d, this%m_restart+1)
-    
+
     allocate(this%z(n,this%m_restart))
     allocate(this%v(n,this%m_restart))
     allocate(this%h(this%m_restart,this%m_restart))
@@ -184,31 +185,34 @@ subroutine gmres_device_init(this, n, M, m_restart, rel_tol, abs_tol)
        this%h_d(i) = c_null_ptr
        call device_map(this%h(:,i), this%h_d(i), this%m_restart)
     end do
-    
+
     z_size = c_sizeof(C_NULL_PTR) * (this%m_restart)
     call device_alloc(this%z_d_d, z_size)
     call device_alloc(this%v_d_d, z_size)
     call device_alloc(this%h_d_d, z_size)
     ptr = c_loc(this%z_d)
-    call device_memcpy(ptr,this%z_d_d, z_size, HOST_TO_DEVICE)
+    call device_memcpy(ptr,this%z_d_d, z_size, &
+                       HOST_TO_DEVICE, sync=.false.)
     ptr = c_loc(this%v_d)
-    call device_memcpy(ptr,this%v_d_d, z_size, HOST_TO_DEVICE)
+    call device_memcpy(ptr,this%v_d_d, z_size, &
+                       HOST_TO_DEVICE, sync=.false.)
     ptr = c_loc(this%h_d)
-    call device_memcpy(ptr,this%h_d_d, z_size, HOST_TO_DEVICE)
-    
-       
+    call device_memcpy(ptr,this%h_d_d, z_size, &
+                       HOST_TO_DEVICE, sync=.false.)
+
+
     if (present(rel_tol) .and. present(abs_tol)) then
-       call this%ksp_init(rel_tol, abs_tol)
+       call this%ksp_init(max_iter, rel_tol, abs_tol)
     else if (present(rel_tol)) then
-       call this%ksp_init(rel_tol=rel_tol)
+       call this%ksp_init(max_iter, rel_tol=rel_tol)
     else if (present(abs_tol)) then
-       call this%ksp_init(abs_tol=abs_tol)
+       call this%ksp_init(max_iter, abs_tol=abs_tol)
     else
-       call this%ksp_init(abs_tol)
+       call this%ksp_init(max_iter)
     end if
 
     call device_event_create(this%gs_event, 2)
-          
+
   end subroutine gmres_device_init
 
   !> Deallocate a standard GMRES solver
@@ -229,7 +233,7 @@ subroutine gmres_device_free(this)
     if (allocated(this%r)) then
        deallocate(this%r)
     end if
- 
+
     if (allocated(this%z)) then
        deallocate(this%z)
     end if
@@ -237,17 +241,17 @@ subroutine gmres_device_free(this)
     if (allocated(this%h)) then
        deallocate(this%h)
     end if
-    
+
     if (allocated(this%v)) then
        deallocate(this%v)
     end if
-    
+
     if (allocated(this%s)) then
        deallocate(this%s)
     end if
     if (allocated(this%gam)) then
        deallocate(this%gam)
-    end if   
+    end if
 
     if (allocated(this%v_d)) then
        do i = 1, this%m_restart
@@ -273,7 +277,7 @@ subroutine gmres_device_free(this)
     end if
 
 
-    
+
     if (c_associated(this%gam_d)) then
        call device_free(this%gam_d)
     end if
@@ -295,9 +299,9 @@ subroutine gmres_device_free(this)
     if (c_associated(this%gs_event)) then
        call device_event_destroy(this%gs_event)
     end if
-    
+
   end subroutine gmres_device_free
- 
+
   !> Standard GMRES solve
   function gmres_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
     class(gmres_device_t), intent(inout) :: this
@@ -310,7 +314,7 @@ function gmres_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(k
     type(gs_t), intent(inout) :: gs_h
     type(ksp_monitor_t) :: ksp_results
     integer, optional, intent(in) :: niter
-    integer :: iter 
+    integer :: iter, max_iter
     integer :: i, j, k
     real(kind=rp) :: rnorm, alpha, temp, lr, alpha2, norm_fac
     logical :: conv
@@ -321,138 +325,146 @@ function gmres_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(k
     conv = .false.
     iter = 0
 
-     associate(w => this%w, c => this%c, r => this%r, z => this%z, h => this%h, &
+    if (present(niter)) then
+       max_iter = niter
+    else
+       max_iter = this%max_iter
+    end if
+
+    associate(w => this%w, c => this%c, r => this%r, z => this%z, h => this%h, &
           v => this%v, s => this%s, gam => this%gam, v_d => this%v_d, &
           w_d => this%w_d, r_d => this%r_d, h_d => this%h_d, v_d_d => this%v_d_d, &
           x_d => x%x_d, z_d_d => this%z_d_d, c_d => this%c_d)
 
-       norm_fac = 1.0_rp / sqrt(coef%volume)
-       call rzero(gam, this%m_restart + 1)
-       call rone(s, this%m_restart)
-       call rone(c, this%m_restart)
-       call rzero(h, this%m_restart * this%m_restart)
-       call device_rzero(x%x_d, n)
-       call device_rzero(this%gam_d, this%m_restart + 1)
-       call device_rone(this%s_d, this%m_restart)
-       call device_rone(this%c_d, this%m_restart)
-
-       call rzero(this%h, this%m_restart**2)
+      norm_fac = 1.0_rp / sqrt(coef%volume)
+      call rzero(gam, this%m_restart + 1)
+      call rone(s, this%m_restart)
+      call rone(c, this%m_restart)
+      call rzero(h, this%m_restart * this%m_restart)
+      call device_rzero(x%x_d, n)
+      call device_rzero(this%gam_d, this%m_restart + 1)
+      call device_rone(this%s_d, this%m_restart)
+      call device_rone(this%c_d, this%m_restart)
+
+      call rzero(this%h, this%m_restart**2)
 !       do j = 1, this%m_restart
 !          call device_rzero(h_d(j), this%m_restart)
 !       end do
-       do while (.not. conv .and. iter .lt. niter)
-
-          if(iter.eq.0) then               
-             call device_copy(r_d, f_d, n) 
-          else
-             call device_copy(r_d, f_d, n)      
-             call Ax%compute(w, x%x, coef, x%msh, x%Xh)
-             call gs_h%op(w, n, GS_OP_ADD)
-             call device_event_sync(this%gs_event)
-             call bc_list_apply(blst, w, n)
-             call device_sub2(r_d, w_d, n) 
-          end if
-          
-          gam(1) = sqrt(device_glsc3(r_d, r_d, coef%mult_d, n))
-          if(iter.eq.0) then
-             ksp_results%res_start = gam(1) * norm_fac
-          end if
-
-          if (gam(1) .eq. 0) return
-
-          rnorm = 0.0_rp
-          temp = 1.0_rp / gam(1)
-          call device_cmult2(v_d(1), r_d, temp, n) 
-          do j = 1, this%m_restart
-             iter = iter+1
-          
-             call this%M%solve(z(1,j), v(1,j), n)
-
-             call Ax%compute(w, z(1,j), coef, x%msh, x%Xh)
-             call gs_h%op(w, n, GS_OP_ADD)
-             call device_event_sync(this%gs_event)
-             call bc_list_apply(blst, w, n)
-
-             if (NEKO_BCKND_OPENCL .eq. 1) then
-                do i = 1, j
-                   h(i,j) = device_glsc3(w_d, v_d(i), coef%mult_d, n) 
-
-                   call device_add2s2(w_d, v_d(i), -h(i,j), n)
-
-                   alpha2 = device_glsc3(w_d, w_d, coef%mult_d, n)
-                end do
-             else
-                call device_glsc3_many(h(1,j), w_d, v_d_d, coef%mult_d, j, n)
-            
-                call device_memcpy(h(:,j), h_d(j), j, HOST_TO_DEVICE)
-
-                alpha2 = device_gmres_part2(w_d, v_d_d, h_d(j), coef%mult_d, j, n)
-                
-             end if
-             
-             alpha = sqrt(alpha2)
-             do i=1,j-1
-                temp = h(i,j)                   
-                h(i  ,j) =  c(i)*temp + s(i) * h(i+1,j)  
-                h(i+1,j) = -s(i)*temp + c(i) * h(i+1,j)
-             end do
-             
-             rnorm = 0.0_rp
-             if(alpha .eq. 0.0_rp) then 
-                conv = .true.
-                exit
-             end if
-             
-             lr = sqrt(h(j,j) * h(j,j) + alpha**2)
-             temp = 1.0_rp / lr
-             c(j) = h(j,j) * temp
-             s(j) = alpha  * temp
-             h(j,j) = lr
-             call device_memcpy(h(:,j), h_d(j), j, HOST_TO_DEVICE)
-             gam(j+1) = -s(j) * gam(j)
-             gam(j)   =  c(j) * gam(j)
-             
-             rnorm = abs(gam(j+1)) * norm_fac
-             if (rnorm .lt. this%abs_tol) then 
-                conv = .true.
-                exit
-             end if
-             
-             if (iter + 1 .gt. niter) exit
-             
-             if( j .lt. this%m_restart) then
-                temp = 1.0_rp / alpha
-                call device_cmult2(v_d(j+1), w_d, temp, n)
-             end if
-             
-          end do
-
-          j = min(j, this%m_restart)
-          do k = j, 1, -1
-             temp = gam(k)
-             do i = j, k+1, -1
-                temp = temp - h(k,i) * c(i)
-             end do
-             c(k) = temp / h(k,k)
-          end do
-
-          if (NEKO_BCKND_OPENCL .eq. 1) then
-             do i = 1, j
-                call device_add2s2(x_d, this%z_d(i), c(i), n)
-             end do
-          else
-             call device_memcpy(c, c_d, j, HOST_TO_DEVICE)
-             call device_add2s2_many(x_d, z_d_d, c_d, j, n)
-          end if
-       end do
-
-     end associate
-
-       ksp_results%res_final = rnorm
-       ksp_results%iter = iter
+      do while (.not. conv .and. iter .lt. max_iter)
+
+         if(iter.eq.0) then
+            call device_copy(r_d, f_d, n)
+         else
+            call device_copy(r_d, f_d, n)
+            call Ax%compute(w, x%x, coef, x%msh, x%Xh)
+            call gs_h%op(w, n, GS_OP_ADD, this%gs_event)
+            call device_event_sync(this%gs_event)
+            call bc_list_apply(blst, w, n)
+            call device_sub2(r_d, w_d, n)
+         end if
+
+         gam(1) = sqrt(device_glsc3(r_d, r_d, coef%mult_d, n))
+         if(iter.eq.0) then
+            ksp_results%res_start = gam(1) * norm_fac
+         end if
+
+         if (abscmp(gam(1), 0.0_rp)) return
+
+         rnorm = 0.0_rp
+         temp = 1.0_rp / gam(1)
+         call device_cmult2(v_d(1), r_d, temp, n)
+         do j = 1, this%m_restart
+            iter = iter+1
+
+            call this%M%solve(z(1,j), v(1,j), n)
+
+            call Ax%compute(w, z(1,j), coef, x%msh, x%Xh)
+            call gs_h%op(w, n, GS_OP_ADD, this%gs_event)
+            call device_event_sync(this%gs_event)
+            call bc_list_apply(blst, w, n)
+
+            if (NEKO_BCKND_OPENCL .eq. 1) then
+               do i = 1, j
+                  h(i,j) = device_glsc3(w_d, v_d(i), coef%mult_d, n)
+
+                  call device_add2s2(w_d, v_d(i), -h(i,j), n)
+
+                  alpha2 = device_glsc3(w_d, w_d, coef%mult_d, n)
+               end do
+            else
+               call device_glsc3_many(h(1,j), w_d, v_d_d, coef%mult_d, j, n)
+
+               call device_memcpy(h(:,j), h_d(j), j, &
+                                   HOST_TO_DEVICE, sync=.false.)
+
+               alpha2 = device_gmres_part2(w_d, v_d_d, h_d(j), coef%mult_d, j, n)
+
+            end if
+
+            alpha = sqrt(alpha2)
+            do i=1,j-1
+               temp = h(i,j)
+               h(i  ,j) =  c(i)*temp + s(i) * h(i+1,j)
+               h(i+1,j) = -s(i)*temp + c(i) * h(i+1,j)
+            end do
+
+            rnorm = 0.0_rp
+            if(abscmp(alpha, 0.0_rp)) then
+               conv = .true.
+               exit
+            end if
+
+            lr = sqrt(h(j,j) * h(j,j) + alpha**2)
+            temp = 1.0_rp / lr
+            c(j) = h(j,j) * temp
+            s(j) = alpha  * temp
+            h(j,j) = lr
+            call device_memcpy(h(:,j), h_d(j), j, &
+                                HOST_TO_DEVICE, sync=.false.)
+            gam(j+1) = -s(j) * gam(j)
+            gam(j)   =  c(j) * gam(j)
+
+            rnorm = abs(gam(j+1)) * norm_fac
+            if (rnorm .lt. this%abs_tol) then
+               conv = .true.
+               exit
+            end if
+
+            if (iter + 1 .gt. max_iter) exit
+
+            if( j .lt. this%m_restart) then
+               temp = 1.0_rp / alpha
+               call device_cmult2(v_d(j+1), w_d, temp, n)
+            end if
+
+         end do
+
+         j = min(j, this%m_restart)
+         do k = j, 1, -1
+            temp = gam(k)
+            do i = j, k+1, -1
+               temp = temp - h(k,i) * c(i)
+            end do
+            c(k) = temp / h(k,k)
+         end do
+
+         if (NEKO_BCKND_OPENCL .eq. 1) then
+            do i = 1, j
+               call device_add2s2(x_d, this%z_d(i), c(i), n)
+            end do
+         else
+            call device_memcpy(c, c_d, j, HOST_TO_DEVICE, sync=.false.)
+            call device_add2s2_many(x_d, z_d_d, c_d, j, n)
+         end if
+      end do
+
+    end associate
+
+    ksp_results%res_final = rnorm
+    ksp_results%iter = iter
 
   end function gmres_device_solve
 
 end module gmres_device
-  
+
 
diff --git a/src/krylov/bcknd/device/hip/fusedcg_aux.hip b/src/krylov/bcknd/device/hip/fusedcg_aux.hip
new file mode 100644
index 00000000000..5174f10832a
--- /dev/null
+++ b/src/krylov/bcknd/device/hip/fusedcg_aux.hip
@@ -0,0 +1,132 @@
+/*
+ Copyright (c) 2021-2024, The Neko Authors
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+   * Redistributions in binary form must reproduce the above
+     copyright notice, this list of conditions and the following
+     disclaimer in the documentation and/or other materials provided
+     with the distribution.
+
+   * Neither the name of the authors nor the names of its
+     contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <hip/hip_runtime.h>
+#include "fusedcg_kernel.h"
+#include <device/device_config.h>
+#include <device/hip/check.h>
+
+/**
+ * @todo Make sure that this gets deleted at some point...
+ */
+real *fusedcg_buf = NULL;
+real *fusedcg_buf_d = NULL;
+int fusedcg_buf_len = 0;
+
+extern "C" {
+
+#include <math/bcknd/device/device_mpi_reduce.h>
+#include <math/bcknd/device/device_mpi_op.h>
+
+
+  
+  void hip_fusedcg_update_p(void *p, void *z, void *po, real *beta, int *n) {
+        
+    const dim3 nthrds(1024, 1, 1);
+    const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
+    const hipStream_t stream = (hipStream_t) glb_cmd_queue;
+    
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(fusedcg_update_p_kernel<real>),
+                       nblcks, nthrds, 0, stream,
+                       (real *) p, (real *) z, (real *) po, *beta, *n);
+    HIP_CHECK(hipGetLastError());
+
+  }
+
+  void hip_fusedcg_update_x(void *x, void *p, void *alpha, int *p_cur, int *n) {
+
+    const dim3 nthrds(1024, 1, 1);
+    const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
+    const hipStream_t stream = (hipStream_t) glb_cmd_queue;
+    
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(fusedcg_update_x_kernel<real>),
+                       nblcks, nthrds, 0, stream,
+                       (real *) x, (const real **) p,
+                       (const real *) alpha, *p_cur, *n);
+    HIP_CHECK(hipGetLastError());
+  }
+
+
+  real hip_fusedcg_part2(void *a, void *b, void *c,
+                          void *alpha_d , real *alpha, int *p_cur, int * n) {
+    
+    const dim3 nthrds(1024, 1, 1);
+    const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
+    const int nb = ((*n) + 1024 - 1)/ 1024;
+    const hipStream_t stream = (hipStream_t) glb_cmd_queue;
+
+    if (fusedcg_buf != NULL && fusedcg_buf_len < nb) {
+      HIP_CHECK(hipHostFree(fusedcg_buf));
+      HIP_CHECK(hipFree(fusedcg_buf_d));
+      fusedcg_buf = NULL;
+    }
+    
+    if (fusedcg_buf == NULL){
+      HIP_CHECK(hipHostMalloc(&fusedcg_buf, 2*sizeof(real)));
+      HIP_CHECK(hipMalloc(&fusedcg_buf_d, nb*sizeof(real)));
+      fusedcg_buf_len = nb;
+    }
+
+    /* Store alpha(p_cur) in pinned memory */
+    fusedcg_buf[1] = (*alpha);
+
+    /* Update alpha_d(p_cur) = alpha(p_cur) */
+    real *alpha_d_p_cur = ((real *) alpha_d) + ((*p_cur - 1));   
+    HIP_CHECK(hipMemcpyAsync(alpha_d_p_cur, &fusedcg_buf[1], sizeof(real),
+                               hipMemcpyHostToDevice, stream));
+   
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(fusedcg_part2_kernel<real>),
+                       nblcks, nthrds, 0, stream,
+                       (real *) a, (real *) b, (real *) c,
+                       *alpha, fusedcg_buf_d, * n);
+    HIP_CHECK(hipGetLastError());
+
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(reduce_kernel<real>),
+                       1, 1024, 0, stream, fusedcg_buf_d, nb);
+    HIP_CHECK(hipGetLastError());
+
+#ifdef HAVE_DEVICE_MPI
+    hipStreamSynchronize(stream);
+    device_mpi_allreduce(fusedcg_buf_d, fusedcg_buf, 1,
+                         sizeof(real), DEVICE_MPI_SUM);
+#else
+    HIP_CHECK(hipMemcpyAsync(fusedcg_buf, fusedcg_buf_d, sizeof(real),
+                               hipMemcpyDeviceToHost, stream));
+    hipStreamSynchronize(stream);
+#endif
+
+    return fusedcg_buf[0];
+  }
+}
+
diff --git a/src/krylov/bcknd/device/hip/fusedcg_kernel.h b/src/krylov/bcknd/device/hip/fusedcg_kernel.h
new file mode 100644
index 00000000000..1a62caf1af4
--- /dev/null
+++ b/src/krylov/bcknd/device/hip/fusedcg_kernel.h
@@ -0,0 +1,124 @@
+#ifndef __KRYLOV_FUSEDCG_KERNEL_H__
+#define __KRYLOV_FUSEDCG_KERNEL_H__
+/*
+ Copyright (c) 2021-2024, The Neko Authors
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
+   * Redistributions in binary form must reproduce the above
+     copyright notice, this list of conditions and the following
+     disclaimer in the documentation and/or other materials provided
+     with the distribution.
+
+   * Neither the name of the authors nor the names of its
+     contributors may be used to endorse or promote products derived
+     from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <math/bcknd/device/hip/math_kernel.h>
+
+/**
+ * Kernel for update of p
+ */
+template< typename T >
+__global__ void fusedcg_update_p_kernel(T * __restrict__  p,
+                                    const T * __restrict__ z,
+                                    const T * __restrict__ po,
+                                    const T beta,
+                                    const int n) {
+
+  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const int str = blockDim.x * gridDim.x;
+
+  for (int i = idx; i < n; i+= str) {
+    p[i] = beta*po[i] + z[i];
+  }
+  
+}
+
+/**
+ * Kernel for update of x
+ */
+template< typename T >
+__global__ void fusedcg_update_x_kernel(T * __restrict__  x,
+                                        const T ** p,
+                                        const T * __restrict__ alpha,
+                                        const int p_cur,
+                                        const int n) {
+
+  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const int str = blockDim.x * gridDim.x;
+
+  for (int i = idx; i < n; i+= str) {
+    T tmp = 0.0;
+    for (int j = 0; j < p_cur; j ++) {
+      tmp += p[j][i] * alpha[j];
+    }
+    x[i] += tmp;
+  }
+  
+}
+
+/**
+ * Device kernel for fusedcg_part2
+ */
+template< typename T>
+__global__ void fusedcg_part2_kernel(T * __restrict__ a,
+                                     const T * __restrict__ b,
+                                     const T * __restrict__ c,
+                                     const T alpha,
+                                     T * buf_h,
+                                     const int n) {
+
+  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const int str = blockDim.x * gridDim.x;
+
+  const unsigned int lane = threadIdx.x % warpSize;
+  const unsigned int wid = threadIdx.x / warpSize;
+  
+  __shared__ T buf[64];
+  T tmp = 0.0;
+
+  for (int i = idx; i < n; i+= str) {
+    T rt = a[i] - alpha * c[i];
+    tmp = tmp + rt * b[i] * rt;
+    a[i] = rt;
+  }
+
+  tmp = reduce_warp<T>(tmp);
+  if (lane == 0) {
+    buf[wid] = tmp;
+  }
+  __syncthreads();
+  
+  tmp = (threadIdx.x < blockDim.x / warpSize) ? buf[lane] : 0;
+  if (wid == 0) {
+    tmp = reduce_warp<T>(tmp);
+  }
+
+  if (threadIdx.x == 0) {
+    buf_h[blockIdx.x] = tmp;
+  }
+}
+                                     
+
+#endif // __KRYLOV_FUSEDCG_KERNEL_H__
diff --git a/src/krylov/bcknd/device/hip/gmres_kernel.h b/src/krylov/bcknd/device/hip/gmres_kernel.h
index e47e724e870..0f417d9b737 100644
--- a/src/krylov/bcknd/device/hip/gmres_kernel.h
+++ b/src/krylov/bcknd/device/hip/gmres_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __KRYLOV_GMRES_KERNEL_H__
+#define __KRYLOV_GMRES_KERNEL_H__
 /*
  Copyright (c) 2022, The Neko Authors
  All rights reserved.
@@ -80,3 +82,5 @@ __global__ void gmres_part2_kernel(T  * __restrict__  w,
 }
 
 
+
+#endif // __KRYLOV_GMRES_KERNEL_H__
diff --git a/src/krylov/bcknd/device/hip/pipecg_kernel.h b/src/krylov/bcknd/device/hip/pipecg_kernel.h
index c197526034a..790a15649a9 100644
--- a/src/krylov/bcknd/device/hip/pipecg_kernel.h
+++ b/src/krylov/bcknd/device/hip/pipecg_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __KRYLOV_PIPECG_KERNEL_H__
+#define __KRYLOV_PIPECG_KERNEL_H__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -138,3 +140,5 @@ __global__ void pipecg_vecops_kernel(T  * __restrict__  p,
   }
   
 }
+
+#endif // __KRYLOV_PIPECG_KERNEL_H__
diff --git a/src/krylov/bcknd/device/opencl/jacobi_kernel.cl b/src/krylov/bcknd/device/opencl/jacobi_kernel.cl
index 5e9027fb5af..fd225b50a85 100644
--- a/src/krylov/bcknd/device/opencl/jacobi_kernel.cl
+++ b/src/krylov/bcknd/device/opencl/jacobi_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __KRYLOV_JACOBI_KERNEL_CL__
+#define __KRYLOV_JACOBI_KERNEL_CL__
 /*
  Copyright (c) 2022, The Neko Authors
  All rights reserved.
@@ -115,3 +117,5 @@ DEFINE_JACOBI_KERNEL(14)
 DEFINE_JACOBI_KERNEL(15)
 DEFINE_JACOBI_KERNEL(16)
 
+
+#endif // __KRYLOV_JACOBI_KERNEL_CL__
diff --git a/src/krylov/bcknd/device/pc_identity_device.f90 b/src/krylov/bcknd/device/pc_identity_device.f90
index 69811e54557..20d56b91c30 100644
--- a/src/krylov/bcknd/device/pc_identity_device.f90
+++ b/src/krylov/bcknd/device/pc_identity_device.f90
@@ -35,14 +35,14 @@ module device_identity
   use device
   use device_math
   use precon, only : pc_t
-  use num_types, only : rp    
+  use num_types, only : rp
   use, intrinsic :: iso_c_binding, only : c_ptr
   implicit none
   private
-  
+
   !> Defines a canonical Krylov preconditioner for accelerators
   type, public, extends(pc_t) :: device_ident_t
-  contains
+   contains
      procedure, pass(this) :: solve => device_ident_solve
      procedure, pass(this) :: update => device_ident_update
   end type device_ident_t
@@ -56,17 +56,17 @@ subroutine device_ident_solve(this, z, r, n)
     real(kind=rp), dimension(n), intent(inout) :: z
     real(kind=rp), dimension(n), intent(inout) :: r
     type(c_ptr) :: z_d, r_d
-    
+
     z_d = device_get_ptr(z)
     r_d = device_get_ptr(r)
-    
+
     call device_copy(z_d, r_d, n)
-    
+
   end subroutine device_ident_solve
-  
+
   !> Mandatory update routine (NOP)
   subroutine device_ident_update(this)
     class(device_ident_t), intent(inout) :: this
   end subroutine device_ident_update
-  
+
 end module device_identity
diff --git a/src/krylov/bcknd/device/pc_jacobi_device.F90 b/src/krylov/bcknd/device/pc_jacobi_device.F90
index 6f26a2435a5..6cd20641a89 100644
--- a/src/krylov/bcknd/device/pc_jacobi_device.F90
+++ b/src/krylov/bcknd/device/pc_jacobi_device.F90
@@ -66,7 +66,7 @@ subroutine hip_jacobi_update(d_d, dxt_d, dyt_d, dzt_d, &
        integer(c_int) :: nelv, lx
      end subroutine hip_jacobi_update
   end interface
-  
+
   interface
      subroutine cuda_jacobi_update(d_d, dxt_d, dyt_d, dzt_d, &
           G11_d, G22_d, G33_d, G12_d, G13_d, G23_d, nelv, lx) &
@@ -90,7 +90,7 @@ end subroutine opencl_jacobi_update
   end interface
 
 contains
-  
+
   subroutine device_jacobi_init(this, coef, dof, gs_h)
     class(device_jacobi_t), intent(inout) :: this
     type(coef_t), intent(inout), target :: coef
@@ -118,7 +118,7 @@ subroutine device_jacobi_free(this)
        call device_free(this%d_d)
        this%d_d = C_NULL_PTR
     end if
-    
+
     if (allocated(this%d)) then
        deallocate(this%d)
     end if
@@ -136,12 +136,12 @@ subroutine device_jacobi_solve(this, z, r, n)
     real(kind=rp), dimension(n), intent(inout) :: z
     real(kind=rp), dimension(n), intent(inout) :: r
     type(c_ptr) :: z_d, r_d
-    
+
     z_d = device_get_ptr(z)
     r_d = device_get_ptr(r)
-    
+
     call device_col3(z_d, r_d, this%d_d, n)
-    
+
   end subroutine device_jacobi_solve
 
   subroutine device_jacobi_update(this)
@@ -176,7 +176,7 @@ subroutine device_jacobi_update(this)
       if (coef%ifh2) then
          call device_addcol3(this%d_d, coef%h2_d, coef%B_d, coef%dof%size())
       end if
-      
+
       call gs_h%op(this%d, dof%size(), GS_OP_ADD)
 
       call device_invcol1(this%d_d, dof%size())
diff --git a/src/krylov/bcknd/device/pipecg_device.F90 b/src/krylov/bcknd/device/pipecg_device.F90
index 4ab0f234d10..829e95fea55 100644
--- a/src/krylov/bcknd/device/pipecg_device.F90
+++ b/src/krylov/bcknd/device/pipecg_device.F90
@@ -1,4 +1,4 @@
-! Copyright (c) 2021-2023, The Neko Authors
+! Copyright (c) 2021-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -40,8 +40,9 @@ module pipecg_device
   use coefs, only : coef_t
   use gather_scatter, only : gs_t, GS_OP_ADD
   use bc, only : bc_list_t, bc_list_apply
-  use math, only : glsc3, rzero, copy
-  use device_math, only : device_rzero, device_copy, device_glsc3
+  use math, only : glsc3, rzero, copy, abscmp
+  use device_math, only : device_rzero, device_copy, &
+       device_glsc3, device_vlsc3
   use device
   use comm
   implicit none
@@ -80,7 +81,7 @@ module pipecg_device
      procedure, pass(this) :: free => pipecg_device_free
      procedure, pass(this) :: solve => pipecg_device_solve
   end type pipecg_device_t
-  
+
 #ifdef HAVE_CUDA
   interface
      subroutine cuda_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
@@ -95,7 +96,7 @@ subroutine cuda_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
        real(c_rp) :: alpha, beta, reduction(3)
      end subroutine cuda_pipecg_vecops
   end interface
-  
+
   interface
      subroutine cuda_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, &
                                   p_cur, p_space, n) &
@@ -132,9 +133,9 @@ subroutine hip_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, &
      end subroutine hip_cg_update_xp
   end interface
 #endif
-  
+
 contains
-  
+
   subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
        w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
     type(c_ptr), value :: p_d, q_d, r_d, s_d, u_d1, u_d2
@@ -143,15 +144,15 @@ subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, &
     real(c_rp) :: alpha, beta, reduction(3)
 #ifdef HAVE_HIP
     call hip_pipecg_vecops(p_d, q_d, r_d,&
-       s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) 
+       s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
 #elif HAVE_CUDA
     call cuda_pipecg_vecops(p_d, q_d, r_d,&
-       s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) 
+       s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n)
 #else
     call neko_error('No device backend configured')
 #endif
   end subroutine device_pipecg_vecops
-  
+
   subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
     use, intrinsic :: iso_c_binding
     type(c_ptr), value :: x_d, p_d, u_d_d, alpha, beta
@@ -164,18 +165,19 @@ subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n)
     call neko_error('No device backend configured')
 #endif
   end subroutine device_cg_update_xp
-  
+
   !> Initialise a pipelined PCG solver
-  subroutine pipecg_device_init(this, n, M, rel_tol, abs_tol)
+  subroutine pipecg_device_init(this, n, max_iter, M, rel_tol, abs_tol)
     class(pipecg_device_t), target, intent(inout) :: this
     class(pc_t), optional, intent(inout), target :: M
     integer, intent(in) :: n
+    integer, intent(in) :: max_iter
     real(kind=rp), optional, intent(inout) :: rel_tol
     real(kind=rp), optional, intent(inout) :: abs_tol
     type(c_ptr) :: ptr
     integer(c_size_t) :: u_size
     integer :: i
-        
+
     call this%free()
 
     allocate(this%p(n))
@@ -190,8 +192,8 @@ subroutine pipecg_device_init(this, n, M, rel_tol, abs_tol)
     allocate(this%ni(n))
     allocate(this%alpha(DEVICE_PIPECG_P_SPACE))
     allocate(this%beta(DEVICE_PIPECG_P_SPACE))
-    
-    if (present(M)) then 
+
+    if (present(M)) then
        this%M => M
     end if
 
@@ -213,20 +215,21 @@ subroutine pipecg_device_init(this, n, M, rel_tol, abs_tol)
     u_size = 8*(DEVICE_PIPECG_P_SPACE+1)
     call device_alloc(this%u_d_d, u_size)
     ptr = c_loc(this%u_d)
-    call device_memcpy(ptr,this%u_d_d, u_size, HOST_TO_DEVICE)
+    call device_memcpy(ptr,this%u_d_d, u_size, &
+                       HOST_TO_DEVICE, sync=.false.)
 
     if (present(rel_tol) .and. present(abs_tol)) then
-       call this%ksp_init(rel_tol, abs_tol)
+       call this%ksp_init(max_iter, rel_tol, abs_tol)
     else if (present(rel_tol)) then
-       call this%ksp_init(rel_tol=rel_tol)
+       call this%ksp_init(max_iter, rel_tol=rel_tol)
     else if (present(abs_tol)) then
-       call this%ksp_init(abs_tol=abs_tol)
+       call this%ksp_init(max_iter, abs_tol=abs_tol)
     else
-       call this%ksp_init()
+       call this%ksp_init(max_iter)
     end if
 
     call device_event_create(this%gs_event, 2)
-          
+
   end subroutine pipecg_device_init
 
   !> Deallocate a pipelined PCG solver
@@ -269,7 +272,7 @@ subroutine pipecg_device_free(this)
     if (allocated(this%beta)) then
        deallocate(this%beta)
     end if
-    
+
 
     if (c_associated(this%p_d)) then
        call device_free(this%p_d)
@@ -319,7 +322,7 @@ subroutine pipecg_device_free(this)
     end if
 
   end subroutine pipecg_device_free
-  
+
   !> Pipelined PCG solve
   function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
     class(pipecg_device_t), intent(inout) :: this
@@ -344,7 +347,7 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(
     if (present(niter)) then
        max_iter = niter
     else
-       max_iter = KSP_MAX_ITER
+       max_iter = this%max_iter
     end if
     norm_fac = 1.0_rp / sqrt(coef%volume)
 
@@ -355,7 +358,7 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(
          p_d => this%p_d, q_d => this%q_d, r_d => this%r_d, &
          s_d => this%s_d, u_d => this%u_d, u_d_d => this%u_d_d, &
          w_d => this%w_d, z_d => this%z_d, mi_d => this%mi_d, ni_d => this%ni_d)
-      
+
       p_prev = DEVICE_PIPECG_P_SPACE !this%p_space
       u_prev = DEVICE_PIPECG_P_SPACE + 1 !this%p_space+1
       p_cur = 1
@@ -372,21 +375,21 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(
       call gs_h%op(w, n, GS_OP_ADD, this%gs_event)
       call device_event_sync(this%gs_event)
       call bc_list_apply(blst, w, n)
-    
+
       rtr = device_glsc3(r_d, coef%mult_d, r_d, n)
       rnorm = sqrt(rtr)*norm_fac
       ksp_results%res_start = rnorm
       ksp_results%res_final = rnorm
       ksp_results%iter = 0
-      if(rnorm .eq. 0.0_rp) return
+      if(abscmp(rnorm, 0.0_rp)) return
 
       gamma1 = 0.0_rp
       tmp1 = 0.0_rp
       tmp2 = 0.0_rp
       tmp3 = 0.0_rp
-      tmp1 = device_glsc3(r_d,coef%mult_d,u_d(u_prev),n)
-      tmp2 = device_glsc3(w_d,coef%mult_d,u_d(u_prev),n)
-      tmp3 = device_glsc3(r_d,coef%mult_d,r_d,n)
+      tmp1 = device_vlsc3(r_d, coef%mult_d, u_d(u_prev), n)
+      tmp2 = device_vlsc3(w_d, coef%mult_d, u_d(u_prev), n)
+      tmp3 = device_vlsc3(r_d, coef%mult_d, r_d, n)
       reduction(1) = tmp1
       reduction(2) = tmp2
       reduction(3) = tmp3
@@ -394,7 +397,7 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(
       do iter = 1, max_iter
          call MPI_Iallreduce(MPI_IN_PLACE, reduction, 3, &
               MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, request, ierr)
-         
+
          call this%M%solve(mi, w, n)
          call Ax%compute(ni, mi, coef, x%msh, x%Xh)
          call gs_h%op(ni, n, GS_OP_ADD, this%gs_event)
@@ -402,7 +405,7 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(
          call bc_list_apply(blst, ni, n)
 
          call MPI_Wait(request, status, ierr)
-         gamma2 = gamma1       
+         gamma2 = gamma1
          gamma1 = reduction(1)
          delta = reduction(2)
          rtr = reduction(3)
@@ -414,24 +417,26 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(
          if (iter .gt. 1) then
             beta(p_cur) = gamma1 / gamma2
             alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev)))
-         else 
+         else
             beta(p_cur) = 0.0_rp
             alpha(p_cur) = gamma1/delta
          end if
-                  
+
          call device_pipecg_vecops(p_d, q_d, r_d,&
                                  s_d, u_d(u_prev), u_d(p_cur),&
                                  w_d, z_d, ni_d,&
                                  mi_d, alpha(p_cur), beta(p_cur),&
-                                 coef%mult_d, reduction,n) 
+                                 coef%mult_d, reduction, n)
          if (p_cur .eq. DEVICE_PIPECG_P_SPACE) then
-            call device_memcpy(alpha, alpha_d, p_cur, HOST_TO_DEVICE)
-            call device_memcpy(beta, beta_d, p_cur, HOST_TO_DEVICE)
+            call device_memcpy(alpha, alpha_d, p_cur, &
+                               HOST_TO_DEVICE, sync=.false.)
+            call device_memcpy(beta, beta_d, p_cur, &
+                               HOST_TO_DEVICE, sync=.false.)
             call device_cg_update_xp(x%x_d, p_d, u_d_d, alpha_d, beta_d, p_cur, &
                                      DEVICE_PIPECG_P_SPACE, n)
             p_prev = p_cur
             u_prev = DEVICE_PIPECG_P_SPACE + 1
-            alpha(1) = alpha(p_cur) 
+            alpha(1) = alpha(p_cur)
             beta(1) = beta(p_cur)
             p_cur = 1
          else
@@ -440,21 +445,21 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(
             p_cur = p_cur + 1
          end if
       end do
-      
+
       if ( p_cur .ne. 1) then
-         call device_memcpy(alpha, alpha_d, p_cur, HOST_TO_DEVICE)
-         call device_memcpy(beta, beta_d, p_cur, HOST_TO_DEVICE)
+         call device_memcpy(alpha, alpha_d, p_cur, HOST_TO_DEVICE, sync=.false.)
+         call device_memcpy(beta, beta_d, p_cur, HOST_TO_DEVICE, sync=.false.)
          call device_cg_update_xp(x%x_d, p_d, u_d_d, alpha_d, beta_d, p_cur, &
                                   DEVICE_PIPECG_P_SPACE, n)
       end if
 
       ksp_results%res_final = rnorm
       ksp_results%iter = iter
-      
+
     end associate
-    
+
   end function pipecg_device_solve
-   
+
 end module pipecg_device
-  
+
 
diff --git a/src/krylov/bcknd/sx/cg_sx.f90 b/src/krylov/bcknd/sx/cg_sx.f90
index f7caa5a08e1..90d5fc8088e 100644
--- a/src/krylov/bcknd/sx/cg_sx.f90
+++ b/src/krylov/bcknd/sx/cg_sx.f90
@@ -40,11 +40,11 @@ module cg_sx
   use coefs, only : coef_t
   use gather_scatter, only : gs_t, GS_OP_ADD
   use bc, only : bc_list_t, bc_list_apply
-  use math, only : glsc3, rzero, copy, add2s1
+  use math, only : glsc3, add2s1, abscmp
   implicit none
   private
 
-  !> Standard preconditioned conjugate gradient method
+  !> Standard preconditioned conjugate gradient method (SX version)
   type, public, extends(ksp_t) :: sx_cg_t
      real(kind=rp), allocatable :: w(:)
      real(kind=rp), allocatable :: r(:)
@@ -59,34 +59,35 @@ module cg_sx
 contains
 
   !> Initialise a standard PCG solver
-  subroutine sx_cg_init(this, n, M, rel_tol, abs_tol)
+  subroutine sx_cg_init(this, n, max_iter, M, rel_tol, abs_tol)
     class(sx_cg_t), intent(inout) :: this
     class(pc_t), optional, intent(inout), target :: M
     integer, intent(in) :: n
+    integer, intent(in) :: max_iter
     real(kind=rp), optional, intent(inout) :: rel_tol
     real(kind=rp), optional, intent(inout) :: abs_tol
-        
+
     call this%free()
-    
+
     allocate(this%w(n))
     allocate(this%r(n))
     allocate(this%p(n))
     allocate(this%z(n))
-    
-    if (present(M)) then 
+
+    if (present(M)) then
        this%M => M
     end if
 
     if (present(rel_tol) .and. present(abs_tol)) then
-       call this%ksp_init(rel_tol, abs_tol)
+       call this%ksp_init(max_iter, rel_tol, abs_tol)
     else if (present(rel_tol)) then
-       call this%ksp_init(rel_tol=rel_tol)
+       call this%ksp_init(max_iter, rel_tol=rel_tol)
     else if (present(abs_tol)) then
-       call this%ksp_init(abs_tol=abs_tol)
+       call this%ksp_init(max_iter, abs_tol=abs_tol)
     else
-       call this%ksp_init()
+       call this%ksp_init(max_iter)
     end if
-          
+
   end subroutine sx_cg_init
 
   !> Deallocate a standard PCG solver
@@ -106,7 +107,7 @@ subroutine sx_cg_free(this)
     if (allocated(this%p)) then
        deallocate(this%p)
     end if
-    
+
     if (allocated(this%z)) then
        deallocate(this%z)
     end if
@@ -114,7 +115,7 @@ subroutine sx_cg_free(this)
     nullify(this%M)
 
   end subroutine sx_cg_free
-  
+
   !> Standard PCG solve
   function sx_cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
     class(sx_cg_t), intent(inout) :: this
@@ -132,11 +133,11 @@ function sx_cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resu
     integer :: i, iter, max_iter
     real(kind=rp) :: rnorm, rtr, rtr0, rtz2, rtz1
     real(kind=rp) :: beta, pap, alpha, alphm, norm_fac
-    
+
     if (present(niter)) then
        max_iter = niter
     else
-       max_iter = KSP_MAX_ITER
+       max_iter = this%max_iter
     end if
     norm_fac = one / sqrt(coef%volume)
 
@@ -152,7 +153,7 @@ function sx_cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resu
     ksp_results%res_start = rnorm
     ksp_results%res_final = rnorm
     ksp_results%iter = 0
-    if(rnorm .eq. zero) return
+    if(abscmp(rnorm, zero)) return
 
     do iter = 1, max_iter
        call this%M%solve(this%z, this%r, n)
@@ -162,7 +163,7 @@ function sx_cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resu
        beta = rtz1 / rtz2
        if (iter .eq. 1) beta = zero
        call add2s1(this%p, this%z, beta, n)
-       
+
        call Ax%compute(this%w, this%p, coef, x%msh, x%Xh)
        call gs_h%op(this%w, n, GS_OP_ADD)
        call bc_list_apply(blst, this%w, n)
@@ -188,5 +189,5 @@ function sx_cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resu
   end function sx_cg_solve
 
 end module cg_sx
-  
+
 
diff --git a/src/krylov/bcknd/sx/gmres_sx.f90 b/src/krylov/bcknd/sx/gmres_sx.f90
index 7ff7f925750..0f4e3a0b9cc 100644
--- a/src/krylov/bcknd/sx/gmres_sx.f90
+++ b/src/krylov/bcknd/sx/gmres_sx.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2021, The Neko Authors
+! Copyright (c) 2021-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -40,12 +40,12 @@ module gmres_sx
   use coefs, only : coef_t
   use gather_scatter, only : gs_t, GS_OP_ADD
   use bc, only : bc_list_t, bc_list_apply
-  use math, only : glsc3, rzero, rone, copy, cmult2, col2, col3, add2s2
+  use math, only : glsc3, rzero, rone, copy, cmult2, col2, col3, add2s2, abscmp
   use comm
   implicit none
   private
 
-  !> Standard preconditioned conjugate gradient method
+  !> Standard preconditioned generalized minimal residual method (SX version)
   type, public, extends(ksp_t) :: sx_gmres_t
      integer :: lgmres
      real(kind=rp), allocatable :: w(:)
@@ -69,9 +69,10 @@ module gmres_sx
 contains
 
   !> Initialise a standard GMRES solver
-  subroutine sx_gmres_init(this, n, M, lgmres, rel_tol, abs_tol)
+  subroutine sx_gmres_init(this, n, max_iter, M, lgmres, rel_tol, abs_tol)
     class(sx_gmres_t), intent(inout) :: this
     integer, intent(in) :: n
+    integer, intent(in) :: max_iter
     class(pc_t), optional, intent(inout), target :: M
     integer, optional, intent(inout) :: lgmres
     real(kind=rp), optional, intent(inout) :: rel_tol
@@ -82,11 +83,11 @@ subroutine sx_gmres_init(this, n, M, lgmres, rel_tol, abs_tol)
     else
        this%lgmres = 30
     end if
-    
+
 
     call this%free()
-    
-    if (present(M)) then 
+
+    if (present(M)) then
        this%M => M
     end if
 
@@ -95,27 +96,27 @@ subroutine sx_gmres_init(this, n, M, lgmres, rel_tol, abs_tol)
     allocate(this%ml(n))
     allocate(this%mu(n))
     allocate(this%wk1(n))
-    
+
     allocate(this%c(this%lgmres))
     allocate(this%s(this%lgmres))
     allocate(this%gam(this%lgmres + 1))
-    
+
     allocate(this%z(n,this%lgmres))
     allocate(this%v(n,this%lgmres))
-    
+
     allocate(this%h(this%lgmres,this%lgmres))
-    
-       
+
+
     if (present(rel_tol) .and. present(abs_tol)) then
-       call this%ksp_init(rel_tol, abs_tol)
+       call this%ksp_init(max_iter, rel_tol, abs_tol)
     else if (present(rel_tol)) then
-       call this%ksp_init(rel_tol=rel_tol)
+       call this%ksp_init(max_iter, rel_tol=rel_tol)
     else if (present(abs_tol)) then
-       call this%ksp_init(abs_tol=abs_tol)
+       call this%ksp_init(max_iter, abs_tol=abs_tol)
     else
-       call this%ksp_init(abs_tol)
+       call this%ksp_init(max_iter)
     end if
-          
+
   end subroutine sx_gmres_init
 
   !> Deallocate a standard GMRES solver
@@ -135,7 +136,7 @@ subroutine sx_gmres_free(this)
     if (allocated(this%r)) then
        deallocate(this%r)
     end if
- 
+
     if (allocated(this%z)) then
        deallocate(this%z)
     end if
@@ -143,35 +144,35 @@ subroutine sx_gmres_free(this)
     if (allocated(this%h)) then
        deallocate(this%h)
     end if
-    
+
     if (allocated(this%ml)) then
        deallocate(this%ml)
     end if
-    
+
     if (allocated(this%v)) then
        deallocate(this%v)
     end if
-    
+
     if (allocated(this%s)) then
        deallocate(this%s)
     end if
-    
+
     if (allocated(this%mu)) then
        deallocate(this%mu)
     end if
-    
+
     if (allocated(this%gam)) then
        deallocate(this%gam)
     end if
-    
+
     if (allocated(this%wk1)) then
        deallocate(this%wk1)
     end if
-    
+
     nullify(this%M)
-    
+
   end subroutine sx_gmres_free
- 
+
   !> Standard PCG solve
   function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
     class(sx_gmres_t), intent(inout) :: this
@@ -184,10 +185,10 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
     type(gs_t), intent(inout) :: gs_h
     type(ksp_monitor_t) :: ksp_results
     integer, optional, intent(in) :: niter
-    integer :: iter, glb_n
-    integer :: i, j, k, ierr 
+    integer :: iter, max_iter, glb_n
+    integer :: i, j, k, ierr
     real(kind=rp), parameter :: one = 1.0
-    real(kind=rp) :: rnorm 
+    real(kind=rp) :: rnorm
     real(kind=rp) ::  alpha, temp, l
     real(kind=rp) :: ratio, div0, norm_fac
     logical :: conv
@@ -197,6 +198,12 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
     iter = 0
     glb_n = n / x%msh%nelv * x%msh%glb_nelv
 
+    if (present(niter)) then
+       max_iter = niter
+    else
+       max_iter = this%max_iter
+    end if
+
     call rone(this%ml, n)
     call rone(this%mu, n)
     norm_fac = one / sqrt(coef%volume)
@@ -206,19 +213,19 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
     call rone(this%c, this%lgmres)
     call rzero(this%h, this%lgmres * this%lgmres)
     outer = 0
-    do while (.not. conv .and. iter .lt. niter)
+    do while (.not. conv .and. iter .lt. max_iter)
        outer = outer + 1
 
-       if(iter.eq.0) then               
-          call col3(this%r,this%ml,f,n) 
+       if(iter.eq.0) then
+          call col3(this%r,this%ml,f,n)
        else
           !update residual
-          call copy  (this%r,f,n)      
+          call copy  (this%r,f,n)
           call Ax%compute(this%w, x%x, coef, x%msh, x%Xh)
           call gs_h%op(this%w, n, GS_OP_ADD)
           call bc_list_apply(blst, this%w, n)
-          call add2s2(this%r,this%w,-one,n) 
-          call col2(this%r,this%ml,n)       
+          call add2s2(this%r,this%w,-one,n)
+          call col2(this%r,this%ml,n)
        endif
        this%gam(1) = sqrt(glsc3(this%r, this%r, coef%mult, n))
        if(iter.eq.0) then
@@ -226,11 +233,11 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
           ksp_results%res_start = div0
        endif
 
-       if ( this%gam(1) .eq. 0) return
+       if (abscmp(this%gam(1), 0.0_rp)) return
 
        rnorm = 0.0_rp
        temp = one / this%gam(1)
-       call cmult2(this%v(1,1), this%r, temp, n) 
+       call cmult2(this%v(1,1), this%r, temp, n)
        do j = 1, this%lgmres
           iter = iter+1
           call col3(this%w, this%mu, this%v(1,j), n)
@@ -241,7 +248,7 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
           call Ax%compute(this%w, this%z(1,j), coef, x%msh, x%Xh)
           call gs_h%op(this%w, n, GS_OP_ADD)
           call bc_list_apply(blst, this%w, n)
-          call col2(this%w, this%ml, n)       
+          call col2(this%w, this%ml, n)
 
           do i = 1, j
              this%h(i,j) = 0.0_rp
@@ -254,7 +261,7 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
           !Could probably be done inplace...
           call MPI_Allreduce(this%h(1,j), this%wk1, j, &
                MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
-          call copy(this%h(1,j), this%wk1, j) 
+          call copy(this%h(1,j), this%wk1, j)
 
           do i = 1, j
              do k = 1, n
@@ -264,16 +271,16 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
 
           !apply Givens rotations to new column
           do i=1,j-1
-             temp = this%h(i,j)                   
-             this%h(i  ,j) =  this%c(i)*temp + this%s(i)*this%h(i+1,j)  
+             temp = this%h(i,j)
+             this%h(i  ,j) =  this%c(i)*temp + this%s(i)*this%h(i+1,j)
              this%h(i+1,j) = -this%s(i)*temp + this%c(i)*this%h(i+1,j)
           end do
 
-          alpha = sqrt(glsc3(this%w, this%w, coef%mult, n))   
+          alpha = sqrt(glsc3(this%w, this%w, coef%mult, n))
           rnorm = 0.0_rp
-          if(alpha .eq. 0.0_rp) then 
-            conv = .true.
-            exit
+          if(abscmp(alpha, 0.0_rp)) then
+             conv = .true.
+             exit
           end if
           l = sqrt(this%h(j,j) * this%h(j,j) + alpha**2)
           temp = one / l
@@ -285,16 +292,16 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
 
           rnorm = abs(this%gam(j+1)) * norm_fac
           ratio = rnorm / div0
-          if (rnorm .lt. this%abs_tol) then 
+          if (rnorm .lt. this%abs_tol) then
              conv = .true.
              exit
           end if
-         
-          if (iter + 1 .gt. niter) exit
-          
+
+          if (iter + 1 .gt. max_iter) exit
+
           if( j .lt. this%lgmres) then
-            temp = one / alpha
-            call cmult2(this%v(1,j+1), this%w, temp, n)
+             temp = one / alpha
+             call cmult2(this%v(1,j+1), this%w, temp, n)
           endif
        end do
        j = min(j, this%lgmres)
@@ -311,7 +318,7 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
           do k = 1, n
              x%x(k,1,1,1) = x%x(k,1,1,1) + this%c(i) * this%z(k,i)
           end do
-       end do 
+       end do
     end do
 
     ksp_results%res_final = rnorm
@@ -319,5 +326,5 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r
   end function sx_gmres_solve
 
 end module gmres_sx
-  
+
 
diff --git a/src/krylov/bcknd/sx/pc_jacobi_sx.f90 b/src/krylov/bcknd/sx/pc_jacobi_sx.f90
index f7df4b75ad6..8ae01ff8fbf 100644
--- a/src/krylov/bcknd/sx/pc_jacobi_sx.f90
+++ b/src/krylov/bcknd/sx/pc_jacobi_sx.f90
@@ -55,7 +55,7 @@ module sx_jacobi
   end type sx_jacobi_t
 
 contains
-  
+
   subroutine sx_jacobi_init(this, coef, dof, gs_h)
     class(sx_jacobi_t), intent(inout) :: this
     type(coef_t), intent(inout), target :: coef
@@ -229,7 +229,7 @@ subroutine sx_update_lx(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n, lx)
     end do
 
   end subroutine sx_update_lx
-  
+
   subroutine sx_update_lx14(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 14
     integer, parameter :: ly = 14
@@ -308,7 +308,7 @@ subroutine sx_update_lx14(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     end do
 
   end subroutine sx_update_lx14
-  
+
   subroutine sx_update_lx13(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 13
     integer, parameter :: ly = 13
@@ -387,7 +387,7 @@ subroutine sx_update_lx13(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     end do
 
   end subroutine sx_update_lx13
-  
+
   subroutine sx_update_lx12(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 12
     integer, parameter :: ly = 12
@@ -624,7 +624,7 @@ subroutine sx_update_lx10(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     end do
 
   end subroutine sx_update_lx10
-  
+
   subroutine sx_update_lx9(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 9
     integer, parameter :: ly = 9
@@ -703,7 +703,7 @@ subroutine sx_update_lx9(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     end do
 
   end subroutine sx_update_lx9
-  
+
   subroutine sx_update_lx8(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 8
     integer, parameter :: ly = 8
@@ -782,7 +782,7 @@ subroutine sx_update_lx8(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     end do
 
   end subroutine sx_update_lx8
-  
+
   subroutine sx_update_lx7(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 7
     integer, parameter :: ly = 7
@@ -861,7 +861,7 @@ subroutine sx_update_lx7(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
 
 
   end subroutine sx_update_lx7
-  
+
   subroutine sx_update_lx6(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 6
     integer, parameter :: ly = 6
@@ -1019,7 +1019,7 @@ subroutine sx_update_lx5(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
 
 
   end subroutine sx_update_lx5
-  
+
   subroutine sx_update_lx4(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 4
     integer, parameter :: ly = 4
@@ -1098,7 +1098,7 @@ subroutine sx_update_lx4(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
 
 
   end subroutine sx_update_lx4
-  
+
   subroutine sx_update_lx3(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 3
     integer, parameter :: ly = 3
@@ -1177,7 +1177,7 @@ subroutine sx_update_lx3(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     end do
 
   end subroutine sx_update_lx3
-  
+
   subroutine sx_update_lx2(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 2
     integer, parameter :: ly = 2
diff --git a/src/krylov/bcknd/sx/pipecg_sx.f90 b/src/krylov/bcknd/sx/pipecg_sx.f90
index b6d10ffbb31..28b848b0212 100644
--- a/src/krylov/bcknd/sx/pipecg_sx.f90
+++ b/src/krylov/bcknd/sx/pipecg_sx.f90
@@ -40,11 +40,11 @@ module pipecg_sx
   use coefs, only : coef_t
   use gather_scatter, only : gs_t, GS_OP_ADD
   use bc, only : bc_list_t, bc_list_apply
-  use math, only : glsc3, rzero, copy
+  use math, only : glsc3, abscmp
   use comm
   implicit none
   private
-  
+
   !> Pipelined preconditioned conjugate gradient method for SX-Aurora
   type, public, extends(ksp_t) :: sx_pipecg_t
      real(kind=rp), allocatable :: p(:)
@@ -65,15 +65,16 @@ module pipecg_sx
 contains
 
   !> Initialise a pipelined PCG solver
-  subroutine sx_pipecg_init(this, n, M, rel_tol, abs_tol)
+  subroutine sx_pipecg_init(this, n, max_iter, M, rel_tol, abs_tol)
     class(sx_pipecg_t), intent(inout) :: this
     class(pc_t), optional, intent(inout), target :: M
     integer, intent(in) :: n
+    integer, intent(in) :: max_iter
     real(kind=rp), optional, intent(inout) :: rel_tol
     real(kind=rp), optional, intent(inout) :: abs_tol
-        
+
     call this%free()
-    
+
     allocate(this%p(n))
     allocate(this%q(n))
     allocate(this%r(n))
@@ -83,20 +84,20 @@ subroutine sx_pipecg_init(this, n, M, rel_tol, abs_tol)
     allocate(this%z(n))
     allocate(this%mi(n))
     allocate(this%ni(n))
-    if (present(M)) then 
+    if (present(M)) then
        this%M => M
     end if
 
     if (present(rel_tol) .and. present(abs_tol)) then
-       call this%ksp_init(rel_tol, abs_tol)
+       call this%ksp_init(max_iter, rel_tol, abs_tol)
     else if (present(rel_tol)) then
-       call this%ksp_init(rel_tol=rel_tol)
+       call this%ksp_init(max_iter, rel_tol=rel_tol)
     else if (present(abs_tol)) then
-       call this%ksp_init(abs_tol=abs_tol)
+       call this%ksp_init(max_iter, abs_tol=abs_tol)
     else
-       call this%ksp_init()
+       call this%ksp_init(max_iter)
     end if
-          
+
   end subroutine sx_pipecg_init
 
   !> Deallocate a pipelined PCG solver
@@ -137,7 +138,7 @@ subroutine sx_pipecg_free(this)
 
 
   end subroutine sx_pipecg_free
-  
+
   !> Pipelined PCG solve
   function sx_pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
     class(sx_pipecg_t), intent(inout) :: this
@@ -151,16 +152,16 @@ function sx_pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_
     type(ksp_monitor_t) :: ksp_results
     integer, optional, intent(in) :: niter
     integer :: iter, max_iter, i, ierr
-    real(kind=rp) :: rnorm, rtr, reduction(3), norm_fac 
+    real(kind=rp) :: rnorm, rtr, reduction(3), norm_fac
     real(kind=rp) :: alpha, beta, gamma1, gamma2, delta
     real(kind=rp) :: tmp1, tmp2, tmp3
     type(MPI_Request) :: request
     type(MPI_Status) :: status
-    
+
     if (present(niter)) then
        max_iter = niter
     else
-       max_iter = KSP_MAX_ITER
+       max_iter = this%max_iter
     end if
     norm_fac = 1.0_rp / sqrt(coef%volume)
 
@@ -177,16 +178,16 @@ function sx_pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_
     call Ax%compute(this%w, this%u, coef, x%msh, x%Xh)
     call gs_h%op(this%w, n, GS_OP_ADD)
     call bc_list_apply(blst, this%w, n)
-    
+
     rtr = glsc3(this%r, coef%mult, this%r, n)
     rnorm = sqrt(rtr)*norm_fac
     ksp_results%res_start = rnorm
     ksp_results%res_final = rnorm
     ksp_results%iter = 0
-    if(rnorm .eq. 0.0_rp) return
+    if(abscmp(rnorm, 0.0_rp)) return
 
     gamma1 = 0.0_rp
-      
+
     do iter = 1, max_iter
 
        tmp1 = 0.0_rp
@@ -200,34 +201,34 @@ function sx_pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_
        reduction(1) = tmp1
        reduction(2) = tmp2
        reduction(3) = tmp3
-       
+
        call MPI_Iallreduce(MPI_IN_PLACE, reduction, 3, &
             MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, request, ierr)
-       
+
        call this%M%solve(this%mi, this%w, n)
        call Ax%compute(this%ni, this%mi, coef, x%msh, x%Xh)
        call gs_h%op(this%ni, n, GS_OP_ADD)
        call bc_list_apply(blst, this%ni, n)
 
        call MPI_Wait(request, status, ierr)
-       gamma2 = gamma1       
+       gamma2 = gamma1
        gamma1 = reduction(1)
        delta = reduction(2)
        rtr = reduction(3)
-       
+
        rnorm = sqrt(rtr)*norm_fac
        if (rnorm .lt. this%abs_tol) then
           exit
        end if
-       
+
        if (iter .gt. 1) then
           beta = gamma1 / gamma2
           alpha = gamma1 / (delta - (beta * gamma1/alpha))
-       else 
+       else
           beta = 0.0_rp
           alpha = gamma1/delta
        end if
-       
+
        do i = 1, n
           this%z(i) = beta * this%z(i) + this%ni(i)
           this%q(i) = beta * this%q(i) + this%mi(i)
@@ -241,14 +242,14 @@ function sx_pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_
           this%u(i) = this%u(i) - alpha * this%q(i)
           this%w(i) = this%w(i) - alpha * this%z(i)
        end do
-       
+
     end do
-    
+
     ksp_results%res_final = rnorm
     ksp_results%iter = iter
-    
+
   end function sx_pipecg_solve
-   
+
 end module pipecg_sx
-  
+
 
diff --git a/src/krylov/krylov.f90 b/src/krylov/krylov.f90
index 88cfe377436..00a689b87e7 100644
--- a/src/krylov/krylov.f90
+++ b/src/krylov/krylov.f90
@@ -40,26 +40,25 @@ module krylov
   use mesh, only : mesh_t
   use field, only : field_t
   use utils, only : neko_error, neko_warning
-  use bc, only : bc_list_t, bc_list_apply_vector, bc_list_apply_scalar, &
-                 bc_list_apply
+  use bc, only : bc_list_t
   use identity, only : ident_t
   use device_identity, only : device_ident_t
   use neko_config
   implicit none
   private
 
-  integer, public, parameter :: KSP_MAX_ITER = 1e4 !< Maximum number of iters.
+  integer, public, parameter :: KSP_MAX_ITER = 1e4       !< Maximum number of iters.
   real(kind=rp), public, parameter :: KSP_ABS_TOL = 1d-9 !< Absolut tolerance
   real(kind=rp), public, parameter :: KSP_REL_TOL = 1d-9 !< Relative tolerance
 
   !> Type for storing initial and final residuals in a Krylov solver.
   type, public :: ksp_monitor_t
-    !> Iteration number.
-    integer :: iter
-    !> Initial residual.
-    real(kind=rp) :: res_start
-    !> FInal residual
-    real(kind=rp) :: res_final
+     !> Iteration number.
+     integer :: iter
+     !> Initial residual.
+     real(kind=rp) :: res_start
+     !> FInal residual
+     real(kind=rp) :: res_final
   end type ksp_monitor_t
 
   !> Base abstract type for a canonical Krylov method, solving \f$ Ax = f \f$.
@@ -67,20 +66,26 @@ module krylov
      class(pc_t), pointer :: M => null() !< Preconditioner
      real(kind=rp) :: rel_tol            !< Relative tolerance
      real(kind=rp) :: abs_tol            !< Absolute tolerance
+     integer :: max_iter                 !< Maximum number of iterations
      class(pc_t), allocatable :: M_ident !< Internal preconditioner (Identity)
    contains
+     !> Base type constructor.
      procedure, pass(this) :: ksp_init => krylov_init
+     !> Base type destructor.
      procedure, pass(this) :: ksp_free => krylov_free
+     !> Set preconditioner.
      procedure, pass(this) :: set_pc => krylov_set_pc
+     !> Solve the system.
      procedure(ksp_method), pass(this), deferred :: solve
+     !> Destructor.
      procedure(ksp_t_free), pass(this), deferred :: free
   end type ksp_t
 
-  
+
   !> Abstract interface for a Krylov method's solve routine
   !!
   !! @param x field to solve for
-  !! @param f right hand side 
+  !! @param f right hand side
   !! @param n integer, size of vectors
   !! @param coef Coefficients
   !! @param blst list of  boundary conditions
@@ -88,7 +93,7 @@ module krylov
   !! @param niter iteration trip count
   abstract interface
      function ksp_method(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results)
-       import :: bc_list_t       
+       import :: bc_list_t
        import :: field_t
        import :: ksp_t
        import :: coef_t
@@ -104,8 +109,8 @@ function ksp_method(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resul
        real(kind=rp), dimension(n), intent(inout) :: f
        type(coef_t), intent(inout) :: coef
        type(bc_list_t), intent(inout) :: blst
-       type(gs_t), intent(inout) :: gs_h              
-       integer, optional, intent(in) :: niter       
+       type(gs_t), intent(inout) :: gs_h
+       integer, optional, intent(in) :: niter
        type(ksp_monitor_t) :: ksp_results
      end function ksp_method
   end interface
@@ -117,19 +122,21 @@ subroutine ksp_t_free(this)
        class(ksp_t), intent(inout) :: this
      end subroutine ksp_t_free
   end interface
-  
+
 contains
 
-  !> Create a krylov solver
+  !> Constructor for the base type.
+  !! @param max_iter Maximum number of iterations.
   !! @param rel_tol Relative tolarance for converence.
   !! @param rel_tol Absolute tolarance for converence.
   !! @param M The preconditioner.
-  subroutine krylov_init(this, rel_tol, abs_tol, M)    
+  subroutine krylov_init(this, max_iter, rel_tol, abs_tol, M)
     class(ksp_t), target, intent(inout) :: this
+    integer, intent(in) :: max_iter
     real(kind=rp), optional, intent(in) :: rel_tol
     real(kind=rp), optional, intent(in) :: abs_tol
     class(pc_t), optional, target, intent(in) :: M
-    
+
     call krylov_free(this)
 
     if (present(rel_tol)) then
@@ -144,6 +151,8 @@ subroutine krylov_init(this, rel_tol, abs_tol, M)
        this%abs_tol = KSP_ABS_TOL
     end if
 
+    this%max_iter = max_iter
+
     if (present(M)) then
        this%M => M
     else
@@ -158,7 +167,7 @@ subroutine krylov_init(this, rel_tol, abs_tol, M)
     end if
 
   end subroutine krylov_init
-  
+
   !> Deallocate a Krylov solver
   subroutine krylov_free(this)
     class(ksp_t), intent(inout) :: this
@@ -181,9 +190,9 @@ subroutine krylov_set_pc(this, M)
           call neko_error('Preconditioner already defined')
        end select
     end if
-    
+
     this%M => M
-    
+
   end subroutine krylov_set_pc
-  
+
 end module krylov
diff --git a/src/krylov/krylov_fctry.f90 b/src/krylov/krylov_fctry.f90
index f0d49ebf016..f122c670b65 100644
--- a/src/krylov/krylov_fctry.f90
+++ b/src/krylov/krylov_fctry.f90
@@ -38,6 +38,7 @@ module krylov_fctry
   use pipecg, only : pipecg_t
   use pipecg_sx, only : sx_pipecg_t
   use pipecg_device, only : pipecg_device_t
+  use fusedcg_device, only : fusedcg_device_t
   use bicgstab, only : bicgstab_t
   use gmres, only : gmres_t
   use gmres_sx, only : sx_gmres_t
@@ -53,14 +54,15 @@ module krylov_fctry
 
 contains
 
-  !> Initialize an interative Krylov solver
-  subroutine krylov_solver_factory(ksp, n, solver, abstol, M)
+  !> Initialize an iterative Krylov solver.
+  subroutine krylov_solver_factory(ksp, n, solver, max_iter, abstol, M)
     class(ksp_t), allocatable, target, intent(inout) :: ksp
     integer, intent(in), value :: n
     character(len=*), intent(in) :: solver
+    integer, intent(in) :: max_iter
     real(kind=rp), optional :: abstol
     class(pc_t), optional, intent(inout), target :: M
- 
+
     if (allocated(ksp)) then
        call krylov_solver_destroy(ksp)
        deallocate(ksp)
@@ -84,6 +86,15 @@ subroutine krylov_solver_factory(ksp, n, solver, abstol, M)
        else
           allocate(pipecg_t::ksp)
        end if
+    else if (trim(solver) .eq. 'fusedcg') then
+       if (NEKO_BCKND_DEVICE .eq. 1) then
+          if (NEKO_BCKND_OPENCL .eq. 1) then
+             call neko_error('FusedCG not supported for OpenCL')
+          end if
+          allocate(fusedcg_device_t::ksp)
+       else
+          call neko_error('FusedCG only supported for CUDA/HIP')
+       end if
     else if (trim(solver) .eq. 'cacg') then
        allocate(cacg_t::ksp)
     else if (trim(solver) .eq. 'gmres') then
@@ -103,102 +114,110 @@ subroutine krylov_solver_factory(ksp, n, solver, abstol, M)
     if (present(abstol) .and. present(M)) then
        select type(kp => ksp)
        type is(cg_t)
-          call kp%init(n, M = M, abs_tol = abstol)
+          call kp%init(n, max_iter, M = M, abs_tol = abstol)
        type is(sx_cg_t)
-          call kp%init(n, M = M, abs_tol = abstol)
+          call kp%init(n, max_iter, M = M, abs_tol = abstol)
        type is(cg_device_t)
-          call kp%init(n, M = M, abs_tol = abstol)
+          call kp%init(n, max_iter, M = M, abs_tol = abstol)
        type is(pipecg_t)
-          call kp%init(n, M = M, abs_tol = abstol)
+          call kp%init(n, max_iter, M = M, abs_tol = abstol)
        type is(sx_pipecg_t)
-          call kp%init(n, M = M, abs_tol = abstol)
+          call kp%init(n, max_iter, M = M, abs_tol = abstol)
        type is(pipecg_device_t)
-          call kp%init(n, M = M, abs_tol = abstol)
+          call kp%init(n, max_iter, M = M, abs_tol = abstol)
+       type is(fusedcg_device_t)
+          call kp%init(n, max_iter, M = M, abs_tol = abstol)
        type is(cacg_t)
-          call kp%init(n, M = M, abs_tol = abstol)
+          call kp%init(n, max_iter, M = M, abs_tol = abstol)
        type is(gmres_t)
-          call kp%init(n, M = M, abs_tol = abstol)
+          call kp%init(n, max_iter, M = M, abs_tol = abstol)
        type is(sx_gmres_t)
-          call kp%init(n, M = M, abs_tol = abstol)
+          call kp%init(n, max_iter, M = M, abs_tol = abstol)
        type is(gmres_device_t)
-          call kp%init(n, M = M, abs_tol = abstol)
+          call kp%init(n, max_iter, M = M, abs_tol = abstol)
        type is(bicgstab_t)
-          call kp%init(n, M = M, abs_tol = abstol)
+          call kp%init(n, max_iter, M = M, abs_tol = abstol)
        end select
     else if (present(abstol)) then
        select type(kp => ksp)
        type is(cg_t)
-          call kp%init(n, abs_tol = abstol)
+          call kp%init(n, max_iter, abs_tol = abstol)
        type is(sx_cg_t)
-          call kp%init(n, abs_tol = abstol)       
+          call kp%init(n, max_iter, abs_tol = abstol)
        type is(cg_device_t)
-          call kp%init(n, abs_tol = abstol)       
+          call kp%init(n, max_iter, abs_tol = abstol)
        type is(pipecg_t)
-          call kp%init(n, abs_tol = abstol)
+          call kp%init(n, max_iter, abs_tol = abstol)
        type is(sx_pipecg_t)
-          call kp%init(n, abs_tol = abstol)
+          call kp%init(n, max_iter, abs_tol = abstol)
        type is (pipecg_device_t)
-          call kp%init(n, abs_tol = abstol)
+          call kp%init(n, max_iter, abs_tol = abstol)
+       type is (fusedcg_device_t)
+          call kp%init(n, max_iter, abs_tol = abstol)
        type is(cacg_t)
-          call kp%init(n, abs_tol = abstol)
+          call kp%init(n, max_iter, abs_tol = abstol)
        type is(gmres_t)
-          call kp%init(n, abs_tol = abstol)
+          call kp%init(n, max_iter, abs_tol = abstol)
        type is(sx_gmres_t)
-          call kp%init(n, abs_tol = abstol)
+          call kp%init(n, max_iter, abs_tol = abstol)
        type is(gmres_device_t)
-          call kp%init(n, abs_tol = abstol)
+          call kp%init(n, max_iter, abs_tol = abstol)
        type is(bicgstab_t)
-          call kp%init(n, abs_tol = abstol)
+          call kp%init(n, max_iter, abs_tol = abstol)
        end select
     else if (present(M)) then
        select type(kp => ksp)
        type is(cg_t)
-          call kp%init(n, M = M)
+          call kp%init(n, max_iter, M = M)
        type is(sx_cg_t)
-          call kp%init(n, M = M)       
+          call kp%init(n, max_iter, M = M)
        type is(cg_device_t)
-          call kp%init(n, M = M)
+          call kp%init(n, max_iter, M = M)
        type is(pipecg_t)
-          call kp%init(n, M = M)
+          call kp%init(n, max_iter, M = M)
        type is(sx_pipecg_t)
-          call kp%init(n, M = M)
+          call kp%init(n, max_iter, M = M)
        type is (pipecg_device_t)
-          call kp%init(n, M = M)
+          call kp%init(n, max_iter, M = M)
+       type is (fusedcg_device_t)
+          call kp%init(n, max_iter, M = M)
        type is(cacg_t)
-          call kp%init(n, M = M)
+          call kp%init(n, max_iter, M = M)
        type is(gmres_t)
-          call kp%init(n, M = M)
+          call kp%init(n, max_iter, M = M)
        type is(sx_gmres_t)
-          call kp%init(n, M = M)
+          call kp%init(n, max_iter, M = M)
        type is(gmres_device_t)
-          call kp%init(n, M = M)
+          call kp%init(n, max_iter, M = M)
        type is(bicgstab_t)
-          call kp%init(n, M = M)
+          call kp%init(n, max_iter, M = M)
        end select
     else
        select type(kp => ksp)
        type is(cg_t)
-          call kp%init(n)
+          call kp%init(n, max_iter)
        type is(sx_cg_t)
-          call kp%init(n)       
+          call kp%init(n, max_iter)
        type is(cg_device_t)
-          call kp%init(n)       
+          call kp%init(n, max_iter)
        type is(pipecg_t)
-          call kp%init(n)
+          call kp%init(n, max_iter)
        type is(sx_pipecg_t)
-          call kp%init(n)
+          call kp%init(n, max_iter)
        type is (pipecg_device_t)
-          call kp%init(n)
+          call kp%init(n, max_iter)
+       type is (fusedcg_device_t)
+          call kp%init(n, max_iter)
        type is(cacg_t)
-          call kp%init(n)
+          call kp%init(n, max_iter)
        type is(gmres_t)
-          call kp%init(n)
+          call kp%init(n, max_iter)
        type is(sx_gmres_t)
-          call kp%init(n)
+          call kp%init(n, max_iter)
        type is(gmres_device_t)
-          call kp%init(n)
+          call kp%init(n, max_iter)
        type is(bicgstab_t)
-          call kp%init(n)
+          call kp%init(n, max_iter)
        end select
     end if
 
@@ -215,13 +234,15 @@ subroutine krylov_solver_destroy(ksp)
        type is(sx_cg_t)
           call kp%free()
        type is(cg_device_t)
-          call kp%free()       
+          call kp%free()
        type is(pipecg_t)
           call kp%free()
        type is(sx_pipecg_t)
           call kp%free()
        type is (pipecg_device_t)
           call kp%free()
+       type is (fusedcg_device_t)
+          call kp%free()
        type is(cacg_t)
           call kp%free()
        type is(gmres_t)
@@ -234,8 +255,8 @@ subroutine krylov_solver_destroy(ksp)
           call kp%free()
        end select
     end if
- 
+
   end subroutine krylov_solver_destroy
-    
+
 end module krylov_fctry
-  
+
diff --git a/src/krylov/pc_hsmg.f90 b/src/krylov/pc_hsmg.f90
index 0e911d9b024..ce667e8d06b 100644
--- a/src/krylov/pc_hsmg.f90
+++ b/src/krylov/pc_hsmg.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 !> Krylov preconditioner
@@ -82,7 +82,7 @@ module hsmg
   use field, only : field_t
   use coefs, only : coef_t
   use mesh, only : mesh_t
-  use krylov, only : ksp_t, ksp_monitor_t
+  use krylov, only : ksp_t, ksp_monitor_t, KSP_MAX_ITER
   use krylov_fctry, only : krylov_solver_factory, krylov_solver_destroy
   !$ use omp_lib
   implicit none
@@ -98,15 +98,15 @@ module hsmg
      type(schwarz_t), pointer :: schwarz
      type(field_t), pointer :: e
   end type multigrid_t
- 
+
   type, public, extends(pc_t) :: hsmg_t
      type(mesh_t), pointer :: msh
      integer :: nlvls !< Number of levels in the multigrid
      type(multigrid_t), allocatable :: grids(:) !< array for multigrids
      type(gs_t) :: gs_crs, gs_mg !< gather scatter for lower levels
      type(space_t) :: Xh_crs, Xh_mg !< spaces for lower levels
-     type(dofmap_t) :: dm_crs, dm_mg 
-     type(coef_t) :: c_crs, c_mg 
+     type(dofmap_t) :: dm_crs, dm_mg
+     type(coef_t) :: c_crs, c_mg
      type(dirichlet_t) :: bc_crs, bc_mg, bc_reg
      type(bc_list_t) :: bclst_crs, bclst_mg, bclst_reg
      type(schwarz_t) :: schwarz, schwarz_mg, schwarz_crs !< Schwarz decompostions
@@ -124,15 +124,15 @@ module hsmg
      type(c_ptr) :: r_d = C_NULL_PTR
      type(c_ptr) :: hsmg_event
      type(c_ptr) :: gs_event
-  contains
+   contains
      procedure, pass(this) :: init => hsmg_init
      procedure, pass(this) :: free => hsmg_free
      procedure, pass(this) :: solve => hsmg_solve
      procedure, pass(this) :: update => hsmg_set_h
   end type hsmg_t
-  
+
 contains
-  
+
   !> @note I do not think we actually use the same grids as they do in the original!
   subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype)
     class(hsmg_t), intent(inout), target :: this
@@ -140,18 +140,18 @@ subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype)
     type(space_t), intent(inout), target :: Xh
     type(coef_t), intent(inout), target :: coef
     type(dofmap_t), intent(inout), target :: dof
-    type(gs_t), intent(inout), target :: gs_h 
+    type(gs_t), intent(inout), target :: gs_h
     type(bc_list_t), intent(inout), target :: bclst
     character(len=*), optional :: crs_pctype
     integer :: n, i
     integer :: lx_crs, lx_mid
-    
+
     call this%free()
-    this%nlvls = 3 
+    this%nlvls = 3
     lx_crs = 2
     if (Xh%lx .lt. 5) then
        lx_mid = max(Xh%lx-1,3)
-       
+
        if(Xh%lx .le. 2) call neko_error('Polynomial order < 2 not supported for hsmg precon')
 
     else
@@ -162,22 +162,22 @@ subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype)
     allocate(this%w(dof%size()))
     allocate(this%r(dof%size()))
 
-    
+
     ! Compute all elements as if they are deformed
     call msh%all_deformed()
 
     n = dof%size()
     call this%e%init(dof, 'work array')
     call this%wf%init(dof, 'work 2')
-    
+
     call this%Xh_crs%init(GLL, lx_crs, lx_crs, lx_crs)
-    this%dm_crs = dofmap_t(msh, this%Xh_crs) 
+    this%dm_crs = dofmap_t(msh, this%Xh_crs)
     call this%gs_crs%init(this%dm_crs)
     call this%e_crs%init(this%dm_crs, 'work crs')
     call this%c_crs%init(this%gs_crs)
-    
+
     call this%Xh_mg%init(GLL, lx_mid, lx_mid, lx_mid)
-    this%dm_mg = dofmap_t(msh, this%Xh_mg) 
+    this%dm_mg = dofmap_t(msh, this%Xh_mg)
     call this%gs_mg%init(this%dm_mg)
     call this%e_mg%init(this%dm_mg, 'work midl')
     call this%c_mg%init(this%gs_mg)
@@ -201,15 +201,15 @@ subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype)
     ! Create a backend specific krylov solver
     if (present(crs_pctype)) then
        call krylov_solver_factory(this%crs_solver, &
-            this%dm_crs%size(), trim(crs_pctype), M = this%pc_crs)
+            this%dm_crs%size(), trim(crs_pctype), KSP_MAX_ITER, M = this%pc_crs)
     else
        call krylov_solver_factory(this%crs_solver, &
-            this%dm_crs%size(), 'cg', M = this%pc_crs)
+            this%dm_crs%size(), 'cg', KSP_MAX_ITER, M = this%pc_crs)
     end if
 
-    call this%bc_crs%init(this%dm_crs)
-    call this%bc_mg%init(this%dm_mg)
-    call this%bc_reg%init(dof)
+    call this%bc_crs%init(this%c_crs)
+    call this%bc_mg%init(this%c_mg)
+    call this%bc_reg%init(coef)
     if (bclst%n .gt. 0) then
        do i = 1, bclst%n
           call this%bc_reg%mark_facets(bclst%bc(i)%bcp%marked_facet)
@@ -227,7 +227,7 @@ subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype)
     call bc_list_init(this%bclst_crs)
     call bc_list_add(this%bclst_crs, this%bc_crs)
 
-    
+
     call this%bc_mg%finalize()
     call this%bc_mg%set_g(0.0_rp)
     call bc_list_init(this%bclst_mg)
@@ -241,14 +241,14 @@ subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype)
     call this%interp_mid_crs%init(this%Xh_mg,this%Xh_crs)
 
     call hsmg_fill_grid(dof, gs_h, Xh, coef, this%bclst_reg, this%schwarz, &
-                        this%e, this%grids, 3) 
+                        this%e, this%grids, 3)
     call hsmg_fill_grid(this%dm_mg, this%gs_mg, this%Xh_mg, this%c_mg, &
                         this%bclst_mg, this%schwarz_mg, this%e_mg, &
-                        this%grids, 2) 
+                        this%grids, 2)
     call hsmg_fill_grid(this%dm_crs, this%gs_crs, this%Xh_crs, &
                         this%c_crs, this%bclst_crs, this%schwarz_crs, &
-                        this%e_crs, this%grids, 1) 
-                 
+                        this%e_crs, this%grids, 1)
+
     call hsmg_set_h(this)
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_map(this%w, this%w_d, n)
@@ -265,11 +265,11 @@ subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype)
     call device_event_create(this%hsmg_event, 2)
     call device_event_create(this%gs_event, 2)
   end subroutine hsmg_init
-  
+
   subroutine hsmg_set_h(this)
-   class(hsmg_t), intent(inout) :: this
+    class(hsmg_t), intent(inout) :: this
 !    integer :: i
-   !Yeah I dont really know what to do here. For incompressible flow not much happens
+    !Yeah I dont really know what to do here. For incompressible flow not much happens
     this%grids(1)%coef%ifh2 = .false.
     call copy(this%grids(1)%coef%h1, this%grids(3)%coef%h1, &
          this%grids(1)%dof%size())
@@ -280,7 +280,7 @@ subroutine hsmg_set_h(this)
   end subroutine hsmg_set_h
 
 
-  subroutine hsmg_fill_grid(dof, gs_h, Xh, coef, bclst, schwarz, e, grids, l) 
+  subroutine hsmg_fill_grid(dof, gs_h, Xh, coef, bclst, schwarz, e, grids, l)
     type(dofmap_t), target, intent(in):: dof
     type(gs_t), target, intent(in) :: gs_h
     type(space_t), target, intent(in) :: Xh
@@ -308,15 +308,15 @@ subroutine hsmg_free(this)
     if (allocated(this%ax)) then
        deallocate(this%ax)
     end if
-    
+
     if (allocated(this%grids)) then
        deallocate(this%grids)
     end if
-    
+
     if (allocated(this%w)) then
        deallocate(this%w)
     end if
-    
+
     if (allocated(this%r)) then
        deallocate(this%r)
     end if
@@ -339,17 +339,17 @@ subroutine hsmg_free(this)
        call krylov_solver_destroy(this%crs_solver)
        deallocate(this%crs_solver)
     end if
-    
-    if (allocated(this%pc_crs)) then 
+
+    if (allocated(this%pc_crs)) then
        select type(pc => this%pc_crs)
        type is (jacobi_t)
           call pc%free()
        type is (sx_jacobi_t)
           call pc%free()
-       end select       
+       end select
        deallocate(this%pc_crs)
     end if
-    
+
   end subroutine hsmg_free
 
   !> The h1mg preconditioner from Nek5000.
@@ -362,11 +362,11 @@ subroutine hsmg_solve(this, z, r, n)
     type(ksp_monitor_t) :: crs_info
     integer :: i, thrdid, nthrds
 
-    call profiler_start_region('HSMG solve')
+    call profiler_start_region('HSMG solve', 8)
     if (NEKO_BCKND_DEVICE .eq. 1) then
        z_d = device_get_ptr(z)
        r_d = device_get_ptr(r)
-       !We should not work with the input 
+       !We should not work with the input
        call device_copy(this%r_d, r_d, n)
        call bc_list_apply_scalar(this%bclst_reg, r, n)
 
@@ -402,18 +402,21 @@ subroutine hsmg_solve(this, z, r, n)
        nthrds = 1
        !$ thrdid = omp_get_thread_num()
        !$ nthrds = omp_get_num_threads()
-       
+
        if (thrdid .eq. 0) then
-          call this%grids(3)%schwarz%compute(z, this%r)      
-          call this%grids(2)%schwarz%compute(this%grids(2)%e%x,this%w) 
+          call profiler_start_region('HSMG schwarz', 9)
+          call this%grids(3)%schwarz%compute(z, this%r)
+          call this%grids(2)%schwarz%compute(this%grids(2)%e%x,this%w)
+          call profiler_end_region
        end if
-       if (nthrds .eq. 1 .or. thrdid .eq. 1) then 
+       if (nthrds .eq. 1 .or. thrdid .eq. 1) then
+          call profiler_start_region('HSMG coarse grid', 10)
           call this%grids(1)%gs_h%op(this%wf%x, &
                this%grids(1)%dof%size(), GS_OP_ADD, this%gs_event)
           call device_event_sync(this%gs_event)
           call bc_list_apply_scalar(this%grids(1)%bclst, this%wf%x, &
                                     this%grids(1)%dof%size())
-          call profiler_start_region('HSMG coarse-solve')
+          call profiler_start_region('HSMG coarse-solve', 11)
           crs_info = this%crs_solver%solve(this%Ax, this%grids(1)%e, this%wf%x, &
                                        this%grids(1)%dof%size(), &
                                        this%grids(1)%coef, &
@@ -421,8 +424,8 @@ subroutine hsmg_solve(this, z, r, n)
                                        this%grids(1)%gs_h, this%niter)
           call profiler_end_region
           call bc_list_apply_scalar(this%grids(1)%bclst, this%grids(1)%e%x,&
-                                    this%grids(1)%dof%size()) 
-
+                                    this%grids(1)%dof%size())
+          call profiler_end_region
        end if
        !$omp end parallel
 
@@ -438,11 +441,11 @@ subroutine hsmg_solve(this, z, r, n)
        call device_event_sync(this%gs_event)
        call device_col2(z_d, this%grids(3)%coef%mult_d, this%grids(3)%dof%size())
     else
-       !We should not work with the input 
+       !We should not work with the input
        call copy(this%r, r, n)
 
        !OVERLAPPING Schwarz exchange and solve
-       call this%grids(3)%schwarz%compute(z, this%r)      
+       call this%grids(3)%schwarz%compute(z, this%r)
        ! DOWNWARD Leg of V-cycle, we are pretty hardcoded here but w/e
        call col2(this%r, this%grids(3)%coef%mult, &
                  this%grids(3)%dof%size())
@@ -451,7 +454,7 @@ subroutine hsmg_solve(this, z, r, n)
                                      this%msh%nelv, this%grids(2)%Xh)
        call this%grids(2)%gs_h%op(this%w, this%grids(2)%dof%size(), GS_OP_ADD)
        !OVERLAPPING Schwarz exchange and solve
-       call this%grids(2)%schwarz%compute(this%grids(2)%e%x,this%w)  
+       call this%grids(2)%schwarz%compute(this%grids(2)%e%x,this%w)
        call col2(this%w, this%grids(2)%coef%mult, this%grids(2)%dof%size())
        !restrict residual to crs
        call this%interp_mid_crs%map(this%r,this%w,this%msh%nelv,this%grids(1)%Xh)
@@ -460,7 +463,7 @@ subroutine hsmg_solve(this, z, r, n)
        call this%grids(1)%gs_h%op(this%r, this%grids(1)%dof%size(), GS_OP_ADD)
        call bc_list_apply_scalar(this%grids(1)%bclst, this%r, &
                                  this%grids(1)%dof%size())
-       call profiler_start_region('HSMG coarse-solve')
+       call profiler_start_region('HSMG coarse-solve', 11)
        crs_info = this%crs_solver%solve(this%Ax, this%grids(1)%e, this%r, &
                                     this%grids(1)%dof%size(), &
                                     this%grids(1)%coef, &
diff --git a/src/krylov/pc_identity.f90 b/src/krylov/pc_identity.f90
index 2f82954628b..6d47665f35e 100644
--- a/src/krylov/pc_identity.f90
+++ b/src/krylov/pc_identity.f90
@@ -37,10 +37,10 @@ module identity
   use num_types, only : rp
   implicit none
   private
-  
+
   !> Defines a canonical Krylov preconditioner
   type, public, extends(pc_t) :: ident_t
-  contains
+   contains
      procedure, pass(this) :: solve => ident_solve
      procedure, pass(this) :: update => ident_update
   end type ident_t
@@ -53,12 +53,12 @@ subroutine ident_solve(this, z, r, n)
     class(ident_t), intent(inout) :: this
     real(kind=rp), dimension(n), intent(inout) :: z
     real(kind=rp), dimension(n), intent(inout) :: r
-    call copy(z, r, n)    
+    call copy(z, r, n)
   end subroutine ident_solve
 
   !> Mandatory update routine
   subroutine ident_update(this)
     class(ident_t), intent(inout) :: this
   end subroutine ident_update
-  
+
 end module identity
diff --git a/src/krylov/precon.f90 b/src/krylov/precon.f90
index 17c17e8f0cd..003c22a3376 100644
--- a/src/krylov/precon.f90
+++ b/src/krylov/precon.f90
@@ -35,7 +35,7 @@ module precon
   use num_types, only : rp
   implicit none
   private
-  
+
   !> Defines a canonical Krylov preconditioner
   type, public, abstract :: pc_t
    contains
diff --git a/src/krylov/precon_fctry.f90 b/src/krylov/precon_fctry.f90
index 265296e507a..58eaa59338d 100644
--- a/src/krylov/precon_fctry.f90
+++ b/src/krylov/precon_fctry.f90
@@ -43,7 +43,7 @@ module precon_fctry
   implicit none
 
 contains
-  
+
   !> Create a preconditioner
   subroutine precon_factory(pc, pctype)
     class(pc_t), target, allocatable, intent(inout) :: pc
@@ -73,7 +73,7 @@ subroutine precon_factory(pc, pctype)
     else
        call neko_error('Unknown preconditioner '//trim(pctype))
     end if
-    
+
   end subroutine precon_factory
 
   !> Destroy a preconditioner
@@ -90,9 +90,9 @@ subroutine precon_destroy(pc)
           call pcp%free()
        type is (hsmg_t)
           call pcp%free()
-       end select                 
+       end select
     end if
-    
+
   end subroutine precon_destroy
-  
+
 end module precon_fctry
diff --git a/src/les/bcknd/cpu/vreman_cpu.f90 b/src/les/bcknd/cpu/vreman_cpu.f90
new file mode 100644
index 00000000000..645f670bedf
--- /dev/null
+++ b/src/les/bcknd/cpu/vreman_cpu.f90
@@ -0,0 +1,139 @@
+! Copyright (c) 2023, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Implements the CPU kernel for the `vreman_t` type.
+module vreman_cpu
+  use num_types, only : rp
+  use field_list, only : field_list_t
+  use math, only : cadd, NEKO_EPS
+  use scratch_registry, only : neko_scratch_registry
+  use field_registry, only : neko_field_registry
+  use field, only : field_t
+  use operators, only : dudxyz
+  use coefs, only : coef_t
+  implicit none
+  private
+
+  public :: vreman_compute_cpu
+
+contains
+
+  !> Compute eddy viscosity on the CPU.
+  !! @param t The time value.
+  !! @param tstep The current time-step.
+  !! @param coef SEM coefficients.
+  !! @param nut The SGS viscosity array.
+  !! @param delta The LES lengthscale.
+  !! @param c The Vreman model constant
+  subroutine vreman_compute_cpu(t, tstep, coef, nut, delta, c)
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+    type(coef_t), intent(in) :: coef
+    type(field_t), intent(inout) :: nut
+    type(field_t), intent(in) :: delta
+    real(kind=rp), intent(in) :: c
+    ! This is the alpha tensor in the paper
+    type(field_t), pointer :: a11, a12, a13, a21, a22, a23, a31, a32, a33
+    type(field_t), pointer :: u, v, w
+
+    real(kind=rp) :: beta11
+    real(kind=rp) :: beta12
+    real(kind=rp) :: beta13
+    real(kind=rp) :: beta22
+    real(kind=rp) :: beta23
+    real(kind=rp) :: beta33
+    real(kind=rp) :: b_beta
+    real(kind=rp) :: aijaij
+    integer :: temp_indices(9)
+    integer :: e, i
+
+    u => neko_field_registry%get_field_by_name("u")
+    v => neko_field_registry%get_field_by_name("v")
+    w => neko_field_registry%get_field_by_name("u")
+
+    call neko_scratch_registry%request_field(a11, temp_indices(1))
+    call neko_scratch_registry%request_field(a12, temp_indices(2))
+    call neko_scratch_registry%request_field(a13, temp_indices(3))
+    call neko_scratch_registry%request_field(a21, temp_indices(4))
+    call neko_scratch_registry%request_field(a22, temp_indices(5))
+    call neko_scratch_registry%request_field(a23, temp_indices(6))
+    call neko_scratch_registry%request_field(a31, temp_indices(7))
+    call neko_scratch_registry%request_field(a32, temp_indices(8))
+    call neko_scratch_registry%request_field(a33, temp_indices(9))
+
+
+    ! Compute the derivatives of the velocity (the alpha tensor)
+    call dudxyz (a11%x, u%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
+    call dudxyz (a12%x, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
+    call dudxyz (a13%x, u%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
+
+    call dudxyz (a21%x, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
+    call dudxyz (a22%x, v%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
+    call dudxyz (a23%x, v%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
+
+    call dudxyz (a31%x, w%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
+    call dudxyz (a32%x, w%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
+    call dudxyz (a33%x, w%x, coef%drdz, coef%dsdz, coef%dtdz, coef)
+
+    do e=1, coef%msh%nelv
+       do i=1, coef%Xh%lxyz
+          ! beta_ij = alpha_mi alpha_mj
+          beta11 = a11%x(i,1,1,e)**2 + a21%x(i,1,1,e)**2 + a31%x(i,1,1,e)**2
+          beta22 = a12%x(i,1,1,e)**2 + a22%x(i,1,1,e)**2 + a32%x(i,1,1,e)**2
+          beta33 = a13%x(i,1,1,e)**2 + a23%x(i,1,1,e)**2 + a33%x(i,1,1,e)**2
+          beta12 = a11%x(i,1,1,e)*a12%x(i,1,1,e) + &
+                   a21%x(i,1,1,e)*a22%x(i,1,1,e) + &
+                   a31%x(i,1,1,e)*a32%x(i,1,1,e)
+          beta13 = a11%x(i,1,1,e)*a13%x(i,1,1,e) + &
+                   a21%x(i,1,1,e)*a23%x(i,1,1,e) + &
+                   a31%x(i,1,1,e)*a33%x(i,1,1,e)
+          beta23 = a12%x(i,1,1,e)*a13%x(i,1,1,e) + &
+                   a22%x(i,1,1,e)*a23%x(i,1,1,e) + &
+                   a32%x(i,1,1,e)*a33%x(i,1,1,e)
+
+          b_beta = beta11*beta22 - beta12*beta12 + beta11*beta33 - beta13*beta13 &
+                  + beta22*beta33 - beta23*beta23
+
+          b_beta = max(0.0_rp, b_beta)
+
+          ! alpha_ij alpha_ij
+          aijaij = beta11 + beta22 + beta33
+
+          nut%x(i,1,1,e) = c*delta%x(i,1,1,e) * sqrt(b_beta/(aijaij + NEKO_EPS))
+       end do
+    end do
+
+    call neko_scratch_registry%relinquish_field(temp_indices)
+  end subroutine vreman_compute_cpu
+
+end module vreman_cpu
+
diff --git a/src/les/les_model.f90 b/src/les/les_model.f90
new file mode 100644
index 00000000000..4ec763120f3
--- /dev/null
+++ b/src/les/les_model.f90
@@ -0,0 +1,194 @@
+! Copyright (c) 2023, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!> Implements `les_model_t`.
+module les_model
+  use num_types, only : rp
+  use field, only : field_t, field_ptr_t
+  use json_module, only : json_file
+  use field_registry, only : neko_field_registry
+  use dofmap, only : dofmap_t
+  use coefs, only : coef_t
+  use gs_ops, only : GS_OP_ADD
+  use neko_config, only : NEKO_BCKND_DEVICE
+  use device, only : device_memcpy, HOST_TO_DEVICE
+  implicit none
+  private
+
+  !> Base abstract type for LES models based on the Boussinesq approximation.
+  type, abstract, public :: les_model_t
+     !> Subgrid kinematic viscosity.
+     type(field_t), pointer :: nut => null()
+     !> LES lengthscale.
+     type(field_t), pointer :: delta => null()
+     !> SEM coefficients.
+     type(coef_t), pointer :: coef => null()
+   contains
+     !> Constructor for the les_model_t (base) class.
+     procedure, pass(this) :: init_base => les_model_init_base
+     !> Destructor for the les_model_t (base) class.
+     procedure, pass(this) :: free_base => les_model_free_base
+     !> Compute the LES length-scale
+     procedure, pass(this) :: compute_delta => les_model_compute_delta
+     !> The common constructor.
+     procedure(les_model_init), pass(this), deferred :: init
+     !> Destructor.
+     procedure(les_model_free), pass(this), deferred :: free
+     !> Compute eddy viscosity.
+     procedure(les_model_compute), pass(this), deferred :: compute
+  end type les_model_t
+
+  abstract interface
+     !> Compute eddy viscosity.
+     !! @param t The time value.
+     !! @param tstep The current time-step.
+     subroutine les_model_compute(this, t, tstep)
+       import les_model_t, rp
+       class(les_model_t), intent(inout) :: this
+       real(kind=rp), intent(in) :: t
+       integer, intent(in) :: tstep
+     end subroutine les_model_compute
+  end interface
+
+  abstract interface
+     !> Common constructor.
+     !! @param dofmap SEM map of degrees of freedom.
+     !! @param coef SEM coefficients.
+     !! @param json A dictionary with parameters.
+     subroutine les_model_init(this, dofmap, coef, json)
+       import les_model_t, json_file, dofmap_t, coef_t
+       class(les_model_t), intent(inout) :: this
+       type(coef_t), intent(in) :: coef
+       type(dofmap_t), intent(in) :: dofmap
+       type(json_file), intent(inout) :: json
+     end subroutine les_model_init
+  end interface
+
+  abstract interface
+     !> Destructor.
+     subroutine les_model_free(this)
+       import les_model_t
+       class(les_model_t), intent(inout) :: this
+     end subroutine les_model_free
+  end interface
+
+contains
+  !> Constructor for the les_model_t (base) class.
+  !! @param dofmap SEM map of degrees of freedom.
+  !! @param coef SEM coefficients.
+  !! @param nu_name The name of the turbulent viscosity field.
+  subroutine les_model_init_base(this, dofmap, coef, nut_name)
+    class(les_model_t), intent(inout) :: this
+    type(dofmap_t), intent(in) :: dofmap
+    type(coef_t), target, intent(in) :: coef
+    character(len=*), intent(in) :: nut_name
+
+    if (.not. neko_field_registry%field_exists(trim(nut_name))) then
+       call neko_field_registry%add_field(dofmap, trim(nut_name))
+    end if
+    if (.not. neko_field_registry%field_exists("les_delta")) then
+       call neko_field_registry%add_field(dofmap, "les_delta")
+    end if
+    this%nut => neko_field_registry%get_field(trim(nut_name))
+    this%delta => neko_field_registry%get_field("les_delta")
+    this%coef => coef
+
+    call this%compute_delta()
+  end subroutine les_model_init_base
+
+  !> Destructor for the les_model_t (base) class.
+  subroutine les_model_free_base(this)
+    class(les_model_t), intent(inout) :: this
+
+    nullify(this%nut)
+    nullify(this%coef)
+  end subroutine les_model_free_base
+
+  !> Compute the LES lengthscale.
+  !! For each GLL point, we take the distance between its neighbours in all 3
+  !! directions divided by 2 with the exception of face nodes, where only one
+  !! neighbour exists. To form the lengthscale the distances along 3 directions
+  !! are multiplied, and a cubic root is extracted from the result. This
+  !! roughly corresponds to a cubic root of the cell volume in FVM computations.
+  subroutine les_model_compute_delta(this)
+    class(les_model_t), intent(inout) :: this
+    integer :: e, i, j, k
+    integer ::  im, ip, jm, jp, km, kp
+    real(kind=rp) :: di, dj, dk, ndim_inv
+
+
+    do e=1, this%coef%msh%nelv
+       do k=1, this%coef%Xh%lz
+         km = max(1, k-1)
+         kp = min(this%coef%Xh%lz, k+1)
+
+         do j=1, this%coef%Xh%ly
+           jm = max(1, j-1)
+           jp = min(this%coef%Xh%ly, j+1)
+
+           do i=1, this%coef%Xh%lx
+             im = max(1, i-1)
+             ip = min(this%coef%Xh%lx, i+1)
+
+             di = (this%coef%dof%x(ip,j,k,e) - this%coef%dof%x(im,j,k,e))**2 &
+                + (this%coef%dof%y(ip,j,k,e) - this%coef%dof%y(im,j,k,e))**2 &
+                + (this%coef%dof%z(ip,j,k,e) - this%coef%dof%z(im,j,k,e))**2
+
+             dj = (this%coef%dof%x(i,jp,k,e) - this%coef%dof%x(i,jm,k,e))**2 &
+                + (this%coef%dof%y(i,jp,k,e) - this%coef%dof%y(i,jm,k,e))**2 &
+                + (this%coef%dof%z(i,jp,k,e) - this%coef%dof%z(i,jm,k,e))**2
+
+             dk = (this%coef%dof%x(i,j,kp,e) - this%coef%dof%x(i,j,km,e))**2 &
+                + (this%coef%dof%y(i,j,kp,e) - this%coef%dof%y(i,j,km,e))**2 &
+                + (this%coef%dof%z(i,j,kp,e) - this%coef%dof%z(i,j,km,e))**2
+
+             di = sqrt(di) / (ip - im)
+             dj = sqrt(dj) / (jp - jm)
+             dk = sqrt(dk) / (kp - km)
+             this%delta%x(i,j,k,e) = (di * dj * dk)**(1.0_rp / 3.0_rp)
+
+           enddo
+         enddo
+       enddo
+    enddo
+
+    call this%coef%gs_h%op(this%delta, GS_OP_ADD)
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_memcpy(this%delta%x, this%delta%x_d, this%delta%dof%size(),&
+                          HOST_TO_DEVICE, sync=.false.)
+    end if
+
+  end subroutine les_model_compute_delta
+
+end module les_model
\ No newline at end of file
diff --git a/src/les/les_model_fctry.f90 b/src/les/les_model_fctry.f90
new file mode 100644
index 00000000000..b562198967a
--- /dev/null
+++ b/src/les/les_model_fctry.f90
@@ -0,0 +1,71 @@
+
+! Copyright (c) 2021-2022, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+module les_model_fctry
+  use les_model, only : les_model_t
+  use vreman, only : vreman_t
+  use dofmap, only : dofmap_t
+  use coefs, only : coef_t
+  use json_module, only : json_file
+  implicit none
+  private
+
+  public :: les_model_factory
+
+contains
+  !> LES model factory. Both constructs and initializes the object.
+  !! @param les_model The object to be allocated.
+  !! @param name The name of the LES model.
+  !! @param dofmap SEM map of degrees of freedom.
+  !! @param coef SEM coefficients.
+  !! @param json A dictionary with parameters.
+  subroutine les_model_factory(les_model, name, dofmap, coef, json)
+    class(les_model_t), allocatable, target, intent(inout) :: les_model
+    character(len=*), intent(in) :: name
+    type(dofmap_t), intent(in) :: dofmap
+    type(coef_t), intent(in) :: coef
+    type(json_file), intent(inout) :: json
+
+    if (allocated(les_model)) then
+       deallocate(les_model)
+    end if
+
+    if (trim(name) .eq. 'vreman') then
+       allocate(vreman_t::les_model)
+    end if
+
+    call les_model%init(dofmap, coef, json)
+
+  end subroutine les_model_factory
+
+end module les_model_fctry
\ No newline at end of file
diff --git a/src/les/vreman.f90 b/src/les/vreman.f90
new file mode 100644
index 00000000000..71d2903e1e9
--- /dev/null
+++ b/src/les/vreman.f90
@@ -0,0 +1,128 @@
+! Copyright (c) 2023, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!> Implements `vreman_t`.
+module vreman
+  use num_types, only : rp
+  use field, only : field_t
+  use les_model, only : les_model_t
+  use dofmap , only : dofmap_t
+  use json_utils, only : json_get, json_get_or_default
+  use json_module, only : json_file
+  use utils, only : neko_error
+  use neko_config, only : NEKO_BCKND_DEVICE
+  use vreman_cpu, only : vreman_compute_cpu
+  use coefs, only : coef_t
+  implicit none
+  private
+
+  !> Implements the Vreman LES model.
+  !! @note Reference DOI: 10.1063/1.1785131
+  type, public, extends(les_model_t) :: vreman_t
+     !> Model constant, defaults to 0.07.
+     real(kind=rp) :: c
+   contains
+     !> Constructor from JSON.
+     procedure, pass(this) :: init => vreman_init
+     !> Constructor from components.
+     procedure, pass(this) :: init_from_components => vreman_init_from_components
+     !> Destructor.
+     procedure, pass(this) :: free => vreman_free
+     !> Compute eddy viscosity.
+     procedure, pass(this) :: compute => vreman_compute
+  end type vreman_t
+
+contains
+  !> Constructor.
+  !! @param dofmap SEM map of degrees of freedom.
+  !! @param coef SEM coefficients.
+  !! @param json A dictionary with parameters.
+  subroutine vreman_init(this, dofmap, coef, json)
+    class(vreman_t), intent(inout) :: this
+    type(dofmap_t), intent(in) :: dofmap
+    type(coef_t), intent(in) :: coef
+    type(json_file), intent(inout) :: json
+    character(len=:), allocatable :: nut_name
+    real(kind=rp) :: c
+
+    call json_get(json, "nut_field", nut_name)
+    ! Based on the Smagorinsky Cs = 0.17.
+    call json_get_or_default(json, "c", c, 0.07_rp)
+
+    call vreman_init_from_components(this, dofmap, coef, c, nut_name)
+  end subroutine vreman_init
+
+  !> Constructor from components.
+  !! @param dofmap SEM map of degrees of freedom.
+  !! @param coef SEM coefficients.
+  !! @param c The model constant.
+  !! @param nut_name The name of the SGS viscosity field.
+  subroutine vreman_init_from_components(this, dofmap, coef, c, nut_name)
+    class(vreman_t), intent(inout) :: this
+    type(dofmap_t), intent(in) :: dofmap
+    type(coef_t), intent(in) :: coef
+    real(kind=rp) :: c
+    character(len=*), intent(in) :: nut_name
+
+    call this%free()
+
+    call this%init_base(dofmap, coef, nut_name)
+    this%c = c
+
+  end subroutine vreman_init_from_components
+
+  !> Destructor for the les_model_t (base) class.
+  subroutine vreman_free(this)
+    class(vreman_t), intent(inout) :: this
+
+    call this%free_base()
+  end subroutine vreman_free
+
+  !> Compute eddy viscosity.
+  !! @param t The time value.
+  !! @param tstep The current time-step.
+  subroutine vreman_compute(this, t, tstep)
+    class(vreman_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+        call neko_error("Vreman model not implemented on accelarators.")
+    else
+        call vreman_compute_cpu(t, tstep, this%coef, this%nut, this%delta,&
+                                this%c)
+    end if
+
+  end subroutine vreman_compute
+
+end module vreman
\ No newline at end of file
diff --git a/src/math/ax.f90 b/src/math/ax.f90
index b5a553e277a..6e71a6b336a 100644
--- a/src/math/ax.f90
+++ b/src/math/ax.f90
@@ -35,7 +35,6 @@ module ax_product
   use num_types, only : rp
   use coefs, only : coef_t
   use space, only : space_t
-  use field, only : field_t
   use mesh, only : mesh_t
   implicit none
   private
@@ -54,7 +53,7 @@ module ax_product
   !! @param msh Mesh.
   !! @param Xh Function space \f$ X_h \f$.
   abstract interface
-  subroutine ax_compute(w, u, coef, msh, Xh)
+     subroutine ax_compute(w, u, coef, msh, Xh)
        import space_t
        import mesh_t
        import coef_t
@@ -62,11 +61,11 @@ subroutine ax_compute(w, u, coef, msh, Xh)
        import rp
        implicit none
        type(space_t), intent(inout) :: Xh
-       type(mesh_t), intent(inout) :: msh       
+       type(mesh_t), intent(inout) :: msh
        type(coef_t), intent(inout) :: coef
        real(kind=rp), intent(inout) :: w(Xh%lx, Xh%ly, Xh%lz, msh%nelv)
        real(kind=rp), intent(inout) :: u(Xh%lx, Xh%ly, Xh%lz, msh%nelv)
      end subroutine ax_compute
   end interface
-  
+
 end module ax_product
diff --git a/src/math/ax_helm_fctry.f90 b/src/math/ax_helm_fctry.f90
index 6e14c5c991c..d4960c16425 100644
--- a/src/math/ax_helm_fctry.f90
+++ b/src/math/ax_helm_fctry.f90
@@ -49,7 +49,7 @@ module ax_helm_fctry
   !! @param Ax The matrix-vector product type to be allocated.
   subroutine ax_helm_factory(Ax)
     class(ax_t), allocatable, intent(inout) :: Ax
-    
+
     if (allocated(Ax)) then
        deallocate(Ax)
     end if
diff --git a/src/math/bcknd/cpu/ax_helm.f90 b/src/math/bcknd/cpu/ax_helm.f90
index ddcf9c2ab5c..4cf504d9fa0 100644
--- a/src/math/bcknd/cpu/ax_helm.f90
+++ b/src/math/bcknd/cpu/ax_helm.f90
@@ -35,11 +35,10 @@ module ax_helm
   use num_types, only : rp
   use coefs, only : coef_t
   use space, only : space_t
-  use field, only : field_t
   use mesh, only : mesh_t
   use math, only : addcol4
   implicit none
-  private 
+  private
 
   !> CPU matrix-vector product for a Helmholtz problem.
   type, public, extends(ax_t) :: ax_helm_t
@@ -48,7 +47,7 @@ module ax_helm
      procedure, nopass :: compute => ax_helm_compute
   end type ax_helm_t
 
-contains 
+contains
 
   !> Compute the product.
   !! @param w Vector of size @a (lx,ly,lz,nelv).
@@ -64,8 +63,8 @@ subroutine ax_helm_compute(w, u, coef, msh, Xh)
     type(coef_t), intent(inout) :: coef
     real(kind=rp), intent(inout) :: w(Xh%lx, Xh%ly, Xh%lz, msh%nelv)
     real(kind=rp), intent(inout) :: u(Xh%lx, Xh%ly, Xh%lz, msh%nelv)
-    
-    
+
+
     select case(Xh%lx)
     case (14)
        call ax_helm_lx14(w, u, Xh%dx, Xh%dy, Xh%dz, Xh%dxt, Xh%dyt, Xh%dzt, &
@@ -110,10 +109,10 @@ subroutine ax_helm_compute(w, u, coef, msh, Xh)
        call ax_helm_lx(w, u, Xh%dx, Xh%dy, Xh%dz, Xh%dxt, Xh%dyt, Xh%dzt, coef%h1, &
             coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, msh%nelv, Xh%lx)
     end select
-    
+
     if (coef%ifh2) call addcol4 (w,coef%h2,coef%B,u,coef%dof%size())
-    
- 
+
+
   end subroutine ax_helm_compute
 
   !> Generic CPU kernel for the Helmholtz matrix-vector product.
@@ -177,7 +176,7 @@ subroutine ax_helm_lx(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -226,7 +225,7 @@ subroutine ax_helm_lx(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
 
     end do
   end subroutine ax_helm_lx
-  
+
   subroutine ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
        h1, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 14
@@ -270,7 +269,7 @@ subroutine ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dx(i,11) * u(11,j,1,e) &
                         + Dx(i,12) * u(12,j,1,e) &
                         + Dx(i,13) * u(13,j,1,e) &
-                        + Dx(i,14) * u(14,j,1,e) 
+                        + Dx(i,14) * u(14,j,1,e)
           end do
        end do
 
@@ -314,7 +313,7 @@ subroutine ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -344,7 +343,7 @@ subroutine ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dxt(i,11) * ur(11,j,1) &
                         + Dxt(i,12) * ur(12,j,1) &
                         + Dxt(i,13) * ur(13,j,1) &
-                        + Dxt(i,14) * ur(14,j,1) 
+                        + Dxt(i,14) * ur(14,j,1)
           end do
        end do
 
@@ -365,7 +364,7 @@ subroutine ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dyt(j,11) * us(i,11,k) &
                            + Dyt(j,12) * us(i,12,k) &
                            + Dyt(j,13) * us(i,13,k) &
-                           + Dyt(j,14) * us(i,14,k) 
+                           + Dyt(j,14) * us(i,14,k)
              end do
           end do
        end do
@@ -386,13 +385,13 @@ subroutine ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dzt(k,11) * ut(i,1,11) &
                         + Dzt(k,12) * ut(i,1,12) &
                         + Dzt(k,13) * ut(i,1,13) &
-                        + Dzt(k,14) * ut(i,1,14) 
+                        + Dzt(k,14) * ut(i,1,14)
           end do
        end do
 
     end do
   end subroutine ax_helm_lx14
-  
+
   subroutine ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
        h1, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 13
@@ -435,7 +434,7 @@ subroutine ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dx(i,10) * u(10,j,1,e) &
                         + Dx(i,11) * u(11,j,1,e) &
                         + Dx(i,12) * u(12,j,1,e) &
-                        + Dx(i,13) * u(13,j,1,e) 
+                        + Dx(i,13) * u(13,j,1,e)
 
           end do
        end do
@@ -478,7 +477,7 @@ subroutine ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -507,7 +506,7 @@ subroutine ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dxt(i,10) * ur(10,j,1) &
                         + Dxt(i,11) * ur(11,j,1) &
                         + Dxt(i,12) * ur(12,j,1) &
-                        + Dxt(i,13) * ur(13,j,1) 
+                        + Dxt(i,13) * ur(13,j,1)
           end do
        end do
 
@@ -527,7 +526,7 @@ subroutine ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dyt(j,10) * us(i,10,k) &
                            + Dyt(j,11) * us(i,11,k) &
                            + Dyt(j,12) * us(i,12,k) &
-                           + Dyt(j,13) * us(i,13,k) 
+                           + Dyt(j,13) * us(i,13,k)
              end do
           end do
        end do
@@ -547,13 +546,13 @@ subroutine ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dzt(k,10) * ut(i,1,10) &
                         + Dzt(k,11) * ut(i,1,11) &
                         + Dzt(k,12) * ut(i,1,12) &
-                        + Dzt(k,13) * ut(i,1,13) 
+                        + Dzt(k,13) * ut(i,1,13)
           end do
        end do
 
     end do
   end subroutine ax_helm_lx13
-  
+
   subroutine ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
        h1, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 12
@@ -595,7 +594,7 @@ subroutine ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dx(i,9) * u(9,j,1,e) &
                         + Dx(i,10) * u(10,j,1,e) &
                         + Dx(i,11) * u(11,j,1,e) &
-                        + Dx(i,12) * u(12,j,1,e) 
+                        + Dx(i,12) * u(12,j,1,e)
           end do
        end do
 
@@ -635,7 +634,7 @@ subroutine ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -682,7 +681,7 @@ subroutine ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dyt(j,9) * us(i,9,k) &
                            + Dyt(j,10) * us(i,10,k) &
                            + Dyt(j,11) * us(i,11,k) &
-                           + Dyt(j,12) * us(i,12,k) 
+                           + Dyt(j,12) * us(i,12,k)
              end do
           end do
        end do
@@ -701,7 +700,7 @@ subroutine ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dzt(k,9) * ut(i,1,9) &
                         + Dzt(k,10) * ut(i,1,10) &
                         + Dzt(k,11) * ut(i,1,11) &
-                        + Dzt(k,12) * ut(i,1,12) 
+                        + Dzt(k,12) * ut(i,1,12)
           end do
        end do
 
@@ -748,7 +747,7 @@ subroutine ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dx(i,8) * u(8,j,1,e) &
                         + Dx(i,9) * u(9,j,1,e) &
                         + Dx(i,10) * u(10,j,1,e) &
-                        + Dx(i,11) * u(11,j,1,e) 
+                        + Dx(i,11) * u(11,j,1,e)
           end do
        end do
 
@@ -765,7 +764,7 @@ subroutine ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dy(j,8) * u(i,8,k,e) &
                            + Dy(j,9) * u(i,9,k,e) &
                            + Dy(j,10) * u(i,10,k,e) &
-                           + Dy(j,11) * u(i,11,k,e) 
+                           + Dy(j,11) * u(i,11,k,e)
              end do
           end do
        end do
@@ -782,11 +781,11 @@ subroutine ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dz(k,8) * u(i,1,8,e) &
                         + Dz(k,9) * u(i,1,9,e) &
                         + Dz(k,10) * u(i,1,10,e) &
-                        + Dz(k,11) * u(i,1,11,e) 
+                        + Dz(k,11) * u(i,1,11,e)
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -813,7 +812,7 @@ subroutine ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dxt(i,8) * ur(8,j,1) &
                         + Dxt(i,9) * ur(9,j,1) &
                         + Dxt(i,10) * ur(10,j,1) &
-                        + Dxt(i,11) * ur(11,j,1) 
+                        + Dxt(i,11) * ur(11,j,1)
           end do
        end do
 
@@ -831,7 +830,7 @@ subroutine ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dyt(j,8) * us(i,8,k) &
                            + Dyt(j,9) * us(i,9,k) &
                            + Dyt(j,10) * us(i,10,k) &
-                           + Dyt(j,11) * us(i,11,k) 
+                           + Dyt(j,11) * us(i,11,k)
              end do
           end do
        end do
@@ -849,7 +848,7 @@ subroutine ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dzt(k,8) * ut(i,1,8) &
                         + Dzt(k,9) * ut(i,1,9) &
                         + Dzt(k,10) * ut(i,1,10) &
-                        + Dzt(k,11) * ut(i,1,11) 
+                        + Dzt(k,11) * ut(i,1,11)
           end do
        end do
 
@@ -895,7 +894,7 @@ subroutine ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dx(i,7) * u(7,j,1,e) &
                         + Dx(i,8) * u(8,j,1,e) &
                         + Dx(i,9) * u(9,j,1,e) &
-                        + Dx(i,10) * u(10,j,1,e) 
+                        + Dx(i,10) * u(10,j,1,e)
           end do
        end do
 
@@ -911,7 +910,7 @@ subroutine ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dy(j,7) * u(i,7,k,e) &
                            + Dy(j,8) * u(i,8,k,e) &
                            + Dy(j,9) * u(i,9,k,e) &
-                           + Dy(j,10) * u(i,10,k,e) 
+                           + Dy(j,10) * u(i,10,k,e)
              end do
           end do
        end do
@@ -927,11 +926,11 @@ subroutine ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dz(k,7) * u(i,1,7,e) &
                         + Dz(k,8) * u(i,1,8,e) &
                         + Dz(k,9) * u(i,1,9,e) &
-                        + Dz(k,10) * u(i,1,10,e) 
+                        + Dz(k,10) * u(i,1,10,e)
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -957,7 +956,7 @@ subroutine ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dxt(i,7) * ur(7,j,1) &
                         + Dxt(i,8) * ur(8,j,1) &
                         + Dxt(i,9) * ur(9,j,1) &
-                        + Dxt(i,10) * ur(10,j,1) 
+                        + Dxt(i,10) * ur(10,j,1)
           end do
        end do
 
@@ -974,7 +973,7 @@ subroutine ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dyt(j,7) * us(i,7,k) &
                            + Dyt(j,8) * us(i,8,k) &
                            + Dyt(j,9) * us(i,9,k) &
-                           + Dyt(j,10) * us(i,10,k) 
+                           + Dyt(j,10) * us(i,10,k)
              end do
           end do
        end do
@@ -991,7 +990,7 @@ subroutine ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dzt(k,7) * ut(i,1,7) &
                         + Dzt(k,8) * ut(i,1,8) &
                         + Dzt(k,9) * ut(i,1,9) &
-                        + Dzt(k,10) * ut(i,1,10) 
+                        + Dzt(k,10) * ut(i,1,10)
           end do
        end do
 
@@ -1036,7 +1035,7 @@ subroutine ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dx(i,6) * u(6,j,1,e) &
                         + Dx(i,7) * u(7,j,1,e) &
                         + Dx(i,8) * u(8,j,1,e) &
-                        + Dx(i,9) * u(9,j,1,e) 
+                        + Dx(i,9) * u(9,j,1,e)
           end do
        end do
 
@@ -1051,7 +1050,7 @@ subroutine ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dy(j,6) * u(i,6,k,e) &
                            + Dy(j,7) * u(i,7,k,e) &
                            + Dy(j,8) * u(i,8,k,e) &
-                           + Dy(j,9) * u(i,9,k,e) 
+                           + Dy(j,9) * u(i,9,k,e)
              end do
           end do
        end do
@@ -1066,11 +1065,11 @@ subroutine ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dz(k,6) * u(i,1,6,e) &
                         + Dz(k,7) * u(i,1,7,e) &
                         + Dz(k,8) * u(i,1,8,e) &
-                        + Dz(k,9) * u(i,1,9,e) 
+                        + Dz(k,9) * u(i,1,9,e)
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -1095,7 +1094,7 @@ subroutine ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dxt(i,6) * ur(6,j,1) &
                         + Dxt(i,7) * ur(7,j,1) &
                         + Dxt(i,8) * ur(8,j,1) &
-                        + Dxt(i,9) * ur(9,j,1) 
+                        + Dxt(i,9) * ur(9,j,1)
           end do
        end do
 
@@ -1111,7 +1110,7 @@ subroutine ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dyt(j,6) * us(i,6,k) &
                            + Dyt(j,7) * us(i,7,k) &
                            + Dyt(j,8) * us(i,8,k) &
-                           + Dyt(j,9) * us(i,9,k) 
+                           + Dyt(j,9) * us(i,9,k)
              end do
           end do
        end do
@@ -1127,7 +1126,7 @@ subroutine ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dzt(k,6) * ut(i,1,6) &
                         + Dzt(k,7) * ut(i,1,7) &
                         + Dzt(k,8) * ut(i,1,8) &
-                        + Dzt(k,9) * ut(i,1,9) 
+                        + Dzt(k,9) * ut(i,1,9)
           end do
        end do
 
@@ -1171,7 +1170,7 @@ subroutine ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dx(i,5) * u(5,j,1,e) &
                         + Dx(i,6) * u(6,j,1,e) &
                         + Dx(i,7) * u(7,j,1,e) &
-                        + Dx(i,8) * u(8,j,1,e) 
+                        + Dx(i,8) * u(8,j,1,e)
           end do
        end do
 
@@ -1185,7 +1184,7 @@ subroutine ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dy(j,5) * u(i,5,k,e) &
                            + Dy(j,6) * u(i,6,k,e) &
                            + Dy(j,7) * u(i,7,k,e) &
-                           + Dy(j,8) * u(i,8,k,e) 
+                           + Dy(j,8) * u(i,8,k,e)
              end do
           end do
        end do
@@ -1199,11 +1198,11 @@ subroutine ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dz(k,5) * u(i,1,5,e) &
                         + Dz(k,6) * u(i,1,6,e) &
                         + Dz(k,7) * u(i,1,7,e) &
-                        + Dz(k,8) * u(i,1,8,e) 
+                        + Dz(k,8) * u(i,1,8,e)
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -1227,7 +1226,7 @@ subroutine ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dxt(i,5) * ur(5,j,1) &
                         + Dxt(i,6) * ur(6,j,1) &
                         + Dxt(i,7) * ur(7,j,1) &
-                        + Dxt(i,8) * ur(8,j,1) 
+                        + Dxt(i,8) * ur(8,j,1)
           end do
        end do
 
@@ -1242,7 +1241,7 @@ subroutine ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dyt(j,5) * us(i,5,k) &
                            + Dyt(j,6) * us(i,6,k) &
                            + Dyt(j,7) * us(i,7,k) &
-                           + Dyt(j,8) * us(i,8,k) 
+                           + Dyt(j,8) * us(i,8,k)
              end do
           end do
        end do
@@ -1257,7 +1256,7 @@ subroutine ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dzt(k,5) * ut(i,1,5) &
                         + Dzt(k,6) * ut(i,1,6) &
                         + Dzt(k,7) * ut(i,1,7) &
-                        + Dzt(k,8) * ut(i,1,8) 
+                        + Dzt(k,8) * ut(i,1,8)
           end do
        end do
 
@@ -1300,7 +1299,7 @@ subroutine ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dx(i,4) * u(4,j,1,e) &
                         + Dx(i,5) * u(5,j,1,e) &
                         + Dx(i,6) * u(6,j,1,e) &
-                        + Dx(i,7) * u(7,j,1,e) 
+                        + Dx(i,7) * u(7,j,1,e)
           end do
        end do
 
@@ -1313,7 +1312,7 @@ subroutine ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dy(j,4) * u(i,4,k,e) &
                            + Dy(j,5) * u(i,5,k,e) &
                            + Dy(j,6) * u(i,6,k,e) &
-                           + Dy(j,7) * u(i,7,k,e) 
+                           + Dy(j,7) * u(i,7,k,e)
              end do
           end do
        end do
@@ -1326,11 +1325,11 @@ subroutine ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dz(k,4) * u(i,1,4,e) &
                         + Dz(k,5) * u(i,1,5,e) &
                         + Dz(k,6) * u(i,1,6,e) &
-                        + Dz(k,7) * u(i,1,7,e) 
+                        + Dz(k,7) * u(i,1,7,e)
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -1353,7 +1352,7 @@ subroutine ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dxt(i,4) * ur(4,j,1) &
                         + Dxt(i,5) * ur(5,j,1) &
                         + Dxt(i,6) * ur(6,j,1) &
-                        + Dxt(i,7) * ur(7,j,1) 
+                        + Dxt(i,7) * ur(7,j,1)
           end do
        end do
 
@@ -1367,7 +1366,7 @@ subroutine ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dyt(j,4) * us(i,4,k) &
                            + Dyt(j,5) * us(i,5,k) &
                            + Dyt(j,6) * us(i,6,k) &
-                           + Dyt(j,7) * us(i,7,k) 
+                           + Dyt(j,7) * us(i,7,k)
              end do
           end do
        end do
@@ -1381,7 +1380,7 @@ subroutine ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dzt(k,4) * ut(i,1,4) &
                         + Dzt(k,5) * ut(i,1,5) &
                         + Dzt(k,6) * ut(i,1,6) &
-                        + Dzt(k,7) * ut(i,1,7) 
+                        + Dzt(k,7) * ut(i,1,7)
           end do
        end do
 
@@ -1423,7 +1422,7 @@ subroutine ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dx(i,3) * u(3,j,1,e) &
                         + Dx(i,4) * u(4,j,1,e) &
                         + Dx(i,5) * u(5,j,1,e) &
-                        + Dx(i,6) * u(6,j,1,e) 
+                        + Dx(i,6) * u(6,j,1,e)
           end do
        end do
 
@@ -1435,7 +1434,7 @@ subroutine ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dy(j,3) * u(i,3,k,e) &
                            + Dy(j,4) * u(i,4,k,e) &
                            + Dy(j,5) * u(i,5,k,e) &
-                           + Dy(j,6) * u(i,6,k,e) 
+                           + Dy(j,6) * u(i,6,k,e)
              end do
           end do
        end do
@@ -1447,11 +1446,11 @@ subroutine ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dz(k,3) * u(i,1,3,e) &
                         + Dz(k,4) * u(i,1,4,e) &
                         + Dz(k,5) * u(i,1,5,e) &
-                        + Dz(k,6) * u(i,1,6,e) 
+                        + Dz(k,6) * u(i,1,6,e)
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -1473,7 +1472,7 @@ subroutine ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dxt(i,3) * ur(3,j,1) &
                         + Dxt(i,4) * ur(4,j,1) &
                         + Dxt(i,5) * ur(5,j,1) &
-                        + Dxt(i,6) * ur(6,j,1) 
+                        + Dxt(i,6) * ur(6,j,1)
           end do
        end do
 
@@ -1486,7 +1485,7 @@ subroutine ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dyt(j,3) * us(i,3,k) &
                            + Dyt(j,4) * us(i,4,k) &
                            + Dyt(j,5) * us(i,5,k) &
-                           + Dyt(j,6) * us(i,6,k) 
+                           + Dyt(j,6) * us(i,6,k)
              end do
           end do
        end do
@@ -1499,7 +1498,7 @@ subroutine ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dzt(k,3) * ut(i,1,3) &
                         + Dzt(k,4) * ut(i,1,4) &
                         + Dzt(k,5) * ut(i,1,5) &
-                        + Dzt(k,6) * ut(i,1,6) 
+                        + Dzt(k,6) * ut(i,1,6)
           end do
        end do
 
@@ -1540,7 +1539,7 @@ subroutine ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dx(i,2) * u(2,j,1,e) &
                         + Dx(i,3) * u(3,j,1,e) &
                         + Dx(i,4) * u(4,j,1,e) &
-                        + Dx(i,5) * u(5,j,1,e) 
+                        + Dx(i,5) * u(5,j,1,e)
           end do
        end do
 
@@ -1551,7 +1550,7 @@ subroutine ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dy(j,2) * u(i,2,k,e) &
                            + Dy(j,3) * u(i,3,k,e) &
                            + Dy(j,4) * u(i,4,k,e) &
-                           + Dy(j,5) * u(i,5,k,e) 
+                           + Dy(j,5) * u(i,5,k,e)
              end do
           end do
        end do
@@ -1562,11 +1561,11 @@ subroutine ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dz(k,2) * u(i,1,2,e) &
                         + Dz(k,3) * u(i,1,3,e) &
                         + Dz(k,4) * u(i,1,4,e) &
-                        + Dz(k,5) * u(i,1,5,e) 
+                        + Dz(k,5) * u(i,1,5,e)
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -1587,7 +1586,7 @@ subroutine ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dxt(i,2) * ur(2,j,1) &
                         + Dxt(i,3) * ur(3,j,1) &
                         + Dxt(i,4) * ur(4,j,1) &
-                        + Dxt(i,5) * ur(5,j,1) 
+                        + Dxt(i,5) * ur(5,j,1)
           end do
        end do
 
@@ -1599,7 +1598,7 @@ subroutine ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dyt(j,2) * us(i,2,k) &
                            + Dyt(j,3) * us(i,3,k) &
                            + Dyt(j,4) * us(i,4,k) &
-                           + Dyt(j,5) * us(i,5,k) 
+                           + Dyt(j,5) * us(i,5,k)
              end do
           end do
        end do
@@ -1611,7 +1610,7 @@ subroutine ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dzt(k,2) * ut(i,1,2) &
                         + Dzt(k,3) * ut(i,1,3) &
                         + Dzt(k,4) * ut(i,1,4) &
-                        + Dzt(k,5) * ut(i,1,5) 
+                        + Dzt(k,5) * ut(i,1,5)
           end do
        end do
 
@@ -1651,7 +1650,7 @@ subroutine ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
              wur(i,j,1) = Dx(i,1) * u(1,j,1,e) &
                         + Dx(i,2) * u(2,j,1,e) &
                         + Dx(i,3) * u(3,j,1,e) &
-                        + Dx(i,4) * u(4,j,1,e) 
+                        + Dx(i,4) * u(4,j,1,e)
           end do
        end do
 
@@ -1661,7 +1660,7 @@ subroutine ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 wus(i,j,k) = Dy(j,1) * u(i,1,k,e) &
                            + Dy(j,2) * u(i,2,k,e) &
                            + Dy(j,3) * u(i,3,k,e) &
-                           + Dy(j,4) * u(i,4,k,e) 
+                           + Dy(j,4) * u(i,4,k,e)
              end do
           end do
        end do
@@ -1671,11 +1670,11 @@ subroutine ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
              wut(i,1,k) = Dz(k,1) * u(i,1,1,e) &
                         + Dz(k,2) * u(i,1,2,e) &
                         + Dz(k,3) * u(i,1,3,e) &
-                        + Dz(k,4) * u(i,1,4,e) 
+                        + Dz(k,4) * u(i,1,4,e)
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -1695,7 +1694,7 @@ subroutine ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
              w(i,j,1,e) = Dxt(i,1) * ur(1,j,1) &
                         + Dxt(i,2) * ur(2,j,1) &
                         + Dxt(i,3) * ur(3,j,1) &
-                        + Dxt(i,4) * ur(4,j,1) 
+                        + Dxt(i,4) * ur(4,j,1)
           end do
        end do
 
@@ -1706,7 +1705,7 @@ subroutine ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                            + Dyt(j,1) * us(i,1,k) &
                            + Dyt(j,2) * us(i,2,k) &
                            + Dyt(j,3) * us(i,3,k) &
-                           + Dyt(j,4) * us(i,4,k) 
+                           + Dyt(j,4) * us(i,4,k)
              end do
           end do
        end do
@@ -1717,7 +1716,7 @@ subroutine ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                         + Dzt(k,1) * ut(i,1,1) &
                         + Dzt(k,2) * ut(i,1,2) &
                         + Dzt(k,3) * ut(i,1,3) &
-                        + Dzt(k,4) * ut(i,1,4) 
+                        + Dzt(k,4) * ut(i,1,4)
           end do
        end do
 
@@ -1756,7 +1755,7 @@ subroutine ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
           do i = 1, lx
              wur(i,j,1) = Dx(i,1) * u(1,j,1,e) &
                         + Dx(i,2) * u(2,j,1,e) &
-                        + Dx(i,3) * u(3,j,1,e) 
+                        + Dx(i,3) * u(3,j,1,e)
           end do
        end do
 
@@ -1765,7 +1764,7 @@ subroutine ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
              do i = 1, lx
                 wus(i,j,k) = Dy(j,1) * u(i,1,k,e) &
                            + Dy(j,2) * u(i,2,k,e) &
-                           + Dy(j,3) * u(i,3,k,e) 
+                           + Dy(j,3) * u(i,3,k,e)
              end do
           end do
        end do
@@ -1774,11 +1773,11 @@ subroutine ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
           do i = 1, lx*lx
              wut(i,1,k) = Dz(k,1) * u(i,1,1,e) &
                         + Dz(k,2) * u(i,1,2,e) &
-                        + Dz(k,3) * u(i,1,3,e) 
+                        + Dz(k,3) * u(i,1,3,e)
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -1797,7 +1796,7 @@ subroutine ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
           do i = 1, lx
              w(i,j,1,e) = Dxt(i,1) * ur(1,j,1) &
                         + Dxt(i,2) * ur(2,j,1) &
-                        + Dxt(i,3) * ur(3,j,1) 
+                        + Dxt(i,3) * ur(3,j,1)
           end do
        end do
 
@@ -1807,7 +1806,7 @@ subroutine ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 w(i,j,k,e) = w(i,j,k,e) &
                            + Dyt(j,1) * us(i,1,k) &
                            + Dyt(j,2) * us(i,2,k) &
-                           + Dyt(j,3) * us(i,3,k) 
+                           + Dyt(j,3) * us(i,3,k)
              end do
           end do
        end do
@@ -1817,7 +1816,7 @@ subroutine ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
              w(i,1,k,e) = w(i,1,k,e) &
                         + Dzt(k,1) * ut(i,1,1) &
                         + Dzt(k,2) * ut(i,1,2) &
-                        + Dzt(k,3) * ut(i,1,3) 
+                        + Dzt(k,3) * ut(i,1,3)
           end do
        end do
 
@@ -1855,7 +1854,7 @@ subroutine ax_helm_lx2(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
        do j = 1, lx * lx
           do i = 1, lx
              wur(i,j,1) = Dx(i,1) * u(1,j,1,e) &
-                        + Dx(i,2) * u(2,j,1,e) 
+                        + Dx(i,2) * u(2,j,1,e)
           end do
        end do
 
@@ -1863,7 +1862,7 @@ subroutine ax_helm_lx2(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
           do j = 1, lx
              do i = 1, lx
                 wus(i,j,k) = Dy(j,1) * u(i,1,k,e) &
-                           + Dy(j,2) * u(i,2,k,e) 
+                           + Dy(j,2) * u(i,2,k,e)
              end do
           end do
        end do
@@ -1871,11 +1870,11 @@ subroutine ax_helm_lx2(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
        do k = 1, lx
           do i = 1, lx*lx
              wut(i,1,k) = Dz(k,1) * u(i,1,1,e) &
-                        + Dz(k,2) * u(i,1,2,e) 
+                        + Dz(k,2) * u(i,1,2,e)
           end do
        end do
 
-       do i = 1, lx*lx*lx          
+       do i = 1, lx*lx*lx
           ur(i,1,1) = h1(i,1,1,e) &
                     * ( G11(i,1,1,e) * wur(i,1,1) &
                       + G12(i,1,1,e) * wus(i,1,1) &
@@ -1893,7 +1892,7 @@ subroutine ax_helm_lx2(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
        do j = 1, lx*lx
           do i = 1, lx
              w(i,j,1,e) = Dxt(i,1) * ur(1,j,1) &
-                        + Dxt(i,2) * ur(2,j,1) 
+                        + Dxt(i,2) * ur(2,j,1)
           end do
        end do
 
@@ -1902,7 +1901,7 @@ subroutine ax_helm_lx2(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
              do i = 1, lx
                 w(i,j,k,e) = w(i,j,k,e) &
                            + Dyt(j,1) * us(i,1,k) &
-                           + Dyt(j,2) * us(i,2,k) 
+                           + Dyt(j,2) * us(i,2,k)
              end do
           end do
        end do
@@ -1911,7 +1910,7 @@ subroutine ax_helm_lx2(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
           do i = 1, lx*lx
              w(i,1,k,e) = w(i,1,k,e) &
                         + Dzt(k,1) * ut(i,1,1) &
-                        + Dzt(k,2) * ut(i,1,2) 
+                        + Dzt(k,2) * ut(i,1,2)
           end do
        end do
 
diff --git a/src/math/bcknd/cpu/cdtp.f90 b/src/math/bcknd/cpu/cdtp.f90
index d2b3389224c..1500f1384b1 100644
--- a/src/math/bcknd/cpu/cdtp.f90
+++ b/src/math/bcknd/cpu/cdtp.f90
@@ -36,7 +36,7 @@ module cpu_cdtp
   implicit none
 
 contains
-  
+
   subroutine cpu_cdtp_lx(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, lx)
     integer, intent(in) :: nel, lx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
@@ -47,26 +47,26 @@ subroutine cpu_cdtp_lx(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, lx)
     integer :: e, i, j, k, l
 
     do e = 1, nel
-       
+
        do i = 1, lx*lx*lx
           wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e)
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e)
        end do
-       
+
        do j = 1, lx * lx
           do i = 1, lx
              tmp = 0.0_rp
-             !DIR$ LOOP_INFO MIN_TRIPS(15)             
+             !DIR$ LOOP_INFO MIN_TRIPS(15)
              do k = 1, lx
                 tmp = tmp + dxt(i,k) * ta1(k,j,1)
              end do
              dtx(i,j,1,e) = tmp
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
@@ -75,52 +75,52 @@ subroutine cpu_cdtp_lx(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, lx)
           do j = 1, lx
              do i = 1, lx
                 tmp = 0.0_rp
-                !DIR$ LOOP_INFO MIN_TRIPS(15)                
+                !DIR$ LOOP_INFO MIN_TRIPS(15)
                 do l = 1, lx
-                   tmp = tmp + dyt(j,l) * ta1(i,l,k)                   
+                   tmp = tmp + dyt(j,l) * ta1(i,l,k)
                 end do
                 dtx(i,j,k,e) = dtx(i,j,k,e) + tmp
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              tmp = 0.0_rp
-             !DIR$ LOOP_INFO MIN_TRIPS(15)             
+             !DIR$ LOOP_INFO MIN_TRIPS(15)
              do l = 1, lx
                 tmp = tmp + dzt(k,l) * ta1(i,1,l)
              end do
              dtx(i,1,k,e) = dtx(i,1,k,e) + tmp
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx
-  
+
   subroutine cpu_cdtp_lx14(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
     integer, parameter :: lx = 14
     integer, intent(in) :: nel
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx) :: wx, ta1
     integer :: e, i, j, k
-    
+
     do e = 1, nel
-       
+
        do i = 1, lx*lx*lx
           wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e)
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e)
        end do
-       
+
        do j = 1, lx * lx
           do i = 1, lx
              dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) &
@@ -136,14 +136,14 @@ subroutine cpu_cdtp_lx14(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dxt(i,11) * ta1(11,j,1) &
                           + dxt(i,12) * ta1(12,j,1) &
                           + dxt(i,13) * ta1(13,j,1) &
-                          + dxt(i,14) * ta1(14,j,1) 
+                          + dxt(i,14) * ta1(14,j,1)
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -161,15 +161,15 @@ subroutine cpu_cdtp_lx14(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                              + dyt(j,11) * ta1(i,11,k) &
                              + dyt(j,12) * ta1(i,12,k) &
                              + dyt(j,13) * ta1(i,13,k) &
-                             + dyt(j,14) * ta1(i,14,k) 
+                             + dyt(j,14) * ta1(i,14,k)
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dtx(i,1,k,e) = dtx(i,1,k,e) &
@@ -186,32 +186,32 @@ subroutine cpu_cdtp_lx14(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dzt(k,11) * ta1(i,1,11) &
                           + dzt(k,12) * ta1(i,1,12) &
                           + dzt(k,13) * ta1(i,1,13) &
-                          + dzt(k,14) * ta1(i,1,14) 
+                          + dzt(k,14) * ta1(i,1,14)
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx14
-  
+
   subroutine cpu_cdtp_lx13(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
     integer, parameter :: lx = 13
     integer, intent(in) :: nel
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx) :: wx, ta1
     integer :: e, i, j, k
-    
+
     do e = 1, nel
-       
+
        do i = 1, lx*lx*lx
           wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e)
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e)
        end do
-       
+
        do j = 1, lx * lx
           do i = 1, lx
              dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) &
@@ -226,14 +226,14 @@ subroutine cpu_cdtp_lx13(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dxt(i,10) * ta1(10,j,1) &
                           + dxt(i,11) * ta1(11,j,1) &
                           + dxt(i,12) * ta1(12,j,1) &
-                          + dxt(i,13) * ta1(13,j,1) 
+                          + dxt(i,13) * ta1(13,j,1)
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -254,11 +254,11 @@ subroutine cpu_cdtp_lx13(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dtx(i,1,k,e) = dtx(i,1,k,e) &
@@ -274,32 +274,32 @@ subroutine cpu_cdtp_lx13(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dzt(k,10) * ta1(i,1,10) &
                           + dzt(k,11) * ta1(i,1,11) &
                           + dzt(k,12) * ta1(i,1,12) &
-                          + dzt(k,13) * ta1(i,1,13) 
+                          + dzt(k,13) * ta1(i,1,13)
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx13
-  
+
   subroutine cpu_cdtp_lx12(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
     integer, parameter :: lx = 12
     integer, intent(in) :: nel
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx) :: wx, ta1
     integer :: e, i, j, k
-    
+
     do e = 1, nel
-       
+
        do i = 1, lx*lx*lx
           wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e)
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e)
        end do
-       
+
        do j = 1, lx * lx
           do i = 1, lx
              dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) &
@@ -313,14 +313,14 @@ subroutine cpu_cdtp_lx12(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dxt(i,9) * ta1(9,j,1) &
                           + dxt(i,10) * ta1(10,j,1) &
                           + dxt(i,11) * ta1(11,j,1) &
-                          + dxt(i,12) * ta1(12,j,1) 
+                          + dxt(i,12) * ta1(12,j,1)
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -340,11 +340,11 @@ subroutine cpu_cdtp_lx12(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dtx(i,1,k,e) = dtx(i,1,k,e) &
@@ -362,29 +362,29 @@ subroutine cpu_cdtp_lx12(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dzt(k,12) * ta1(i,1,12)
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx12
-  
+
   subroutine cpu_cdtp_lx11(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
     integer, parameter :: lx = 11
     integer, intent(in) :: nel
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx) :: wx, ta1
     integer :: e, i, j, k
-    
+
     do e = 1, nel
-       
+
        do i = 1, lx*lx*lx
           wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e)
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e)
        end do
-       
+
        do j = 1, lx * lx
           do i = 1, lx
              dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) &
@@ -397,14 +397,14 @@ subroutine cpu_cdtp_lx11(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dxt(i,8) * ta1(8,j,1) &
                           + dxt(i,9) * ta1(9,j,1) &
                           + dxt(i,10) * ta1(10,j,1) &
-                          + dxt(i,11) * ta1(11,j,1) 
+                          + dxt(i,11) * ta1(11,j,1)
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -419,15 +419,15 @@ subroutine cpu_cdtp_lx11(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                              + dyt(j,8) * ta1(i,8,k) &
                              + dyt(j,9) * ta1(i,9,k) &
                              + dyt(j,10) * ta1(i,10,k) &
-                             + dyt(j,11) * ta1(i,11,k) 
+                             + dyt(j,11) * ta1(i,11,k)
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dtx(i,1,k,e) = dtx(i,1,k,e) &
@@ -441,22 +441,22 @@ subroutine cpu_cdtp_lx11(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dzt(k,8) * ta1(i,1,8) &
                           + dzt(k,9) * ta1(i,1,9) &
                           + dzt(k,10) * ta1(i,1,10) &
-                          + dzt(k,11) * ta1(i,1,11) 
+                          + dzt(k,11) * ta1(i,1,11)
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx11
-  
+
   subroutine cpu_cdtp_lx10(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
     integer, parameter :: lx = 10
     integer, intent(in) :: nel
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx) :: wx, ta1
     integer :: e, i, j, k
-    
+
     do e = 1, nel
 
        do i = 1, lx*lx*lx
@@ -478,14 +478,14 @@ subroutine cpu_cdtp_lx10(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dxt(i,7) * ta1(7,j,1) &
                           + dxt(i,8) * ta1(8,j,1) &
                           + dxt(i,9) * ta1(9,j,1) &
-                          + dxt(i,10) * ta1(10,j,1) 
+                          + dxt(i,10) * ta1(10,j,1)
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -499,16 +499,16 @@ subroutine cpu_cdtp_lx10(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                              + dyt(j,7) * ta1(i,7,k) &
                              + dyt(j,8) * ta1(i,8,k) &
                              + dyt(j,9) * ta1(i,9,k) &
-                             + dyt(j,10) * ta1(i,10,k) 
+                             + dyt(j,10) * ta1(i,10,k)
 
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dtx(i,1,k,e) = dtx(i,1,k,e) &
@@ -521,10 +521,10 @@ subroutine cpu_cdtp_lx10(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dzt(k,7) * ta1(i,1,7) &
                           + dzt(k,8) * ta1(i,1,8) &
                           + dzt(k,9) * ta1(i,1,9) &
-                          + dzt(k,10) * ta1(i,1,10) 
+                          + dzt(k,10) * ta1(i,1,10)
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx10
 
@@ -533,20 +533,20 @@ subroutine cpu_cdtp_lx9(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
     integer, intent(in) :: nel
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx) :: wx, ta1
     integer :: e, i, j, k
-    
+
     do e = 1, nel
-       
+
        do i = 1, lx*lx*lx
           wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e)
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e)
        end do
-       
+
        do j = 1, lx * lx
           do i = 1, lx
              dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) &
@@ -557,14 +557,14 @@ subroutine cpu_cdtp_lx9(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dxt(i,6) * ta1(6,j,1) &
                           + dxt(i,7) * ta1(7,j,1) &
                           + dxt(i,8) * ta1(8,j,1) &
-                          + dxt(i,9) * ta1(9,j,1) 
+                          + dxt(i,9) * ta1(9,j,1)
           end do
        end do
 
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -577,15 +577,15 @@ subroutine cpu_cdtp_lx9(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                              + dyt(j,6) * ta1(i,6,k) &
                              + dyt(j,7) * ta1(i,7,k) &
                              + dyt(j,8) * ta1(i,8,k) &
-                             + dyt(j,9) * ta1(i,9,k) 
+                             + dyt(j,9) * ta1(i,9,k)
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dtx(i,1,k,e) = dtx(i,1,k,e) &
@@ -597,10 +597,10 @@ subroutine cpu_cdtp_lx9(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dzt(k,6) * ta1(i,1,6) &
                           + dzt(k,7) * ta1(i,1,7) &
                           + dzt(k,8) * ta1(i,1,8) &
-                          + dzt(k,9) * ta1(i,1,9) 
+                          + dzt(k,9) * ta1(i,1,9)
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx9
 
@@ -609,20 +609,20 @@ subroutine cpu_cdtp_lx8(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
     integer, intent(in) :: nel
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx) :: wx, ta1
     integer :: e, i, j, k
-    
+
     do e = 1, nel
-       
+
        do i = 1, lx*lx*lx
           wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e)
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e)
        end do
-       
+
        do j = 1, lx * lx
           do i = 1, lx
              dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) &
@@ -632,14 +632,14 @@ subroutine cpu_cdtp_lx8(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dxt(i,5) * ta1(5,j,1) &
                           + dxt(i,6) * ta1(6,j,1) &
                           + dxt(i,7) * ta1(7,j,1) &
-                          + dxt(i,8) * ta1(8,j,1) 
+                          + dxt(i,8) * ta1(8,j,1)
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -655,11 +655,11 @@ subroutine cpu_cdtp_lx8(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dtx(i,1,k,e) = dtx(i,1,k,e) &
@@ -673,7 +673,7 @@ subroutine cpu_cdtp_lx8(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                          + dzt(k,8) * ta1(i,1,8)
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx8
 
@@ -682,16 +682,16 @@ subroutine cpu_cdtp_lx7(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
     integer, intent(in) :: nel
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx) :: wx, ta1
     integer :: e, i, j, k
-    
+
     do e = 1, nel
-       
+
        do i = 1, lx*lx*lx
           wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e)
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e)
        end do
@@ -704,14 +704,14 @@ subroutine cpu_cdtp_lx7(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dxt(i,4) * ta1(4,j,1) &
                           + dxt(i,5) * ta1(5,j,1) &
                           + dxt(i,6) * ta1(6,j,1) &
-                          + dxt(i,7) * ta1(7,j,1) 
+                          + dxt(i,7) * ta1(7,j,1)
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -722,15 +722,15 @@ subroutine cpu_cdtp_lx7(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                              + dyt(j,4) * ta1(i,4,k) &
                              + dyt(j,5) * ta1(i,5,k) &
                              + dyt(j,6) * ta1(i,6,k) &
-                             + dyt(j,7) * ta1(i,7,k) 
+                             + dyt(j,7) * ta1(i,7,k)
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dtx(i,1,k,e) = dtx(i,1,k,e) &
@@ -740,32 +740,32 @@ subroutine cpu_cdtp_lx7(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dzt(k,4) * ta1(i,1,4) &
                           + dzt(k,5) * ta1(i,1,5) &
                           + dzt(k,6) * ta1(i,1,6) &
-                          + dzt(k,7) * ta1(i,1,7) 
+                          + dzt(k,7) * ta1(i,1,7)
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx7
-  
+
   subroutine cpu_cdtp_lx6(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
     integer, parameter :: lx = 6
     integer, intent(in) :: nel
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx) :: wx, ta1
     integer :: e, i, j, k
 
     do e = 1, nel
-       
+
        do i = 1, lx*lx*lx
           wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e)
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e)
        end do
-       
+
        do j = 1, lx * lx
           do i = 1, lx
              dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) &
@@ -773,14 +773,14 @@ subroutine cpu_cdtp_lx6(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dxt(i,3) * ta1(3,j,1) &
                           + dxt(i,4) * ta1(4,j,1) &
                           + dxt(i,5) * ta1(5,j,1) &
-                          + dxt(i,6) * ta1(6,j,1) 
+                          + dxt(i,6) * ta1(6,j,1)
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -790,15 +790,15 @@ subroutine cpu_cdtp_lx6(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                              + dyt(j,3) * ta1(i,3,k) &
                              + dyt(j,4) * ta1(i,4,k) &
                              + dyt(j,5) * ta1(i,5,k) &
-                             + dyt(j,6) * ta1(i,6,k) 
+                             + dyt(j,6) * ta1(i,6,k)
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dtx(i,1,k,e) = dtx(i,1,k,e) &
@@ -807,10 +807,10 @@ subroutine cpu_cdtp_lx6(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dzt(k,3) * ta1(i,1,3) &
                           + dzt(k,4) * ta1(i,1,4) &
                           + dzt(k,5) * ta1(i,1,5) &
-                          + dzt(k,6) * ta1(i,1,6)              
+                          + dzt(k,6) * ta1(i,1,6)
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx6
 
@@ -819,34 +819,34 @@ subroutine cpu_cdtp_lx5(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
     integer, intent(in) :: nel
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx) :: wx, ta1
     integer :: e, i, j, k
-    
+
     do e = 1, nel
-       
+
        do i = 1, lx*lx*lx
           wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e)
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e)
        end do
-       
+
        do j = 1, lx * lx
           do i = 1, lx
              dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) &
                           + dxt(i,2) * ta1(2,j,1) &
                           + dxt(i,3) * ta1(3,j,1) &
                           + dxt(i,4) * ta1(4,j,1) &
-                          + dxt(i,5) * ta1(5,j,1) 
+                          + dxt(i,5) * ta1(5,j,1)
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -855,15 +855,15 @@ subroutine cpu_cdtp_lx5(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                              + dyt(j,2) * ta1(i,2,k) &
                              + dyt(j,3) * ta1(i,3,k) &
                              + dyt(j,4) * ta1(i,4,k) &
-                             + dyt(j,5) * ta1(i,5,k) 
+                             + dyt(j,5) * ta1(i,5,k)
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dtx(i,1,k,e) = dtx(i,1,k,e) &
@@ -871,10 +871,10 @@ subroutine cpu_cdtp_lx5(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dzt(k,2) * ta1(i,1,2) &
                           + dzt(k,3) * ta1(i,1,3) &
                           + dzt(k,4) * ta1(i,1,4) &
-                          + dzt(k,5) * ta1(i,1,5) 
+                          + dzt(k,5) * ta1(i,1,5)
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx5
 
@@ -883,12 +883,12 @@ subroutine cpu_cdtp_lx4(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
     integer, intent(in) :: nel
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx) :: wx, ta1
     integer :: e, i, j, k
 
     do e = 1, nel
-       
+
        do i = 1, lx*lx*lx
           wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e)
        end do
@@ -902,14 +902,14 @@ subroutine cpu_cdtp_lx4(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
              dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) &
                           + dxt(i,2) * ta1(2,j,1) &
                           + dxt(i,3) * ta1(3,j,1) &
-                          + dxt(i,4) * ta1(4,j,1) 
+                          + dxt(i,4) * ta1(4,j,1)
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -917,25 +917,25 @@ subroutine cpu_cdtp_lx4(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                              + dyt(j,1) * ta1(i,1,k) &
                              + dyt(j,2) * ta1(i,2,k) &
                              + dyt(j,3) * ta1(i,3,k) &
-                             + dyt(j,4) * ta1(i,4,k) 
+                             + dyt(j,4) * ta1(i,4,k)
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dtx(i,1,k,e) = dtx(i,1,k,e) &
                           + dzt(k,1) * ta1(i,1,1) &
                           + dzt(k,2) * ta1(i,1,2) &
                           + dzt(k,3) * ta1(i,1,3) &
-                          + dzt(k,4) * ta1(i,1,4) 
+                          + dzt(k,4) * ta1(i,1,4)
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx4
 
@@ -944,16 +944,16 @@ subroutine cpu_cdtp_lx3(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
     integer, intent(in) :: nel
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx) :: wx, ta1
     integer :: e, i, j, k
 
     do e = 1, nel
-       
+
        do i = 1, lx*lx*lx
           wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e)
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e)
        end do
@@ -965,44 +965,44 @@ subroutine cpu_cdtp_lx3(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
                           + dxt(i,3) * ta1(3,j,1)
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
                 dtx(i,j,k,e) = dtx(i,j,k,e) &
                              + dyt(j,1) * ta1(i,1,k) &
                              + dyt(j,2) * ta1(i,2,k) &
-                             + dyt(j,3) * ta1(i,3,k) 
+                             + dyt(j,3) * ta1(i,3,k)
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dtx(i,1,k,e) = dtx(i,1,k,e) &
                           + dzt(k,1) * ta1(i,1,1) &
                           + dzt(k,2) * ta1(i,1,2) &
-                          + dzt(k,3) * ta1(i,1,3) 
+                          + dzt(k,3) * ta1(i,1,3)
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx3
-  
+
   subroutine cpu_cdtp_lx2(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
     integer, parameter :: lx = 2
     integer, intent(in) :: nel
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx) :: wx, ta1
     integer :: e, i, j, k
 
@@ -1019,36 +1019,36 @@ subroutine cpu_cdtp_lx2(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel)
        do j = 1, lx * lx
           do i = 1, lx
              dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) &
-                          + dxt(i,2) * ta1(2,j,1) 
+                          + dxt(i,2) * ta1(2,j,1)
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
                 dtx(i,j,k,e) = dtx(i,j,k,e) &
                              + dyt(j,1) * ta1(i,1,k) &
-                             + dyt(j,2) * ta1(i,2,k) 
+                             + dyt(j,2) * ta1(i,2,k)
              end do
           end do
        end do
-       
+
        do i = 1, lx*lx*lx
           ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dtx(i,1,k,e) = dtx(i,1,k,e) &
                           + dzt(k,1) * ta1(i,1,1) &
-                          + dzt(k,2) * ta1(i,1,2) 
+                          + dzt(k,2) * ta1(i,1,2)
           end do
        end do
-       
+
     end do
   end subroutine cpu_cdtp_lx2
 
diff --git a/src/math/bcknd/cpu/conv1.f90 b/src/math/bcknd/cpu/conv1.f90
index 0739fd70016..90bd51421ba 100644
--- a/src/math/bcknd/cpu/conv1.f90
+++ b/src/math/bcknd/cpu/conv1.f90
@@ -36,7 +36,7 @@ module cpu_conv1
   implicit none
 
 contains
-  
+
   subroutine cpu_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv, lx)
@@ -47,7 +47,7 @@ subroutine cpu_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
@@ -64,7 +64,7 @@ subroutine cpu_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, &
              dudr(i,j,1) = tmp
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -76,7 +76,7 @@ subroutine cpu_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, &
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              tmp = 0.0_rp
@@ -86,7 +86,7 @@ subroutine cpu_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, &
              dudt(i,1,k) = tmp
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = jacinv(i,1,1,e) &
                       * ( vx(i,1,1,e) &
@@ -103,9 +103,9 @@ subroutine cpu_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, &
                           + dtdz(i,1,1,e) * dudt(i,1,1) ) )
        end do
     end do
-    
+
   end subroutine cpu_conv1_lx
-  
+
   subroutine cpu_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv)
@@ -117,7 +117,7 @@ subroutine cpu_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
@@ -139,10 +139,10 @@ subroutine cpu_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, &
                          + dx(i,11) * u(11,j,1,e) &
                          + dx(i,12) * u(12,j,1,e) &
                          + dx(i,13) * u(13,j,1,e) &
-                         + dx(i,14) * u(14,j,1,e) 
+                         + dx(i,14) * u(14,j,1,e)
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -159,11 +159,11 @@ subroutine cpu_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, &
                             + dy(j,11) * u(i,11,k,e) &
                             + dy(j,12) * u(i,12,k,e) &
                             + dy(j,13) * u(i,13,k,e) &
-                            + dy(j,14) * u(i,14,k,e) 
+                            + dy(j,14) * u(i,14,k,e)
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dudt(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -179,10 +179,10 @@ subroutine cpu_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, &
                          + dz(k,11) * u(i,1,11,e) &
                          + dz(k,12) * u(i,1,12,e) &
                          + dz(k,13) * u(i,1,13,e) &
-                         + dz(k,14) * u(i,1,14,e) 
+                         + dz(k,14) * u(i,1,14,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = jacinv(i,1,1,e) &
                       * ( vx(i,1,1,e) &
@@ -199,9 +199,9 @@ subroutine cpu_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, &
                           + dtdz(i,1,1,e) * dudt(i,1,1) ) )
        end do
     end do
-    
+
   end subroutine cpu_conv1_lx14
-   
+
   subroutine cpu_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv)
@@ -213,7 +213,7 @@ subroutine cpu_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
@@ -234,10 +234,10 @@ subroutine cpu_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, &
                          + dx(i,10) * u(10,j,1,e) &
                          + dx(i,11) * u(11,j,1,e) &
                          + dx(i,12) * u(12,j,1,e) &
-                         + dx(i,13) * u(13,j,1,e) 
+                         + dx(i,13) * u(13,j,1,e)
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -253,11 +253,11 @@ subroutine cpu_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, &
                             + dy(j,10) * u(i,10,k,e) &
                             + dy(j,11) * u(i,11,k,e) &
                             + dy(j,12) * u(i,12,k,e) &
-                            + dy(j,13) * u(i,13,k,e) 
+                            + dy(j,13) * u(i,13,k,e)
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dudt(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -272,12 +272,12 @@ subroutine cpu_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, &
                          + dz(k,10) * u(i,1,10,e) &
                          + dz(k,11) * u(i,1,11,e) &
                          + dz(k,12) * u(i,1,12,e) &
-                         + dz(k,13) * u(i,1,13,e) 
+                         + dz(k,13) * u(i,1,13,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
-         du(i,1,1,e) = jacinv(i,1,1,e) &
+          du(i,1,1,e) = jacinv(i,1,1,e) &
                      * ( vx(i,1,1,e) &
                        * ( drdx(i,1,1,e) * dudr(i,1,1) &
                          + dsdx(i,1,1,e) * duds(i,1,1) &
@@ -290,11 +290,11 @@ subroutine cpu_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, &
                        * ( drdz(i,1,1,e) * dudr(i,1,1) &
                          + dsdz(i,1,1,e) * duds(i,1,1) &
                          + dtdz(i,1,1,e) * dudt(i,1,1) ) )
-      end do
-   end do
-    
- end subroutine cpu_conv1_lx13
-   
+       end do
+    end do
+
+  end subroutine cpu_conv1_lx13
+
   subroutine cpu_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv)
@@ -306,7 +306,7 @@ subroutine cpu_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
@@ -326,10 +326,10 @@ subroutine cpu_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, &
                          + dx(i,9) * u(9,j,1,e) &
                          + dx(i,10) * u(10,j,1,e) &
                          + dx(i,11) * u(11,j,1,e) &
-                         + dx(i,12) * u(12,j,1,e) 
+                         + dx(i,12) * u(12,j,1,e)
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -348,7 +348,7 @@ subroutine cpu_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, &
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dudt(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -363,11 +363,11 @@ subroutine cpu_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, &
                          + dz(k,10) * u(i,1,10,e) &
                          + dz(k,11) * u(i,1,11,e) &
                          + dz(k,12) * u(i,1,12,e)
-           end do
-        end do
-       
-        do i = 1, lx * lx * lx
-           du(i,1,1,e) = jacinv(i,1,1,e) &
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          du(i,1,1,e) = jacinv(i,1,1,e) &
                        * ( vx(i,1,1,e) &
                          * ( drdx(i,1,1,e) * dudr(i,1,1) &
                            + dsdx(i,1,1,e) * duds(i,1,1) &
@@ -380,12 +380,12 @@ subroutine cpu_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, &
                          * ( drdz(i,1,1,e) * dudr(i,1,1) &
                            + dsdz(i,1,1,e) * duds(i,1,1) &
                            + dtdz(i,1,1,e) * dudt(i,1,1) ) )
-        end do
-     end do
-    
-   end subroutine cpu_conv1_lx12
+       end do
+    end do
+
+  end subroutine cpu_conv1_lx12
 
-   subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, &
+  subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv)
     integer, parameter :: lx = 11
@@ -396,7 +396,7 @@ subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
@@ -415,10 +415,10 @@ subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, &
                          + dx(i,8) * u(8,j,1,e) &
                          + dx(i,9) * u(9,j,1,e) &
                          + dx(i,10) * u(10,j,1,e) &
-                         + dx(i,11) * u(11,j,1,e) 
+                         + dx(i,11) * u(11,j,1,e)
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -432,11 +432,11 @@ subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, &
                             + dy(j,8) * u(i,8,k,e) &
                             + dy(j,9) * u(i,9,k,e) &
                             + dy(j,10) * u(i,10,k,e) &
-                            + dy(j,11) * u(i,11,k,e) 
+                            + dy(j,11) * u(i,11,k,e)
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dudt(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -449,12 +449,12 @@ subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, &
                          + dz(k,8) * u(i,1,8,e) &
                          + dz(k,9) * u(i,1,9,e) &
                          + dz(k,10) * u(i,1,10,e) &
-                         + dz(k,11) * u(i,1,11,e) 
-           end do
-        end do
-       
-        do i = 1, lx * lx * lx
-           du(i,1,1,e) = jacinv(i,1,1,e) &
+                         + dz(k,11) * u(i,1,11,e)
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          du(i,1,1,e) = jacinv(i,1,1,e) &
                        * ( vx(i,1,1,e) &
                          * ( drdx(i,1,1,e) * dudr(i,1,1) &
                            + dsdx(i,1,1,e) * duds(i,1,1) &
@@ -467,12 +467,12 @@ subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, &
                          * ( drdz(i,1,1,e) * dudr(i,1,1) &
                            + dsdz(i,1,1,e) * duds(i,1,1) &
                            + dtdz(i,1,1,e) * dudt(i,1,1) ) )
-        end do
-     end do
-    
-   end subroutine cpu_conv1_lx11
-   
-   subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, &
+       end do
+    end do
+
+  end subroutine cpu_conv1_lx11
+
+  subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv)
     integer, parameter :: lx = 10
@@ -483,7 +483,7 @@ subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
@@ -501,10 +501,10 @@ subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, &
                          + dx(i,7) * u(7,j,1,e) &
                          + dx(i,8) * u(8,j,1,e) &
                          + dx(i,9) * u(9,j,1,e) &
-                         + dx(i,10) * u(10,j,1,e) 
+                         + dx(i,10) * u(10,j,1,e)
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -517,11 +517,11 @@ subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, &
                             + dy(j,7) * u(i,7,k,e) &
                             + dy(j,8) * u(i,8,k,e) &
                             + dy(j,9) * u(i,9,k,e) &
-                            + dy(j,10) * u(i,10,k,e) 
+                            + dy(j,10) * u(i,10,k,e)
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dudt(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -533,12 +533,12 @@ subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, &
                          + dz(k,7) * u(i,1,7,e) &
                          + dz(k,8) * u(i,1,8,e) &
                          + dz(k,9) * u(i,1,9,e) &
-                         + dz(k,10) * u(i,1,10,e) 
-           end do
-        end do
-       
-        do i = 1, lx * lx * lx
-           du(i,1,1,e) = jacinv(i,1,1,e) &
+                         + dz(k,10) * u(i,1,10,e)
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          du(i,1,1,e) = jacinv(i,1,1,e) &
                        * ( vx(i,1,1,e) &
                          * ( drdx(i,1,1,e) * dudr(i,1,1) &
                            + dsdx(i,1,1,e) * duds(i,1,1) &
@@ -551,12 +551,12 @@ subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, &
                          * ( drdz(i,1,1,e) * dudr(i,1,1) &
                            + dsdz(i,1,1,e) * duds(i,1,1) &
                            + dtdz(i,1,1,e) * dudt(i,1,1) ) )
-        end do
-     end do
-    
-   end subroutine cpu_conv1_lx10
-   
-   subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, &
+       end do
+    end do
+
+  end subroutine cpu_conv1_lx10
+
+  subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv)
     integer, parameter :: lx = 9
@@ -567,7 +567,7 @@ subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
@@ -584,10 +584,10 @@ subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, &
                          + dx(i,6) * u(6,j,1,e) &
                          + dx(i,7) * u(7,j,1,e) &
                          + dx(i,8) * u(8,j,1,e) &
-                         + dx(i,9) * u(9,j,1,e) 
+                         + dx(i,9) * u(9,j,1,e)
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -599,11 +599,11 @@ subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, &
                             + dy(j,6) * u(i,6,k,e) &
                             + dy(j,7) * u(i,7,k,e) &
                             + dy(j,8) * u(i,8,k,e) &
-                            + dy(j,9) * u(i,9,k,e) 
+                            + dy(j,9) * u(i,9,k,e)
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dudt(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -614,12 +614,12 @@ subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, &
                          + dz(k,6) * u(i,1,6,e) &
                          + dz(k,7) * u(i,1,7,e) &
                          + dz(k,8) * u(i,1,8,e) &
-                         + dz(k,9) * u(i,1,9,e) 
-           end do
-        end do
-       
-        do i = 1, lx * lx * lx
-           du(i,1,1,e) = jacinv(i,1,1,e) &
+                         + dz(k,9) * u(i,1,9,e)
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          du(i,1,1,e) = jacinv(i,1,1,e) &
                        * ( vx(i,1,1,e) &
                          * ( drdx(i,1,1,e) * dudr(i,1,1) &
                            + dsdx(i,1,1,e) * duds(i,1,1) &
@@ -632,12 +632,12 @@ subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, &
                          * ( drdz(i,1,1,e) * dudr(i,1,1) &
                            + dsdz(i,1,1,e) * duds(i,1,1) &
                            + dtdz(i,1,1,e) * dudt(i,1,1) ) )
-        end do
-     end do
-    
-   end subroutine cpu_conv1_lx9
+       end do
+    end do
 
-   subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, &
+  end subroutine cpu_conv1_lx9
+
+  subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv)
     integer, parameter :: lx = 8
@@ -648,7 +648,7 @@ subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
@@ -664,10 +664,10 @@ subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, &
                          + dx(i,5) * u(5,j,1,e) &
                          + dx(i,6) * u(6,j,1,e) &
                          + dx(i,7) * u(7,j,1,e) &
-                         + dx(i,8) * u(8,j,1,e) 
+                         + dx(i,8) * u(8,j,1,e)
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -678,11 +678,11 @@ subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, &
                             + dy(j,5) * u(i,5,k,e) &
                             + dy(j,6) * u(i,6,k,e) &
                             + dy(j,7) * u(i,7,k,e) &
-                            + dy(j,8) * u(i,8,k,e) 
+                            + dy(j,8) * u(i,8,k,e)
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dudt(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -692,12 +692,12 @@ subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, &
                          + dz(k,5) * u(i,1,5,e) &
                          + dz(k,6) * u(i,1,6,e) &
                          + dz(k,7) * u(i,1,7,e) &
-                         + dz(k,8) * u(i,1,8,e) 
-           end do
-        end do
-       
-        do i = 1, lx * lx * lx
-           du(i,1,1,e) = jacinv(i,1,1,e) &
+                         + dz(k,8) * u(i,1,8,e)
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          du(i,1,1,e) = jacinv(i,1,1,e) &
                        * ( vx(i,1,1,e) &
                          * ( drdx(i,1,1,e) * dudr(i,1,1) &
                            + dsdx(i,1,1,e) * duds(i,1,1) &
@@ -710,12 +710,12 @@ subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, &
                          * ( drdz(i,1,1,e) * dudr(i,1,1) &
                            + dsdz(i,1,1,e) * duds(i,1,1) &
                            + dtdz(i,1,1,e) * dudt(i,1,1) ) )
-        end do
-     end do
-    
-   end subroutine cpu_conv1_lx8
+       end do
+    end do
+
+  end subroutine cpu_conv1_lx8
 
-   subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, &
+  subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv)
     integer, parameter :: lx = 7
@@ -726,7 +726,7 @@ subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
@@ -741,10 +741,10 @@ subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, &
                          + dx(i,4) * u(4,j,1,e) &
                          + dx(i,5) * u(5,j,1,e) &
                          + dx(i,6) * u(6,j,1,e) &
-                         + dx(i,7) * u(7,j,1,e) 
+                         + dx(i,7) * u(7,j,1,e)
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -754,11 +754,11 @@ subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, &
                             + dy(j,4) * u(i,4,k,e) &
                             + dy(j,5) * u(i,5,k,e) &
                             + dy(j,6) * u(i,6,k,e) &
-                            + dy(j,7) * u(i,7,k,e) 
+                            + dy(j,7) * u(i,7,k,e)
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dudt(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -768,11 +768,11 @@ subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, &
                          + dz(k,5) * u(i,1,5,e) &
                          + dz(k,6) * u(i,1,6,e) &
                          + dz(k,7) * u(i,1,7,e)
-           end do
-        end do
-       
-        do i = 1, lx * lx * lx
-           du(i,1,1,e) = jacinv(i,1,1,e) &
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          du(i,1,1,e) = jacinv(i,1,1,e) &
                        * ( vx(i,1,1,e) &
                          * ( drdx(i,1,1,e) * dudr(i,1,1) &
                            + dsdx(i,1,1,e) * duds(i,1,1) &
@@ -785,12 +785,12 @@ subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, &
                          * ( drdz(i,1,1,e) * dudr(i,1,1) &
                            + dsdz(i,1,1,e) * duds(i,1,1) &
                            + dtdz(i,1,1,e) * dudt(i,1,1) ) )
-        end do
-     end do
-    
-   end subroutine cpu_conv1_lx7
+       end do
+    end do
+
+  end subroutine cpu_conv1_lx7
 
-   subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, &
+  subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv)
     integer, parameter :: lx = 6
@@ -801,12 +801,12 @@ subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
     integer :: e, i, j, k
-    
+
     do e = 1, nelv
        do j = 1, lx * lx
           do i = 1, lx
@@ -815,10 +815,10 @@ subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, &
                          + dx(i,3) * u(3,j,1,e) &
                          + dx(i,4) * u(4,j,1,e) &
                          + dx(i,5) * u(5,j,1,e) &
-                         + dx(i,6) * u(6,j,1,e) 
+                         + dx(i,6) * u(6,j,1,e)
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -827,11 +827,11 @@ subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, &
                             + dy(j,3) * u(i,3,k,e) &
                             + dy(j,4) * u(i,4,k,e) &
                             + dy(j,5) * u(i,5,k,e) &
-                            + dy(j,6) * u(i,6,k,e) 
+                            + dy(j,6) * u(i,6,k,e)
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dudt(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -839,12 +839,12 @@ subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, &
                          + dz(k,3) * u(i,1,3,e) &
                          + dz(k,4) * u(i,1,4,e) &
                          + dz(k,5) * u(i,1,5,e) &
-                         + dz(k,6) * u(i,1,6,e) 
-           end do
-        end do
-       
-        do i = 1, lx * lx * lx
-           du(i,1,1,e) = jacinv(i,1,1,e) &
+                         + dz(k,6) * u(i,1,6,e)
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          du(i,1,1,e) = jacinv(i,1,1,e) &
                        * ( vx(i,1,1,e) &
                          * ( drdx(i,1,1,e) * dudr(i,1,1) &
                            + dsdx(i,1,1,e) * duds(i,1,1) &
@@ -857,12 +857,12 @@ subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, &
                          * ( drdz(i,1,1,e) * dudr(i,1,1) &
                            + dsdz(i,1,1,e) * duds(i,1,1) &
                            + dtdz(i,1,1,e) * dudt(i,1,1) ) )
-        end do
-     end do
-    
-   end subroutine cpu_conv1_lx6
+       end do
+    end do
 
-   subroutine cpu_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, &
+  end subroutine cpu_conv1_lx6
+
+  subroutine cpu_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv)
     integer, parameter :: lx = 5
@@ -873,7 +873,7 @@ subroutine cpu_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
@@ -886,10 +886,10 @@ subroutine cpu_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, &
                          + dx(i,2) * u(2,j,1,e) &
                          + dx(i,3) * u(3,j,1,e) &
                          + dx(i,4) * u(4,j,1,e) &
-                         + dx(i,5) * u(5,j,1,e) 
+                         + dx(i,5) * u(5,j,1,e)
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -897,23 +897,23 @@ subroutine cpu_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, &
                             + dy(j,2) * u(i,2,k,e) &
                             + dy(j,3) * u(i,3,k,e) &
                             + dy(j,4) * u(i,4,k,e) &
-                            + dy(j,5) * u(i,5,k,e) 
+                            + dy(j,5) * u(i,5,k,e)
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dudt(i,1,k) = dz(k,1) * u(i,1,1,e) &
                          + dz(k,2) * u(i,1,2,e) &
                          + dz(k,3) * u(i,1,3,e) &
                          + dz(k,4) * u(i,1,4,e) &
-                         + dz(k,5) * u(i,1,5,e) 
-           end do
-        end do
-       
-        do i = 1, lx * lx * lx
-           du(i,1,1,e) = jacinv(i,1,1,e) &
+                         + dz(k,5) * u(i,1,5,e)
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          du(i,1,1,e) = jacinv(i,1,1,e) &
                        * ( vx(i,1,1,e) &
                          * ( drdx(i,1,1,e) * dudr(i,1,1) &
                            + dsdx(i,1,1,e) * duds(i,1,1) &
@@ -926,12 +926,12 @@ subroutine cpu_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, &
                          * ( drdz(i,1,1,e) * dudr(i,1,1) &
                            + dsdz(i,1,1,e) * duds(i,1,1) &
                            + dtdz(i,1,1,e) * dudt(i,1,1) ) )
-        end do
-     end do
-    
-   end subroutine cpu_conv1_lx5
+       end do
+    end do
+
+  end subroutine cpu_conv1_lx5
 
-   subroutine cpu_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, &
+  subroutine cpu_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv)
     integer, parameter :: lx = 4
@@ -942,7 +942,7 @@ subroutine cpu_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
@@ -954,32 +954,32 @@ subroutine cpu_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, &
              dudr(i,j,1) = dx(i,1) * u(1,j,1,e) &
                          + dx(i,2) * u(2,j,1,e) &
                          + dx(i,3) * u(3,j,1,e) &
-                         + dx(i,4) * u(4,j,1,e) 
+                         + dx(i,4) * u(4,j,1,e)
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
                 duds(i,j,k) = dy(j,1) * u(i,1,k,e) &
                             + dy(j,2) * u(i,2,k,e) &
                             + dy(j,3) * u(i,3,k,e) &
-                            + dy(j,4) * u(i,4,k,e) 
+                            + dy(j,4) * u(i,4,k,e)
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dudt(i,1,k) = dz(k,1) * u(i,1,1,e) &
                          + dz(k,2) * u(i,1,2,e) &
                          + dz(k,3) * u(i,1,3,e) &
-                         + dz(k,4) * u(i,1,4,e) 
-           end do
-        end do
-       
-        do i = 1, lx * lx * lx
-           du(i,1,1,e) = jacinv(i,1,1,e) &
+                         + dz(k,4) * u(i,1,4,e)
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          du(i,1,1,e) = jacinv(i,1,1,e) &
                        * ( vx(i,1,1,e) &
                          * ( drdx(i,1,1,e) * dudr(i,1,1) &
                            + dsdx(i,1,1,e) * duds(i,1,1) &
@@ -992,12 +992,12 @@ subroutine cpu_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, &
                          * ( drdz(i,1,1,e) * dudr(i,1,1) &
                            + dsdz(i,1,1,e) * duds(i,1,1) &
                            + dtdz(i,1,1,e) * dudt(i,1,1) ) )
-        end do
-     end do
-    
-   end subroutine cpu_conv1_lx4
+       end do
+    end do
+
+  end subroutine cpu_conv1_lx4
 
-   subroutine cpu_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, &
+  subroutine cpu_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv)
     integer, parameter :: lx = 3
@@ -1008,7 +1008,7 @@ subroutine cpu_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
@@ -1019,30 +1019,30 @@ subroutine cpu_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, &
           do i = 1, lx
              dudr(i,j,1) = dx(i,1) * u(1,j,1,e) &
                          + dx(i,2) * u(2,j,1,e) &
-                         + dx(i,3) * u(3,j,1,e) 
+                         + dx(i,3) * u(3,j,1,e)
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
                 duds(i,j,k) = dy(j,1) * u(i,1,k,e) &
                             + dy(j,2) * u(i,2,k,e) &
-                            + dy(j,3) * u(i,3,k,e) 
+                            + dy(j,3) * u(i,3,k,e)
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dudt(i,1,k) = dz(k,1) * u(i,1,1,e) &
                          + dz(k,2) * u(i,1,2,e) &
-                         + dz(k,3) * u(i,1,3,e) 
-           end do
-        end do
-       
-        do i = 1, lx * lx * lx
-           du(i,1,1,e) = jacinv(i,1,1,e) &
+                         + dz(k,3) * u(i,1,3,e)
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          du(i,1,1,e) = jacinv(i,1,1,e) &
                        * ( vx(i,1,1,e) &
                          * ( drdx(i,1,1,e) * dudr(i,1,1) &
                            + dsdx(i,1,1,e) * duds(i,1,1) &
@@ -1055,12 +1055,12 @@ subroutine cpu_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, &
                          * ( drdz(i,1,1,e) * dudr(i,1,1) &
                            + dsdz(i,1,1,e) * duds(i,1,1) &
                            + dtdz(i,1,1,e) * dudt(i,1,1) ) )
-        end do
-     end do
-    
-   end subroutine cpu_conv1_lx3
+       end do
+    end do
+
+  end subroutine cpu_conv1_lx3
 
-   subroutine cpu_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, &
+  subroutine cpu_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv)
     integer, parameter :: lx = 2
@@ -1071,7 +1071,7 @@ subroutine cpu_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) ::  dudr
     real(kind=rp), dimension(lx,lx,lx) ::  duds
     real(kind=rp), dimension(lx,lx,lx) ::  dudt
@@ -1081,28 +1081,28 @@ subroutine cpu_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, &
        do j = 1, lx * lx
           do i = 1, lx
              dudr(i,j,1) = dx(i,1) * u(1,j,1,e) &
-                         + dx(i,2) * u(2,j,1,e) 
+                         + dx(i,2) * u(2,j,1,e)
           end do
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
                 duds(i,j,k) = dy(j,1) * u(i,1,k,e) &
-                            + dy(j,2) * u(i,2,k,e) 
+                            + dy(j,2) * u(i,2,k,e)
              end do
           end do
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              dudt(i,1,k) = dz(k,1) * u(i,1,1,e) &
-                         + dz(k,2) * u(i,1,2,e) 
-           end do
-        end do
-       
-        do i = 1, lx * lx * lx
-           du(i,1,1,e) = jacinv(i,1,1,e) &
+                         + dz(k,2) * u(i,1,2,e)
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          du(i,1,1,e) = jacinv(i,1,1,e) &
                        * ( vx(i,1,1,e) &
                          * ( drdx(i,1,1,e) * dudr(i,1,1) &
                            + dsdx(i,1,1,e) * duds(i,1,1) &
@@ -1115,9 +1115,9 @@ subroutine cpu_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, &
                          * ( drdz(i,1,1,e) * dudr(i,1,1) &
                            + dsdz(i,1,1,e) * duds(i,1,1) &
                            + dtdz(i,1,1,e) * dudt(i,1,1) ) )
-        end do
-     end do
-    
-   end subroutine cpu_conv1_lx2
+       end do
+    end do
+
+  end subroutine cpu_conv1_lx2
 
 end module cpu_conv1
diff --git a/src/math/bcknd/cpu/dudxyz.f90 b/src/math/bcknd/cpu/dudxyz.f90
index 02f5c8e6523..27b8b409ab5 100644
--- a/src/math/bcknd/cpu/dudxyz.f90
+++ b/src/math/bcknd/cpu/dudxyz.f90
@@ -73,11 +73,11 @@ subroutine cpu_dudxyz_lx(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, lx)
              end do
           end do
        end do
-        
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              tmp = 0.0_rp
@@ -87,19 +87,19 @@ subroutine cpu_dudxyz_lx(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, lx)
              drst(i,1,k) = tmp
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e)
        end do
-        
+
     end do
-    
+
   end subroutine cpu_dudxyz_lx
-  
+
   subroutine cpu_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
     integer, parameter :: lx = 14
     integer, intent(in) :: nel
@@ -126,7 +126,7 @@ subroutine cpu_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dx(i,11) * u(11,j,1,e) &
                          + dx(i,12) * u(12,j,1,e) &
                          + dx(i,13) * u(13,j,1,e) &
-                         + dx(i,14) * u(14,j,1,e) 
+                         + dx(i,14) * u(14,j,1,e)
           end do
        end do
 
@@ -150,15 +150,15 @@ subroutine cpu_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                             + dy(j,11) * u(i,11,k,e) &
                             + dy(j,12) * u(i,12,k,e) &
                             + dy(j,13) * u(i,13,k,e) &
-                            + dy(j,14) * u(i,14,k,e) 
+                            + dy(j,14) * u(i,14,k,e)
              end do
           end do
        end do
-        
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              drst(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -174,22 +174,22 @@ subroutine cpu_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dz(k,11) * u(i,1,11,e) &
                          + dz(k,12) * u(i,1,12,e) &
                          + dz(k,13) * u(i,1,13,e) &
-                         + dz(k,14) * u(i,1,14,e) 
+                         + dz(k,14) * u(i,1,14,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e)
        end do
-        
+
     end do
-    
+
   end subroutine cpu_dudxyz_lx14
-  
+
   subroutine cpu_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
     integer, parameter :: lx = 13
     integer, intent(in) :: nel
@@ -215,7 +215,7 @@ subroutine cpu_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dx(i,10) * u(10,j,1,e) &
                          + dx(i,11) * u(11,j,1,e) &
                          + dx(i,12) * u(12,j,1,e) &
-                         + dx(i,13) * u(13,j,1,e) 
+                         + dx(i,13) * u(13,j,1,e)
           end do
        end do
 
@@ -238,15 +238,15 @@ subroutine cpu_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                             + dy(j,10) * u(i,10,k,e) &
                             + dy(j,11) * u(i,11,k,e) &
                             + dy(j,12) * u(i,12,k,e) &
-                            + dy(j,13) * u(i,13,k,e) 
+                            + dy(j,13) * u(i,13,k,e)
              end do
           end do
        end do
-        
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              drst(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -261,22 +261,22 @@ subroutine cpu_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dz(k,10) * u(i,1,10,e) &
                          + dz(k,11) * u(i,1,11,e) &
                          + dz(k,12) * u(i,1,12,e) &
-                         + dz(k,13) * u(i,1,13,e) 
+                         + dz(k,13) * u(i,1,13,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e)
        end do
-        
+
     end do
-    
+
   end subroutine cpu_dudxyz_lx13
-  
+
   subroutine cpu_dudxyz_lx12(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
     integer, parameter :: lx = 12
     integer, intent(in) :: nel
@@ -301,7 +301,7 @@ subroutine cpu_dudxyz_lx12(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dx(i,9) * u(9,j,1,e) &
                          + dx(i,10) * u(10,j,1,e) &
                          + dx(i,11) * u(11,j,1,e) &
-                         + dx(i,12) * u(12,j,1,e) 
+                         + dx(i,12) * u(12,j,1,e)
           end do
        end do
 
@@ -327,11 +327,11 @@ subroutine cpu_dudxyz_lx12(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
              end do
           end do
        end do
-        
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              drst(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -348,17 +348,17 @@ subroutine cpu_dudxyz_lx12(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dz(k,12) * u(i,1,12,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e)
        end do
-        
+
     end do
-    
+
   end subroutine cpu_dudxyz_lx12
 
   subroutine cpu_dudxyz_lx11(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
@@ -384,14 +384,14 @@ subroutine cpu_dudxyz_lx11(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dx(i,8) * u(8,j,1,e) &
                          + dx(i,9) * u(9,j,1,e) &
                          + dx(i,10) * u(10,j,1,e) &
-                         + dx(i,11) * u(11,j,1,e) 
+                         + dx(i,11) * u(11,j,1,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -426,20 +426,20 @@ subroutine cpu_dudxyz_lx11(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dz(k,8) * u(i,1,8,e) &
                          + dz(k,9) * u(i,1,9,e) &
                          + dz(k,10) * u(i,1,10,e) &
-                         + dz(k,11) * u(i,1,11,e) 
+                         + dz(k,11) * u(i,1,11,e)
           end do
        end do
 
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e)
        end do
-        
+
     end do
-    
+
   end subroutine cpu_dudxyz_lx11
 
   subroutine cpu_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
@@ -451,7 +451,7 @@ subroutine cpu_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) :: drst
     integer :: e, i, j, k
-    
+
     do e = 1, nel
        do j = 1, lx * lx
           do i = 1, lx
@@ -464,14 +464,14 @@ subroutine cpu_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dx(i,7) * u(7,j,1,e) &
                          + dx(i,8) * u(8,j,1,e) &
                          + dx(i,9) * u(9,j,1,e) &
-                         + dx(i,10) * u(10,j,1,e) 
+                         + dx(i,10) * u(10,j,1,e)
           end do
        end do
-        
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -484,15 +484,15 @@ subroutine cpu_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                             + dy(j,7) * u(i,7,k,e) &
                             + dy(j,8) * u(i,8,k,e) &
                             + dy(j,9) * u(i,9,k,e) &
-                            + dy(j,10) * u(i,10,k,e) 
+                            + dy(j,10) * u(i,10,k,e)
              end do
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              drst(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -511,7 +511,7 @@ subroutine cpu_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e)
        end do
@@ -541,7 +541,7 @@ subroutine cpu_dudxyz_lx9(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dx(i,6) * u(6,j,1,e) &
                          + dx(i,7) * u(7,j,1,e) &
                          + dx(i,8) * u(8,j,1,e) &
-                         + dx(i,9) * u(9,j,1,e) 
+                         + dx(i,9) * u(9,j,1,e)
           end do
        end do
 
@@ -560,15 +560,15 @@ subroutine cpu_dudxyz_lx9(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                             + dy(j,6) * u(i,6,k,e) &
                             + dy(j,7) * u(i,7,k,e) &
                             + dy(j,8) * u(i,8,k,e) &
-                            + dy(j,9) * u(i,9,k,e) 
+                            + dy(j,9) * u(i,9,k,e)
              end do
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              drst(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -579,10 +579,10 @@ subroutine cpu_dudxyz_lx9(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dz(k,6) * u(i,1,6,e) &
                          + dz(k,7) * u(i,1,7,e) &
                          + dz(k,8) * u(i,1,8,e) &
-                         + dz(k,9) * u(i,1,9,e) 
+                         + dz(k,9) * u(i,1,9,e)
           end do
        end do
-        
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e)
        end do
@@ -615,10 +615,10 @@ subroutine cpu_dudxyz_lx8(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dx(i,5) * u(5,j,1,e) &
                          + dx(i,6) * u(6,j,1,e) &
                          + dx(i,7) * u(7,j,1,e) &
-                         + dx(i,8) * u(8,j,1,e) 
+                         + dx(i,8) * u(8,j,1,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e)
        end do
@@ -637,7 +637,7 @@ subroutine cpu_dudxyz_lx8(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
              end do
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e)
        end do
@@ -651,7 +651,7 @@ subroutine cpu_dudxyz_lx8(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dz(k,5) * u(i,1,5,e) &
                          + dz(k,6) * u(i,1,6,e) &
                          + dz(k,7) * u(i,1,7,e) &
-                         + dz(k,8) * u(i,1,8,e) 
+                         + dz(k,8) * u(i,1,8,e)
           end do
        end do
 
@@ -662,9 +662,9 @@ subroutine cpu_dudxyz_lx8(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e)
        end do
-       
+
     end do
-     
+
   end subroutine cpu_dudxyz_lx8
 
   subroutine cpu_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
@@ -676,7 +676,7 @@ subroutine cpu_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) :: drst
     integer :: e, i, j, k
-    
+
     do e = 1, nel
        do j = 1, lx * lx
           do i = 1, lx
@@ -686,10 +686,10 @@ subroutine cpu_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dx(i,4) * u(4,j,1,e) &
                          + dx(i,5) * u(5,j,1,e) &
                          + dx(i,6) * u(6,j,1,e) &
-                         + dx(i,7) * u(7,j,1,e) 
+                         + dx(i,7) * u(7,j,1,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e)
        end do
@@ -707,7 +707,7 @@ subroutine cpu_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
              end do
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e)
        end do
@@ -731,7 +731,7 @@ subroutine cpu_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e)
        end do
-       
+
     end do
 
   end subroutine cpu_dudxyz_lx7
@@ -745,7 +745,7 @@ subroutine cpu_dudxyz_lx6(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) :: drst
     integer :: e, i, j, k
-    
+
     do e = 1, nel
        do j = 1, lx * lx
           do i = 1, lx
@@ -757,7 +757,7 @@ subroutine cpu_dudxyz_lx6(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dx(i,6) * u(6,j,1,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e)
        end do
@@ -774,7 +774,7 @@ subroutine cpu_dudxyz_lx6(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
              end do
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e)
        end do
@@ -797,7 +797,7 @@ subroutine cpu_dudxyz_lx6(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e)
        end do
-       
+
     end do
 
   end subroutine cpu_dudxyz_lx6
@@ -822,7 +822,7 @@ subroutine cpu_dudxyz_lx5(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dx(i,5) * u(5,j,1,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e)
        end do
@@ -842,7 +842,7 @@ subroutine cpu_dudxyz_lx5(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              drst(i,1,k) = dz(k,1) * u(i,1,1,e) &
@@ -852,7 +852,7 @@ subroutine cpu_dudxyz_lx5(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dz(k,5) * u(i,1,5,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e)
        end do
@@ -874,7 +874,7 @@ subroutine cpu_dudxyz_lx4(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) :: drst
     integer :: e, i, j, k
-    
+
     do e = 1, nel
        do j = 1, lx * lx
           do i = 1, lx
@@ -888,7 +888,7 @@ subroutine cpu_dudxyz_lx4(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -899,7 +899,7 @@ subroutine cpu_dudxyz_lx4(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
              end do
           end do
        end do
-        
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e)
        end do
@@ -912,7 +912,7 @@ subroutine cpu_dudxyz_lx4(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dz(k,4) * u(i,1,4,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e)
        end do
@@ -934,7 +934,7 @@ subroutine cpu_dudxyz_lx3(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx) :: drst
     integer :: e, i, j, k
-    
+
     do e = 1, nel
        do j = 1, lx * lx
           do i = 1, lx
@@ -957,7 +957,7 @@ subroutine cpu_dudxyz_lx3(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
              end do
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e)
        end do
@@ -969,17 +969,17 @@ subroutine cpu_dudxyz_lx3(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
                          + dz(k,3) * u(i,1,3,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e)
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e)
        end do
 
     end do
-    
+
   end subroutine cpu_dudxyz_lx3
 
   subroutine cpu_dudxyz_lx2(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
@@ -996,14 +996,14 @@ subroutine cpu_dudxyz_lx2(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
        do j = 1, lx * lx
           do i = 1, lx
              du(i,j,1,e) = dx(i,1) * u(1,j,1,e) &
-                         + dx(i,2) * u(2,j,1,e) 
+                         + dx(i,2) * u(2,j,1,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do j = 1, lx
              do i = 1, lx
@@ -1012,18 +1012,18 @@ subroutine cpu_dudxyz_lx2(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel)
              end do
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e)
        end do
-       
+
        do k = 1, lx
           do i = 1, lx*lx
              drst(i,1,k) = dz(k,1) * u(i,1,1,e) &
-                         + dz(k,2) * u(i,1,2,e) 
+                         + dz(k,2) * u(i,1,2,e)
           end do
        end do
-       
+
        do i = 1, lx * lx * lx
           du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e)
        end do
diff --git a/src/math/bcknd/cpu/fdm_cpu.f90 b/src/math/bcknd/cpu/fdm_cpu.f90
index ce3d0587d5e..92b57cd0efd 100644
--- a/src/math/bcknd/cpu/fdm_cpu.f90
+++ b/src/math/bcknd/cpu/fdm_cpu.f90
@@ -1,4 +1,4 @@
-!> Fast Diagonalization 
+!> Fast Diagonalization
 module fdm_cpu
   use num_types, only : rp
   use tensor_cpu
@@ -14,7 +14,7 @@ subroutine fdm_do_fast_cpu(e, r, s, d, nl, ldim, nelv)
     real(kind=rp), intent(inout) :: e(nl**ldim, nelv)
     real(kind=rp), intent(inout) :: r(nl**ldim, nelv)
     real(kind=rp), intent(inout) :: s(nl*nl,2,ldim, nelv)
-    real(kind=rp), intent(inout) :: d(nl**ldim, nelv)    
+    real(kind=rp), intent(inout) :: d(nl**ldim, nelv)
     integer ::  ie, nn, i
 
     nn = nl**ldim
@@ -38,5 +38,5 @@ subroutine fdm_do_fast_cpu(e, r, s, d, nl, ldim, nelv)
        end do
     end if
   end subroutine fdm_do_fast_cpu
-  
+
 end module fdm_cpu
diff --git a/src/math/bcknd/cpu/opgrad.f90 b/src/math/bcknd/cpu/opgrad.f90
index 07e28d5d60a..136e5ce15c0 100644
--- a/src/math/bcknd/cpu/opgrad.f90
+++ b/src/math/bcknd/cpu/opgrad.f90
@@ -45,8 +45,8 @@ subroutine cpu_opgrad_lx(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -85,7 +85,7 @@ subroutine cpu_opgrad_lx(ux, uy, uz, u, dx, dy, dz, &
              ut(i,1,k) = tmp
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -94,7 +94,7 @@ subroutine cpu_opgrad_lx(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -102,7 +102,7 @@ subroutine cpu_opgrad_lx(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine cpu_opgrad_lx
-  
+
   subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 18
@@ -112,8 +112,8 @@ subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -139,7 +139,7 @@ subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,15) * u(15,j,1,e) &
                         + dx(i,16) * u(16,j,1,e) &
                         + dx(i,17) * u(17,j,1,e) &
-                        + dx(i,18) * u(18,j,1,e) 
+                        + dx(i,18) * u(18,j,1,e)
           end do
        end do
 
@@ -163,7 +163,7 @@ subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, &
                           + dy(j,15) * u(i,15,k,e) &
                           + dy(j,16) * u(i,16,k,e) &
                           + dy(j,17) * u(i,17,k,e) &
-                          + dy(j,18) * u(i,18,k,e) 
+                          + dy(j,18) * u(i,18,k,e)
              end do
           end do
        end do
@@ -187,10 +187,10 @@ subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,15) * u(i,1,15,e) &
                        + dz(k,16) * u(i,1,16,e) &
                        + dz(k,17) * u(i,1,17,e) &
-                       + dz(k,18) * u(i,1,18,e) 
+                       + dz(k,18) * u(i,1,18,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -199,7 +199,7 @@ subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -207,7 +207,7 @@ subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine cpu_opgrad_lx18
-  
+
   subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 17
@@ -217,8 +217,8 @@ subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -243,7 +243,7 @@ subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,14) * u(14,j,1,e) &
                         + dx(i,15) * u(15,j,1,e) &
                         + dx(i,16) * u(16,j,1,e) &
-                        + dx(i,17) * u(17,j,1,e) 
+                        + dx(i,17) * u(17,j,1,e)
           end do
        end do
 
@@ -266,7 +266,7 @@ subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, &
                           + dy(j,14) * u(i,14,k,e) &
                           + dy(j,15) * u(i,15,k,e) &
                           + dy(j,16) * u(i,16,k,e) &
-                          + dy(j,17) * u(i,17,k,e) 
+                          + dy(j,17) * u(i,17,k,e)
              end do
           end do
        end do
@@ -289,10 +289,10 @@ subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,14) * u(i,1,14,e) &
                        + dz(k,15) * u(i,1,15,e) &
                        + dz(k,16) * u(i,1,16,e) &
-                       + dz(k,17) * u(i,1,17,e) 
+                       + dz(k,17) * u(i,1,17,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -301,7 +301,7 @@ subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -309,7 +309,7 @@ subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine cpu_opgrad_lx17
-  
+
   subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 16
@@ -319,8 +319,8 @@ subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -344,7 +344,7 @@ subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,13) * u(13,j,1,e) &
                         + dx(i,14) * u(14,j,1,e) &
                         + dx(i,15) * u(15,j,1,e) &
-                        + dx(i,16) * u(16,j,1,e) 
+                        + dx(i,16) * u(16,j,1,e)
           end do
        end do
 
@@ -366,7 +366,7 @@ subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, &
                           + dy(j,13) * u(i,13,k,e) &
                           + dy(j,14) * u(i,14,k,e) &
                           + dy(j,15) * u(i,15,k,e) &
-                          + dy(j,16) * u(i,16,k,e) 
+                          + dy(j,16) * u(i,16,k,e)
              end do
           end do
        end do
@@ -391,7 +391,7 @@ subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,16) * u(i,1,16,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -400,7 +400,7 @@ subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -408,7 +408,7 @@ subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine cpu_opgrad_lx16
-  
+
   subroutine cpu_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 15
@@ -418,8 +418,8 @@ subroutine cpu_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -442,7 +442,7 @@ subroutine cpu_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,12) * u(12,j,1,e) &
                         + dx(i,13) * u(13,j,1,e) &
                         + dx(i,14) * u(14,j,1,e) &
-                        + dx(i,15) * u(15,j,1,e) 
+                        + dx(i,15) * u(15,j,1,e)
           end do
        end do
 
@@ -463,7 +463,7 @@ subroutine cpu_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, &
                           + dy(j,12) * u(i,12,k,e) &
                           + dy(j,13) * u(i,13,k,e) &
                           + dy(j,14) * u(i,14,k,e) &
-                          + dy(j,15) * u(i,15,k,e) 
+                          + dy(j,15) * u(i,15,k,e)
              end do
           end do
        end do
@@ -484,10 +484,10 @@ subroutine cpu_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,12) * u(i,1,12,e) &
                        + dz(k,13) * u(i,1,13,e) &
                        + dz(k,14) * u(i,1,14,e) &
-                       + dz(k,15) * u(i,1,15,e) 
+                       + dz(k,15) * u(i,1,15,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -496,7 +496,7 @@ subroutine cpu_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -514,8 +514,8 @@ subroutine cpu_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -537,7 +537,7 @@ subroutine cpu_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,11) * u(11,j,1,e) &
                         + dx(i,12) * u(12,j,1,e) &
                         + dx(i,13) * u(13,j,1,e) &
-                        + dx(i,14) * u(14,j,1,e) 
+                        + dx(i,14) * u(14,j,1,e)
           end do
        end do
 
@@ -580,7 +580,7 @@ subroutine cpu_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,14) * u(i,1,14,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -589,7 +589,7 @@ subroutine cpu_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -597,7 +597,7 @@ subroutine cpu_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine cpu_opgrad_lx14
-  
+
   subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 13
@@ -607,8 +607,8 @@ subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -629,7 +629,7 @@ subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,10) * u(10,j,1,e) &
                         + dx(i,11) * u(11,j,1,e) &
                         + dx(i,12) * u(12,j,1,e) &
-                        + dx(i,13) * u(13,j,1,e) 
+                        + dx(i,13) * u(13,j,1,e)
           end do
        end do
 
@@ -648,7 +648,7 @@ subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, &
                           + dy(j,10) * u(i,10,k,e) &
                           + dy(j,11) * u(i,11,k,e) &
                           + dy(j,12) * u(i,12,k,e) &
-                          + dy(j,13) * u(i,13,k,e) 
+                          + dy(j,13) * u(i,13,k,e)
              end do
           end do
        end do
@@ -670,7 +670,7 @@ subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,13) * u(i,1,13,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -679,7 +679,7 @@ subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -687,7 +687,7 @@ subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine cpu_opgrad_lx13
-  
+
   subroutine cpu_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 12
@@ -697,8 +697,8 @@ subroutine cpu_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -718,7 +718,7 @@ subroutine cpu_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,9) * u(9,j,1,e) &
                         + dx(i,10) * u(10,j,1,e) &
                         + dx(i,11) * u(11,j,1,e) &
-                        + dx(i,12) * u(12,j,1,e) 
+                        + dx(i,12) * u(12,j,1,e)
           end do
        end do
 
@@ -757,7 +757,7 @@ subroutine cpu_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,12) * u(i,1,12,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -766,7 +766,7 @@ subroutine cpu_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -784,8 +784,8 @@ subroutine cpu_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -804,7 +804,7 @@ subroutine cpu_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,8) * u(8,j,1,e) &
                         + dx(i,9) * u(9,j,1,e) &
                         + dx(i,10) * u(10,j,1,e) &
-                        + dx(i,11) * u(11,j,1,e) 
+                        + dx(i,11) * u(11,j,1,e)
           end do
        end do
 
@@ -821,7 +821,7 @@ subroutine cpu_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, &
                           + dy(j,8) * u(i,8,k,e) &
                           + dy(j,9) * u(i,9,k,e) &
                           + dy(j,10) * u(i,10,k,e) &
-                          + dy(j,11) * u(i,11,k,e) 
+                          + dy(j,11) * u(i,11,k,e)
              end do
           end do
        end do
@@ -838,10 +838,10 @@ subroutine cpu_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,8) * u(i,1,8,e) &
                        + dz(k,9) * u(i,1,9,e) &
                        + dz(k,10) * u(i,1,10,e) &
-                       + dz(k,11) * u(i,1,11,e) 
+                       + dz(k,11) * u(i,1,11,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -850,7 +850,7 @@ subroutine cpu_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -868,8 +868,8 @@ subroutine cpu_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -887,7 +887,7 @@ subroutine cpu_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,7) * u(7,j,1,e) &
                         + dx(i,8) * u(8,j,1,e) &
                         + dx(i,9) * u(9,j,1,e) &
-                        + dx(i,10) * u(10,j,1,e) 
+                        + dx(i,10) * u(10,j,1,e)
           end do
        end do
 
@@ -903,7 +903,7 @@ subroutine cpu_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, &
                           + dy(j,7) * u(i,7,k,e) &
                           + dy(j,8) * u(i,8,k,e) &
                           + dy(j,9) * u(i,9,k,e) &
-                          + dy(j,10) * u(i,10,k,e) 
+                          + dy(j,10) * u(i,10,k,e)
              end do
           end do
        end do
@@ -919,10 +919,10 @@ subroutine cpu_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,7) * u(i,1,7,e) &
                        + dz(k,8) * u(i,1,8,e) &
                        + dz(k,9) * u(i,1,9,e) &
-                       + dz(k,10) * u(i,1,10,e) 
+                       + dz(k,10) * u(i,1,10,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -931,7 +931,7 @@ subroutine cpu_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -949,8 +949,8 @@ subroutine cpu_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -967,7 +967,7 @@ subroutine cpu_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,6) * u(6,j,1,e) &
                         + dx(i,7) * u(7,j,1,e) &
                         + dx(i,8) * u(8,j,1,e) &
-                        + dx(i,9) * u(9,j,1,e) 
+                        + dx(i,9) * u(9,j,1,e)
           end do
        end do
 
@@ -982,7 +982,7 @@ subroutine cpu_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, &
                           + dy(j,6) * u(i,6,k,e) &
                           + dy(j,7) * u(i,7,k,e) &
                           + dy(j,8) * u(i,8,k,e) &
-                          + dy(j,9) * u(i,9,k,e) 
+                          + dy(j,9) * u(i,9,k,e)
              end do
           end do
        end do
@@ -997,10 +997,10 @@ subroutine cpu_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,6) * u(i,1,6,e) &
                        + dz(k,7) * u(i,1,7,e) &
                        + dz(k,8) * u(i,1,8,e) &
-                       + dz(k,9) * u(i,1,9,e) 
+                       + dz(k,9) * u(i,1,9,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -1009,7 +1009,7 @@ subroutine cpu_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -1027,8 +1027,8 @@ subroutine cpu_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -1044,7 +1044,7 @@ subroutine cpu_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,5) * u(5,j,1,e) &
                         + dx(i,6) * u(6,j,1,e) &
                         + dx(i,7) * u(7,j,1,e) &
-                        + dx(i,8) * u(8,j,1,e) 
+                        + dx(i,8) * u(8,j,1,e)
           end do
        end do
 
@@ -1058,7 +1058,7 @@ subroutine cpu_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, &
                           + dy(j,5) * u(i,5,k,e) &
                           + dy(j,6) * u(i,6,k,e) &
                           + dy(j,7) * u(i,7,k,e) &
-                          + dy(j,8) * u(i,8,k,e) 
+                          + dy(j,8) * u(i,8,k,e)
              end do
           end do
        end do
@@ -1072,10 +1072,10 @@ subroutine cpu_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,5) * u(i,1,5,e) &
                        + dz(k,6) * u(i,1,6,e) &
                        + dz(k,7) * u(i,1,7,e) &
-                       + dz(k,8) * u(i,1,8,e) 
+                       + dz(k,8) * u(i,1,8,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -1084,7 +1084,7 @@ subroutine cpu_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -1102,8 +1102,8 @@ subroutine cpu_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -1118,7 +1118,7 @@ subroutine cpu_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,4) * u(4,j,1,e) &
                         + dx(i,5) * u(5,j,1,e) &
                         + dx(i,6) * u(6,j,1,e) &
-                        + dx(i,7) * u(7,j,1,e) 
+                        + dx(i,7) * u(7,j,1,e)
           end do
        end do
 
@@ -1131,7 +1131,7 @@ subroutine cpu_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, &
                           + dy(j,4) * u(i,4,k,e) &
                           + dy(j,5) * u(i,5,k,e) &
                           + dy(j,6) * u(i,6,k,e) &
-                          + dy(j,7) * u(i,7,k,e) 
+                          + dy(j,7) * u(i,7,k,e)
              end do
           end do
        end do
@@ -1144,10 +1144,10 @@ subroutine cpu_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,4) * u(i,1,4,e) &
                        + dz(k,5) * u(i,1,5,e) &
                        + dz(k,6) * u(i,1,6,e) &
-                       + dz(k,7) * u(i,1,7,e) 
+                       + dz(k,7) * u(i,1,7,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -1156,7 +1156,7 @@ subroutine cpu_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -1174,8 +1174,8 @@ subroutine cpu_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -1189,7 +1189,7 @@ subroutine cpu_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,3) * u(3,j,1,e) &
                         + dx(i,4) * u(4,j,1,e) &
                         + dx(i,5) * u(5,j,1,e) &
-                        + dx(i,6) * u(6,j,1,e) 
+                        + dx(i,6) * u(6,j,1,e)
           end do
        end do
 
@@ -1201,7 +1201,7 @@ subroutine cpu_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, &
                           + dy(j,3) * u(i,3,k,e) &
                           + dy(j,4) * u(i,4,k,e) &
                           + dy(j,5) * u(i,5,k,e) &
-                          + dy(j,6) * u(i,6,k,e) 
+                          + dy(j,6) * u(i,6,k,e)
              end do
           end do
        end do
@@ -1213,10 +1213,10 @@ subroutine cpu_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,3) * u(i,1,3,e) &
                        + dz(k,4) * u(i,1,4,e) &
                        + dz(k,5) * u(i,1,5,e) &
-                       + dz(k,6) * u(i,1,6,e) 
+                       + dz(k,6) * u(i,1,6,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -1225,7 +1225,7 @@ subroutine cpu_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -1243,8 +1243,8 @@ subroutine cpu_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -1257,7 +1257,7 @@ subroutine cpu_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, &
                         + dx(i,2) * u(2,j,1,e) &
                         + dx(i,3) * u(3,j,1,e) &
                         + dx(i,4) * u(4,j,1,e) &
-                        + dx(i,5) * u(5,j,1,e) 
+                        + dx(i,5) * u(5,j,1,e)
           end do
        end do
 
@@ -1268,7 +1268,7 @@ subroutine cpu_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, &
                           + dy(j,2) * u(i,2,k,e) &
                           + dy(j,3) * u(i,3,k,e) &
                           + dy(j,4) * u(i,4,k,e) &
-                          + dy(j,5) * u(i,5,k,e) 
+                          + dy(j,5) * u(i,5,k,e)
              end do
           end do
        end do
@@ -1279,10 +1279,10 @@ subroutine cpu_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, &
                        + dz(k,2) * u(i,1,2,e) &
                        + dz(k,3) * u(i,1,3,e) &
                        + dz(k,4) * u(i,1,4,e) &
-                       + dz(k,5) * u(i,1,5,e) 
+                       + dz(k,5) * u(i,1,5,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -1291,7 +1291,7 @@ subroutine cpu_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -1309,8 +1309,8 @@ subroutine cpu_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -1322,7 +1322,7 @@ subroutine cpu_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, &
              ur(i,j,1) = dx(i,1) * u(1,j,1,e) &
                         + dx(i,2) * u(2,j,1,e) &
                         + dx(i,3) * u(3,j,1,e) &
-                        + dx(i,4) * u(4,j,1,e) 
+                        + dx(i,4) * u(4,j,1,e)
           end do
        end do
 
@@ -1332,7 +1332,7 @@ subroutine cpu_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, &
                 us(i,j,k) = dy(j,1) * u(i,1,k,e) &
                           + dy(j,2) * u(i,2,k,e) &
                           + dy(j,3) * u(i,3,k,e) &
-                          + dy(j,4) * u(i,4,k,e) 
+                          + dy(j,4) * u(i,4,k,e)
              end do
           end do
        end do
@@ -1342,10 +1342,10 @@ subroutine cpu_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, &
              ut(i,1,k) = dz(k,1) * u(i,1,1,e) &
                        + dz(k,2) * u(i,1,2,e) &
                        + dz(k,3) * u(i,1,3,e) &
-                       + dz(k,4) * u(i,1,4,e) 
+                       + dz(k,4) * u(i,1,4,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -1354,7 +1354,7 @@ subroutine cpu_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -1372,8 +1372,8 @@ subroutine cpu_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -1384,7 +1384,7 @@ subroutine cpu_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, &
           do i = 1, lx
              ur(i,j,1) = dx(i,1) * u(1,j,1,e) &
                         + dx(i,2) * u(2,j,1,e) &
-                        + dx(i,3) * u(3,j,1,e) 
+                        + dx(i,3) * u(3,j,1,e)
           end do
        end do
 
@@ -1393,7 +1393,7 @@ subroutine cpu_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, &
              do i = 1, lx
                 us(i,j,k) = dy(j,1) * u(i,1,k,e) &
                           + dy(j,2) * u(i,2,k,e) &
-                          + dy(j,3) * u(i,3,k,e) 
+                          + dy(j,3) * u(i,3,k,e)
              end do
           end do
        end do
@@ -1402,10 +1402,10 @@ subroutine cpu_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, &
           do i = 1, lx*lx
              ut(i,1,k) = dz(k,1) * u(i,1,1,e) &
                        + dz(k,2) * u(i,1,2,e) &
-                       + dz(k,3) * u(i,1,3,e) 
+                       + dz(k,3) * u(i,1,3,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -1414,7 +1414,7 @@ subroutine cpu_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
@@ -1432,8 +1432,8 @@ subroutine cpu_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx)
     real(kind=rp) :: us(lx,lx,lx)
     real(kind=rp) :: ut(lx,lx,lx)
@@ -1443,7 +1443,7 @@ subroutine cpu_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, &
        do j = 1, lx * lx
           do i = 1, lx
              ur(i,j,1) = dx(i,1) * u(1,j,1,e) &
-                        + dx(i,2) * u(2,j,1,e) 
+                        + dx(i,2) * u(2,j,1,e)
           end do
        end do
 
@@ -1451,7 +1451,7 @@ subroutine cpu_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, &
           do j = 1, lx
              do i = 1, lx
                 us(i,j,k) = dy(j,1) * u(i,1,k,e) &
-                          + dy(j,2) * u(i,2,k,e) 
+                          + dy(j,2) * u(i,2,k,e)
              end do
           end do
        end do
@@ -1459,10 +1459,10 @@ subroutine cpu_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, &
        do k = 1, lx
           do i = 1, lx*lx
              ut(i,1,k) = dz(k,1) * u(i,1,1,e) &
-                       + dz(k,2) * u(i,1,2,e) 
+                       + dz(k,2) * u(i,1,2,e)
           end do
        end do
-    
+
        do i = 1, lx * lx * lx
           ux(i,1,1,e) = w3(i,1,1) &
                       * ( drdx(i,1,1,e) * ur(i,1,1) &
@@ -1471,7 +1471,7 @@ subroutine cpu_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, &
           uy(i,1,1,e) = w3(i,1,1) &
                       * ( dsdy(i,1,1,e) * us(i,1,1) &
                         + drdy(i,1,1,e) * ur(i,1,1) &
-                        + dtdy(i,1,1,e) * ut(i,1,1) )          
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
           uz(i,1,1,e) = w3(i,1,1) &
                       * ( dtdz(i,1,1,e) * ut(i,1,1) &
                         + drdz(i,1,1,e) * ur(i,1,1) &
diff --git a/src/math/bcknd/cpu/opr_cpu.f90 b/src/math/bcknd/cpu/opr_cpu.f90
index 9cef11346c6..a9c2985515c 100644
--- a/src/math/bcknd/cpu/opr_cpu.f90
+++ b/src/math/bcknd/cpu/opr_cpu.f90
@@ -40,7 +40,6 @@ module opr_cpu
   use space, only : space_t
   use coefs, only : coef_t
   use math
-  use mesh, only : mesh_t
   use field, only : field_t
   use gather_scatter
   use mathops
@@ -48,8 +47,8 @@ module opr_cpu
   private
 
   public :: opr_cpu_dudxyz, opr_cpu_opgrad, opr_cpu_cdtp, &
-       opr_cpu_conv1, opr_cpu_curl, opr_cpu_cfl
-  
+       opr_cpu_conv1, opr_cpu_curl, opr_cpu_cfl, opr_cpu_lambda2
+
 contains
 
   subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
@@ -62,46 +61,46 @@ subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
     associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof)
       select case(coef%Xh%lx)
       case(14)
-         call cpu_dudxyz_lx14(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx14(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv)
       case(13)
-         call cpu_dudxyz_lx13(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx13(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv)
       case(12)
-         call cpu_dudxyz_lx12(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx12(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv)
       case(11)
-         call cpu_dudxyz_lx11(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx11(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv)
       case(10)
-         call cpu_dudxyz_lx10(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx10(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv)
       case(9)
-         call cpu_dudxyz_lx9(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx9(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv)
       case(8)
-         call cpu_dudxyz_lx8(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx8(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv)
       case(7)
-         call cpu_dudxyz_lx7(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx7(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv)
       case(6)
-         call cpu_dudxyz_lx6(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx6(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv)
       case(5)
-         call cpu_dudxyz_lx5(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx5(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv)
       case(4)
-         call cpu_dudxyz_lx4(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx4(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv)
       case(3)
-         call cpu_dudxyz_lx3(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx3(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv)
       case(2)
-         call cpu_dudxyz_lx2(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx2(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv)
       case default
-         call cpu_dudxyz_lx(du, u, dr, ds, dt, & 
+         call cpu_dudxyz_lx(du, u, dr, ds, dt, &
               Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv, Xh%lx)
       end select
 
@@ -109,8 +108,8 @@ subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
 
   end subroutine opr_cpu_dudxyz
 
-  subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end) 
-    type(coef_t), intent(in) :: coef  
+  subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
+    type(coef_t), intent(in) :: coef
     integer, intent(in) :: e_start, e_end
     real(kind=rp), dimension(coef%Xh%lxyz,e_end-e_start+1), intent(inout) :: ux
     real(kind=rp), dimension(coef%Xh%lxyz,e_end-e_start+1), intent(inout) :: uy
@@ -122,7 +121,7 @@ subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
          drdx => coef%drdx, drdy => coef%drdy, drdz => coef%drdz, &
          dsdx => coef%dsdx, dsdy => coef%dsdy, dsdz => coef%dsdz, &
          dtdx => coef%dtdx, dtdy => coef%dtdy, dtdz => coef%dtdz)
-      
+
       select case(Xh%lx)
       case(18)
          call cpu_opgrad_lx18(ux, uy, uz, u, &
@@ -194,49 +193,49 @@ subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
               drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
               drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
               drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
-              Xh%w3, e_len)  
+              Xh%w3, e_len)
       case(8)
          call cpu_opgrad_lx8(ux, uy, uz, u, &
               Xh%dx, Xh%dy, Xh%dz, &
               drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
               drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
               drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
-              Xh%w3, e_len)  
+              Xh%w3, e_len)
       case(7)
          call cpu_opgrad_lx7(ux, uy, uz, u, &
               Xh%dx, Xh%dy, Xh%dz, &
               drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
               drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
               drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
-              Xh%w3, e_len)  
+              Xh%w3, e_len)
       case(6)
          call cpu_opgrad_lx6(ux, uy, uz, u, &
               Xh%dx, Xh%dy, Xh%dz, &
               drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
               drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
               drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
-              Xh%w3, e_len)  
+              Xh%w3, e_len)
       case(5)
          call cpu_opgrad_lx5(ux, uy, uz, u, &
               Xh%dx, Xh%dy, Xh%dz, &
               drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
               drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
               drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
-              Xh%w3, e_len)  
+              Xh%w3, e_len)
       case(4)
          call cpu_opgrad_lx4(ux, uy, uz, u, &
               Xh%dx, Xh%dy, Xh%dz, &
               drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
               drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
               drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
-              Xh%w3, e_len)  
+              Xh%w3, e_len)
       case(3)
          call cpu_opgrad_lx3(ux, uy, uz, u, &
               Xh%dx, Xh%dy, Xh%dz, &
               drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
               drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
               drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
-              Xh%w3, e_len) 
+              Xh%w3, e_len)
       case(2)
          call cpu_opgrad_lx2(ux, uy, uz, u, &
               Xh%dx, Xh%dy, Xh%dz, &
@@ -250,7 +249,7 @@ subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end)
               drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
               drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
               drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
-              Xh%w3, e_len, Xh%lx) 
+              Xh%w3, e_len, Xh%lx)
       end select
     end associate
 
@@ -359,25 +358,25 @@ subroutine opr_cpu_conv1(du, u, vx, vy, vz, Xh, coef, e_start, e_end)
               drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
               drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
               drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
-              jacinv(1,1,1,e_start), e_len)         
+              jacinv(1,1,1,e_start), e_len)
       case(9)
          call cpu_conv1_lx9(du, u, vx, vy, vz, Xh%dx, Xh%dy, Xh%dz, &
               drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
               drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
               drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
-              jacinv(1,1,1,e_start), e_len)         
+              jacinv(1,1,1,e_start), e_len)
       case(8)
          call cpu_conv1_lx8(du, u, vx, vy, vz, Xh%dx, Xh%dy, Xh%dz, &
               drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
               drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
               drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
-              jacinv(1,1,1,e_start), e_len)         
+              jacinv(1,1,1,e_start), e_len)
       case(7)
          call cpu_conv1_lx7(du, u, vx, vy, vz, Xh%dx, Xh%dy, Xh%dz, &
               drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
               drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), &
               drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), &
-              jacinv(1,1,1,e_start), e_len)         
+              jacinv(1,1,1,e_start), e_len)
       case(6)
          call cpu_conv1_lx6(du, u, vx, vy, vz, Xh%dx, Xh%dy, Xh%dz, &
               drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), &
@@ -416,7 +415,7 @@ subroutine opr_cpu_conv1(du, u, vx, vy, vz, Xh, coef, e_start, e_end)
               jacinv(1,1,1,e_start), e_len, Xh%lx)
       end select
     end associate
-    
+
   end subroutine opr_cpu_conv1
 
   subroutine opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
@@ -459,9 +458,9 @@ subroutine opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
     !!    BC dependent, Needs to change if cyclic
 
     call opcolv(w1%x, w2%x, w3%x, c_Xh%B, gdim, n)
-    call c_Xh%gs_h%op(w1, GS_OP_ADD) 
-    call c_Xh%gs_h%op(w2, GS_OP_ADD) 
-    call c_Xh%gs_h%op(w3, GS_OP_ADD) 
+    call c_Xh%gs_h%op(w1, GS_OP_ADD)
+    call c_Xh%gs_h%op(w2, GS_OP_ADD)
+    call c_Xh%gs_h%op(w3, GS_OP_ADD)
     call opcolv(w1%x, w2%x, w3%x, c_Xh%Binv, gdim, n)
 
   end subroutine opr_cpu_curl
@@ -491,11 +490,11 @@ function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
                    ut = ( u(i,j,k,e)*coef%dtdx(i,j,k,e) &
                       +   v(i,j,k,e)*coef%dtdy(i,j,k,e) &
                       +   w(i,j,k,e)*coef%dtdz(i,j,k,e) ) * coef%jacinv(i,j,k,e)
- 
+
                    cflr = abs(dt*ur*Xh%dr_inv(i))
                    cfls = abs(dt*us*Xh%ds_inv(j))
                    cflt = abs(dt*ut*Xh%dt_inv(k))
- 
+
                    cflm = cflr + cfls + cflt
                    cfl  = max(cfl,cflm)
                 end do
@@ -513,7 +512,7 @@ function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
 
                 cflr = abs(dt*ur*Xh%dr_inv(i))
                 cfls = abs(dt*us*Xh%ds_inv(j))
-                
+
                 cflm = cflr + cfls
                 cfl  = max(cfl,cflm)
 
@@ -523,4 +522,79 @@ function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
     end if
   end function opr_cpu_cfl
 
+  subroutine opr_cpu_lambda2(lambda2, u, v, w, coef)
+    type(coef_t), intent(in) :: coef
+    type(field_t), intent(inout) :: lambda2
+    type(field_t), intent(in) :: u, v, w
+    real(kind=rp) :: grad(coef%Xh%lxyz,3,3)
+    integer :: temp_indices(9), e, i, ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+
+    do e = 1, coef%msh%nelv
+       call opr_cpu_opgrad(grad(1,1,1), grad(1,1,2), grad(1,1,3), &
+                           u%x(1,1,1,e),coef,e,e)
+       call opr_cpu_opgrad(grad(1,2,1), grad(1,2,2), grad(1,2,3), &
+                           v%x(1,1,1,e),coef,e,e)
+       call opr_cpu_opgrad(grad(1,3,1), grad(1,3,2), grad(1,3,3), &
+                           w%x(1,1,1,e),coef,e,e)
+
+       do i = 1, coef%Xh%lxyz
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2%x(i,1,1,e) = l2/(coef%B(i,1,1,e)**2)
+       end do
+    end do
+       
+  end subroutine opr_cpu_lambda2  
+
 end module opr_cpu
diff --git a/src/math/bcknd/cpu/tensor_cpu.f90 b/src/math/bcknd/cpu/tensor_cpu.f90
index 8ef02a12b14..df64c0108b6 100644
--- a/src/math/bcknd/cpu/tensor_cpu.f90
+++ b/src/math/bcknd/cpu/tensor_cpu.f90
@@ -5,7 +5,7 @@ module tensor_cpu
   private
 
   public :: tnsr2d_el_cpu, tnsr3d_el_cpu, tnsr3d_cpu, tnsr1_3d_cpu
- 
+
 contains
 
   subroutine tnsr2d_el_cpu(v, nv, u, nu, A, Bt)
@@ -16,9 +16,9 @@ subroutine tnsr2d_el_cpu(v, nv, u, nu, A, Bt)
 
     call mxm(A, nv, u, nu, work, nu)
     call mxm(work, nv, Bt, nu, v, nv)
-    
+
   end subroutine tnsr2d_el_cpu
-  
+
   subroutine tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct)
     integer, intent(in) :: nv, nu
     real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
@@ -58,7 +58,7 @@ subroutine tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct)
     else
        call tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct)
     end if
-    
+
   end subroutine tnsr3d_el_cpu
 
   subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct)
@@ -70,20 +70,20 @@ subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct)
     integer :: i, j, k, l, nunu, nvnu, nvnv
     integer :: ii, jj
     nvnu = nv * nu
-    nunu = nu * nu 
+    nunu = nu * nu
     nvnv = nv * nv
 
     do j = 1, nunu
        do i = 1, nv
           ii = i + nv * (j - 1)
-          tmp = 0.0_rp          
+          tmp = 0.0_rp
           do k = 1, nu
              tmp = tmp + A(i,k) * u(k + nu * (j - 1))
           end do
           work(ii) = tmp
        end do
     end do
-    
+
     do i = 1, nu
        do j = 1, nv
           do l = 1, nv
@@ -97,7 +97,7 @@ subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct)
           end do
        end do
     end do
-     
+
     do j = 1, nv
        do i = 1, nvnv
           jj = i + nvnv * (j - 1)
@@ -109,19 +109,19 @@ subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct)
           v(jj) = tmp
        end do
     end do
-    
+
   end subroutine tnsr3d_el_nvnu_cpu
 
   subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n)
     integer, intent(in) :: n
     real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n)
     real(kind=rp), intent(inout) :: A(n,n), Bt(n,n), Ct(n,n)
-    real(kind=rp) :: work(n**3), work2(n**3), tmp    
+    real(kind=rp) :: work(n**3), work2(n**3), tmp
     integer :: i, j, l, k
     integer :: ii, jj, nn
 
     nn = n**2
-    
+
     do j = 1, nn
        do i = 1, n
           ii = i + n * (j - 1)
@@ -132,7 +132,7 @@ subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n)
           work(ii) =  tmp
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -145,7 +145,7 @@ subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
@@ -156,7 +156,7 @@ subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n)
           v(jj) = tmp
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n_cpu
 
   subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct)
@@ -184,10 +184,10 @@ subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct)
                    + A(i,11) * u(11 + n * (j - 1)) &
                    + A(i,12) * u(12 + n * (j - 1)) &
                    + A(i,13) * u(13 + n * (j - 1)) &
-                   + A(i,14) * u(14 + n * (j - 1))                    
+                   + A(i,14) * u(14 + n * (j - 1))
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -205,11 +205,11 @@ subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct)
                        + work(l + n * (11 - 1) + nn * (i - 1)) * Bt(11,j) &
                        + work(l + n * (12 - 1) + nn * (i - 1)) * Bt(12,j) &
                        + work(l + n * (13 - 1) + nn * (i - 1)) * Bt(13,j) &
-                       + work(l + n * (14 - 1) + nn * (i - 1)) * Bt(14,j) 
+                       + work(l + n * (14 - 1) + nn * (i - 1)) * Bt(14,j)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
@@ -226,10 +226,10 @@ subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct)
                 + work2(i + nn * (11 - 1)) * Ct(11, j) &
                 + work2(i + nn * (12 - 1)) * Ct(12, j) &
                 + work2(i + nn * (13 - 1)) * Ct(13, j) &
-                + work2(i + nn * (14 - 1)) * Ct(14, j) 
+                + work2(i + nn * (14 - 1)) * Ct(14, j)
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n14_cpu
 
   subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct)
@@ -256,10 +256,10 @@ subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct)
                    + A(i,10) * u(10 + n * (j - 1)) &
                    + A(i,11) * u(11 + n * (j - 1)) &
                    + A(i,12) * u(12 + n * (j - 1)) &
-                   + A(i,13) * u(13 + n * (j - 1)) 
+                   + A(i,13) * u(13 + n * (j - 1))
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -276,11 +276,11 @@ subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct)
                        + work(l + n * (10 - 1) + nn * (i - 1)) * Bt(10,j) &
                        + work(l + n * (11 - 1) + nn * (i - 1)) * Bt(11,j) &
                        + work(l + n * (12 - 1) + nn * (i - 1)) * Bt(12,j) &
-                       + work(l + n * (13 - 1) + nn * (i - 1)) * Bt(13,j) 
+                       + work(l + n * (13 - 1) + nn * (i - 1)) * Bt(13,j)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
@@ -296,10 +296,10 @@ subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct)
                 + work2(i + nn * (10 - 1)) * Ct(10, j) &
                 + work2(i + nn * (11 - 1)) * Ct(11, j) &
                 + work2(i + nn * (12 - 1)) * Ct(12, j) &
-                + work2(i + nn * (13 - 1)) * Ct(13, j) 
+                + work2(i + nn * (13 - 1)) * Ct(13, j)
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n13_cpu
 
   subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct)
@@ -325,10 +325,10 @@ subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct)
                    + A(i,9) * u(9 + n * (j - 1)) &
                    + A(i,10) * u(10 + n * (j - 1)) &
                    + A(i,11) * u(11 + n * (j - 1)) &
-                   + A(i,12) * u(12 + n * (j - 1)) 
+                   + A(i,12) * u(12 + n * (j - 1))
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -344,11 +344,11 @@ subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct)
                        + work(l + n * (9 - 1) + nn * (i - 1)) * Bt(9,j) &
                        + work(l + n * (10 - 1) + nn * (i - 1)) * Bt(10,j) &
                        + work(l + n * (11 - 1) + nn * (i - 1)) * Bt(11,j) &
-                       + work(l + n * (12 - 1) + nn * (i - 1)) * Bt(12,j) 
+                       + work(l + n * (12 - 1) + nn * (i - 1)) * Bt(12,j)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
@@ -363,10 +363,10 @@ subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct)
                 + work2(i + nn * (9 - 1)) * Ct(9, j) &
                 + work2(i + nn * (10 - 1)) * Ct(10, j) &
                 + work2(i + nn * (11 - 1)) * Ct(11, j) &
-                + work2(i + nn * (12 - 1)) * Ct(12, j) 
+                + work2(i + nn * (12 - 1)) * Ct(12, j)
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n12_cpu
 
   subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct)
@@ -391,10 +391,10 @@ subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct)
                    + A(i,8) * u(8 + n * (j - 1)) &
                    + A(i,9) * u(9 + n * (j - 1)) &
                    + A(i,10) * u(10 + n * (j - 1)) &
-                   + A(i,11) * u(11 + n * (j - 1)) 
+                   + A(i,11) * u(11 + n * (j - 1))
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -409,11 +409,11 @@ subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct)
                        + work(l + n * (8 - 1) + nn * (i - 1)) * Bt(8,j) &
                        + work(l + n * (9 - 1) + nn * (i - 1)) * Bt(9,j) &
                        + work(l + n * (10 - 1) + nn * (i - 1)) * Bt(10,j) &
-                       + work(l + n * (11 - 1) + nn * (i - 1)) * Bt(11,j) 
+                       + work(l + n * (11 - 1) + nn * (i - 1)) * Bt(11,j)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
@@ -427,10 +427,10 @@ subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct)
                 + work2(i + nn * (8 - 1)) * Ct(8, j) &
                 + work2(i + nn * (9 - 1)) * Ct(9, j) &
                 + work2(i + nn * (10 - 1)) * Ct(10, j) &
-                + work2(i + nn * (11 - 1)) * Ct(11, j) 
+                + work2(i + nn * (11 - 1)) * Ct(11, j)
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n11_cpu
 
   subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct)
@@ -454,10 +454,10 @@ subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct)
                    + A(i,7) * u(7 + n * (j - 1)) &
                    + A(i,8) * u(8 + n * (j - 1)) &
                    + A(i,9) * u(9 + n * (j - 1)) &
-                   + A(i,10) * u(10 + n * (j - 1)) 
+                   + A(i,10) * u(10 + n * (j - 1))
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -471,11 +471,11 @@ subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct)
                        + work(l + n * (7 - 1) + nn * (i - 1)) * Bt(7,j) &
                        + work(l + n * (8 - 1) + nn * (i - 1)) * Bt(8,j) &
                        + work(l + n * (9 - 1) + nn * (i - 1)) * Bt(9,j) &
-                       + work(l + n * (10 - 1) + nn * (i - 1)) * Bt(10,j) 
+                       + work(l + n * (10 - 1) + nn * (i - 1)) * Bt(10,j)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
@@ -488,10 +488,10 @@ subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct)
                 + work2(i + nn * (7 - 1)) * Ct(7, j) &
                 + work2(i + nn * (8 - 1)) * Ct(8, j) &
                 + work2(i + nn * (9 - 1)) * Ct(9, j) &
-                + work2(i + nn * (10 - 1)) * Ct(10, j) 
+                + work2(i + nn * (10 - 1)) * Ct(10, j)
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n10_cpu
 
   subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct)
@@ -514,10 +514,10 @@ subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct)
                    + A(i,6) * u(6 + n * (j - 1)) &
                    + A(i,7) * u(7 + n * (j - 1)) &
                    + A(i,8) * u(8 + n * (j - 1)) &
-                   + A(i,9) * u(9 + n * (j - 1)) 
+                   + A(i,9) * u(9 + n * (j - 1))
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -530,11 +530,11 @@ subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct)
                        + work(l + n * (6 - 1) + nn * (i - 1)) * Bt(6,j) &
                        + work(l + n * (7 - 1) + nn * (i - 1)) * Bt(7,j) &
                        + work(l + n * (8 - 1) + nn * (i - 1)) * Bt(8,j) &
-                       + work(l + n * (9 - 1) + nn * (i - 1)) * Bt(9,j) 
+                       + work(l + n * (9 - 1) + nn * (i - 1)) * Bt(9,j)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
@@ -546,10 +546,10 @@ subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct)
                 + work2(i + nn * (6 - 1)) * Ct(6, j) &
                 + work2(i + nn * (7 - 1)) * Ct(7, j) &
                 + work2(i + nn * (8 - 1)) * Ct(8, j) &
-                + work2(i + nn * (9 - 1)) * Ct(9, j) 
+                + work2(i + nn * (9 - 1)) * Ct(9, j)
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n9_cpu
 
   subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct)
@@ -571,10 +571,10 @@ subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct)
                    + A(i,5) * u(5 + n * (j - 1)) &
                    + A(i,6) * u(6 + n * (j - 1)) &
                    + A(i,7) * u(7 + n * (j - 1)) &
-                   + A(i,8) * u(8 + n * (j - 1)) 
+                   + A(i,8) * u(8 + n * (j - 1))
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -586,11 +586,11 @@ subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct)
                        + work(l + n * (5 - 1) + nn * (i - 1)) * Bt(5,j) &
                        + work(l + n * (6 - 1) + nn * (i - 1)) * Bt(6,j) &
                        + work(l + n * (7 - 1) + nn * (i - 1)) * Bt(7,j) &
-                       + work(l + n * (8 - 1) + nn * (i - 1)) * Bt(8,j) 
+                       + work(l + n * (8 - 1) + nn * (i - 1)) * Bt(8,j)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
@@ -601,10 +601,10 @@ subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct)
                 + work2(i + nn * (5 - 1)) * Ct(5, j) &
                 + work2(i + nn * (6 - 1)) * Ct(6, j) &
                 + work2(i + nn * (7 - 1)) * Ct(7, j) &
-                + work2(i + nn * (8 - 1)) * Ct(8, j) 
+                + work2(i + nn * (8 - 1)) * Ct(8, j)
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n8_cpu
 
   subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct)
@@ -625,10 +625,10 @@ subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct)
                    + A(i,4) * u(4 + n * (j - 1)) &
                    + A(i,5) * u(5 + n * (j - 1)) &
                    + A(i,6) * u(6 + n * (j - 1)) &
-                   + A(i,7) * u(7 + n * (j - 1)) 
+                   + A(i,7) * u(7 + n * (j - 1))
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -639,11 +639,11 @@ subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct)
                        + work(l + n * (4 - 1) + nn * (i - 1)) * Bt(4,j) &
                        + work(l + n * (5 - 1) + nn * (i - 1)) * Bt(5,j) &
                        + work(l + n * (6 - 1) + nn * (i - 1)) * Bt(6,j) &
-                       + work(l + n * (7 - 1) + nn * (i - 1)) * Bt(7,j) 
+                       + work(l + n * (7 - 1) + nn * (i - 1)) * Bt(7,j)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
@@ -653,10 +653,10 @@ subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct)
                 + work2(i + nn * (4 - 1)) * Ct(4, j) &
                 + work2(i + nn * (5 - 1)) * Ct(5, j) &
                 + work2(i + nn * (6 - 1)) * Ct(6, j) &
-                + work2(i + nn * (7 - 1)) * Ct(7, j) 
+                + work2(i + nn * (7 - 1)) * Ct(7, j)
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n7_cpu
 
   subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct)
@@ -676,10 +676,10 @@ subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct)
                    + A(i,3) * u(3 + n * (j - 1)) &
                    + A(i,4) * u(4 + n * (j - 1)) &
                    + A(i,5) * u(5 + n * (j - 1)) &
-                   + A(i,6) * u(6 + n * (j - 1)) 
+                   + A(i,6) * u(6 + n * (j - 1))
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -689,11 +689,11 @@ subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct)
                        + work(l + n * (3 - 1) + nn * (i - 1)) * Bt(3,j) &
                        + work(l + n * (4 - 1) + nn * (i - 1)) * Bt(4,j) &
                        + work(l + n * (5 - 1) + nn * (i - 1)) * Bt(5,j) &
-                       + work(l + n * (6 - 1) + nn * (i - 1)) * Bt(6,j) 
+                       + work(l + n * (6 - 1) + nn * (i - 1)) * Bt(6,j)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
@@ -702,10 +702,10 @@ subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct)
                 + work2(i + nn * (3 - 1)) * Ct(3, j) &
                 + work2(i + nn * (4 - 1)) * Ct(4, j) &
                 + work2(i + nn * (5 - 1)) * Ct(5, j) &
-                + work2(i + nn * (6 - 1)) * Ct(6, j) 
+                + work2(i + nn * (6 - 1)) * Ct(6, j)
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n6_cpu
 
   subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct)
@@ -724,10 +724,10 @@ subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct)
                    + A(i,2) * u(2 + n * (j - 1)) &
                    + A(i,3) * u(3 + n * (j - 1)) &
                    + A(i,4) * u(4 + n * (j - 1)) &
-                   + A(i,5) * u(5 + n * (j - 1)) 
+                   + A(i,5) * u(5 + n * (j - 1))
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -740,7 +740,7 @@ subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
@@ -748,10 +748,10 @@ subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct)
                 + work2(i + nn * (2 - 1)) * Ct(2, j) &
                 + work2(i + nn * (3 - 1)) * Ct(3, j) &
                 + work2(i + nn * (4 - 1)) * Ct(4, j) &
-                + work2(i + nn * (5 - 1)) * Ct(5, j) 
+                + work2(i + nn * (5 - 1)) * Ct(5, j)
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n5_cpu
 
   subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct)
@@ -769,10 +769,10 @@ subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct)
           work(ii) = A(i,1) * u(1 + n * (j - 1)) &
                    + A(i,2) * u(2 + n * (j - 1)) &
                    + A(i,3) * u(3 + n * (j - 1)) &
-                   + A(i,4) * u(4 + n * (j - 1)) 
+                   + A(i,4) * u(4 + n * (j - 1))
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -780,21 +780,21 @@ subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct)
              work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * Bt(1,j) &
                        + work(l + n * (2 - 1) + nn * (i - 1)) * Bt(2,j) &
                        + work(l + n * (3 - 1) + nn * (i - 1)) * Bt(3,j) &
-                       + work(l + n * (4 - 1) + nn * (i - 1)) * Bt(4,j) 
+                       + work(l + n * (4 - 1) + nn * (i - 1)) * Bt(4,j)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
           v(jj) = work2(i + nn * (1 - 1)) * Ct(1, j) &
                 + work2(i + nn * (2 - 1)) * Ct(2, j) &
                 + work2(i + nn * (3 - 1)) * Ct(3, j) &
-                + work2(i + nn * (4 - 1)) * Ct(4, j) 
+                + work2(i + nn * (4 - 1)) * Ct(4, j)
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n4_cpu
 
   subroutine tnsr3d_el_n3_cpu(v, u, A, Bt, Ct)
@@ -811,30 +811,30 @@ subroutine tnsr3d_el_n3_cpu(v, u, A, Bt, Ct)
           ii = i + n * (j - 1)
           work(ii) = A(i,1) * u(1 + n * (j - 1)) &
                    + A(i,2) * u(2 + n * (j - 1)) &
-                   + A(i,3) * u(3 + n * (j - 1)) 
+                   + A(i,3) * u(3 + n * (j - 1))
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
              ii = l + n * (j - 1) + nn * (i - 1)
              work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * Bt(1,j) &
                        + work(l + n * (2 - 1) + nn * (i - 1)) * Bt(2,j) &
-                       + work(l + n * (3 - 1) + nn * (i - 1)) * Bt(3,j) 
+                       + work(l + n * (3 - 1) + nn * (i - 1)) * Bt(3,j)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
           v(jj) = work2(i + nn * (1 - 1)) * Ct(1, j) &
                 + work2(i + nn * (2 - 1)) * Ct(2, j) &
-                + work2(i + nn * (3 - 1)) * Ct(3, j) 
+                + work2(i + nn * (3 - 1)) * Ct(3, j)
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n3_cpu
 
   subroutine tnsr3d_el_n2_cpu(v, u, A, Bt, Ct)
@@ -850,30 +850,30 @@ subroutine tnsr3d_el_n2_cpu(v, u, A, Bt, Ct)
        do i = 1, n
           ii = i + n * (j - 1)
           work(ii) = A(i,1) * u(1 + n * (j - 1)) &
-                   + A(i,2) * u(2 + n * (j - 1)) 
+                   + A(i,2) * u(2 + n * (j - 1))
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
              ii = l + n * (j - 1) + nn * (i - 1)
              work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * Bt(1,j) &
-                       + work(l + n * (2 - 1) + nn * (i - 1)) * Bt(2,j) 
+                       + work(l + n * (2 - 1) + nn * (i - 1)) * Bt(2,j)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           jj = i + nn * (j - 1)
           v(jj) = work2(i + nn * (1 - 1)) * Ct(1, j) &
-                + work2(i + nn * (2 - 1)) * Ct(2, j) 
+                + work2(i + nn * (2 - 1)) * Ct(2, j)
        end do
     end do
-    
+
   end subroutine tnsr3d_el_n2_cpu
-  
+
   subroutine tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
     integer, intent(in) :: nv, nu, nelv
     real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
@@ -888,7 +888,7 @@ subroutine tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
     end if
 
   end subroutine tnsr3d_cpu
-  
+
   subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
     integer, intent(in) :: nv, nu, nelv
     real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
@@ -898,9 +898,9 @@ subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
     integer :: nunu, nvnu, nvnv
 
     nvnu = nv * nu
-    nunu = nu * nu 
+    nunu = nu * nu
     nvnv = nv * nv
-    
+
     do ie = 1,nelv
        do j = 1, nunu
           do i = 1, nv
@@ -912,7 +912,7 @@ subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
              work(ii) = tmp
           end do
        end do
-       
+
        do i = 1, nu
           do j = 1, nv
              do l = 1, nv
@@ -926,7 +926,7 @@ subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
              end do
           end do
        end do
-       
+
        do j = 1, nv
           do i = 1, nvnv
              jj = i + nvnv * (j - 1)
@@ -939,7 +939,7 @@ subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
           end do
        end do
     end do
-    
+
   end subroutine tnsr3d_nvnu_cpu
 
   subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv)
@@ -953,16 +953,16 @@ subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv)
     real(kind=rp), intent(inout) :: A(nv,nu), Bt(nu, nv), Ct(nu,nv)
     real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp
     integer :: ie, i, j, k, l, ii, jj
-    
+
     do ie = 1,nelv
        do j = 1, nunu
           do i = 1, nv
              ii = i + nv * (j - 1)
              work(ii) = A(i,1) * u(1 + nu * (j - 1), ie) &
-                      + A(i,2) * u(2 + nu * (j - 1), ie)             
+                      + A(i,2) * u(2 + nu * (j - 1), ie)
           end do
        end do
-       
+
        do i = 1, nu
           do j = 1, nv
              do l = 1, nv
@@ -976,16 +976,16 @@ subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv)
              end do
           end do
        end do
-       
+
        do j = 1, nv
           do i = 1, nvnv
              jj = i + nvnv * (j - 1)
              v(jj, ie) = work2(i + nvnv * (1 - 1)) * Ct(1, j) &
-                       + work2(i + nvnv * (2 - 1)) * Ct(2, j) 
+                       + work2(i + nvnv * (2 - 1)) * Ct(2, j)
           end do
        end do
     end do
-    
+
   end subroutine tnsr3d_nu2nv4_cpu
 
   subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv)
@@ -1000,7 +1000,7 @@ subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv)
 
     nvnu = nv * nu
     nvnv = nv * nv
-    
+
     do ie = 1,nelv
        do j = 1, nunu
           do i = 1, nv
@@ -1008,10 +1008,10 @@ subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv)
              work(ii) = A(i,1) * u(1 + nu * (j - 1), ie) &
                       + A(i,2) * u(2 + nu * (j - 1), ie) &
                       + A(i,3) * u(3 + nu * (j - 1), ie) &
-                      + A(i,4) * u(4 + nu * (j - 1), ie)             
+                      + A(i,4) * u(4 + nu * (j - 1), ie)
           end do
        end do
-       
+
        do i = 1, nu
           do j = 1, nv
              do l = 1, nv
@@ -1025,14 +1025,14 @@ subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv)
              end do
           end do
        end do
-       
+
        do j = 1, nv
           do i = 1, nvnv
              jj = i + nvnv * (j - 1)
              v(jj, ie) = work2(i + nvnv * (1 - 1)) * Ct(1, j) &
                        + work2(i + nvnv * (2 - 1)) * Ct(2, j) &
                        + work2(i + nvnv * (3 - 1)) * Ct(3, j) &
-                       + work2(i + nvnv * (4 - 1)) * Ct(4, j) 
+                       + work2(i + nvnv * (4 - 1)) * Ct(4, j)
           end do
        end do
     end do
@@ -1049,10 +1049,10 @@ subroutine tnsr1_3d_cpu(v, nv, nu, A, Bt, Ct, nelv)
     else
        call tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
     end if
-    
+
   end subroutine tnsr1_3d_cpu
 
-    subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
+  subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
     integer, intent(in) :: nv, nu, nelv
     real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
     real(kind=rp), intent(inout) :: A(nv,nu), Bt(nu, nv), Ct(nu,nv)
@@ -1063,26 +1063,26 @@ subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
     real(kind=rp) :: tmp
 
     nvnu = nv * nu
-    nunu = nu * nu 
+    nunu = nu * nu
     nvnv = nv * nv
-    
+
     e0 = 1
     es = 1
     ee = nelv
-       
+
     if (nv.gt.nu) then
        e0 = nelv
        es = -1
        ee = 1
     endif
-    
+
     nu3 = nu**3
     nv3 = nv**3
 
     do e = e0,ee,es
        iu = (e-1)*nu3
        iv = (e-1)*nv3
-       
+
        do j = 1, nunu
           do i = 1, nv
              ii = i + nv * (j - 1)
@@ -1094,7 +1094,7 @@ subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
              work(ii) = tmp
           end do
        end do
-       
+
        do i = 1, nu
           do j = 1, nv
              do l = 1, nv
@@ -1108,7 +1108,7 @@ subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
              end do
           end do
        end do
-       
+
        do j = 1, nv
           do i = 1, nvnv
              jj = i + nvnv * (j - 1) + iv
@@ -1121,7 +1121,7 @@ subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv)
           end do
        end do
     end do
-    
+
   end subroutine tnsr1_3d_nvnu_cpu
 
   subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv)
@@ -1143,17 +1143,17 @@ subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv)
     do e = 1,nelv
        iu = (e-1)*nununu
        iv = (e-1)*nvnvnv
-          
+
        do j = 1, nunu
           do i = 1, nv
              ii = i + nv * (j - 1)
              work(ii) = A(i,1) * v(1 + nu * (j - 1) + iu) &
                       + A(i,2) * v(2 + nu * (j - 1) + iu) &
                       + A(i,3) * v(3 + nu * (j - 1) + iu) &
-                      + A(i,4) * v(4 + nu * (j - 1) + iu) 
+                      + A(i,4) * v(4 + nu * (j - 1) + iu)
           end do
        end do
-       
+
        do i = 1, nu
           do j = 1, nv
              do l = 1, nv
@@ -1167,7 +1167,7 @@ subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv)
              end do
           end do
        end do
-          
+
        do j = 1, nv
           do i = 1, nvnv
              jj = i + nvnv * (j - 1) + iv
@@ -1175,11 +1175,11 @@ subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv)
                    + work2(i + nvnv * (2 - 1)) * Ct(2, j) &
                    + work2(i + nvnv * (3 - 1)) * Ct(3, j) &
                    + work2(i + nvnv * (4 - 1)) * Ct(4, j)
-                  
+
           end do
        end do
     end do
-    
+
   end subroutine tnsr1_3d_nu4nv2_cpu
-  
+
 end module tensor_cpu
diff --git a/src/math/bcknd/device/ax_helm_device.F90 b/src/math/bcknd/device/ax_helm_device.F90
index f0e76e75653..4fa9dbce984 100644
--- a/src/math/bcknd/device/ax_helm_device.F90
+++ b/src/math/bcknd/device/ax_helm_device.F90
@@ -35,7 +35,6 @@ module ax_helm_device
   use num_types, only : rp
   use coefs, only : coef_t
   use space, only : space_t
-  use field, only : field_t
   use mesh, only : mesh_t
   use device_math, only : device_addcol4
   use device, only : device_get_ptr
@@ -43,14 +42,14 @@ module ax_helm_device
   use, intrinsic :: iso_c_binding, only : c_ptr, c_int
   implicit none
   private
-  
+
   type, public, extends(ax_t) :: ax_helm_device_t
    contains
      procedure, nopass :: compute => ax_helm_device_compute
   end type ax_helm_device_t
 
 #ifdef HAVE_HIP
-    interface
+  interface
      subroutine hip_ax_helm(w_d, u_d, &
           dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d, &
           h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) &
@@ -64,7 +63,7 @@ subroutine hip_ax_helm(w_d, u_d, &
      end subroutine hip_ax_helm
   end interface
 #elif HAVE_CUDA
-    interface
+  interface
      subroutine cuda_ax_helm(w_d, u_d, &
           dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d,&
           h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) &
@@ -129,9 +128,9 @@ subroutine ax_helm_device_compute(w, u, coef, msh, Xh)
     if (coef%ifh2) then
        call device_addcol4(w_d ,coef%h2_d, coef%B_d, u_d, coef%dof%size())
     end if
-    
+
   end subroutine ax_helm_device_compute
-  
+
 end module ax_helm_device
 
 
diff --git a/src/math/bcknd/device/cuda/ax_helm.cu b/src/math/bcknd/device/cuda/ax_helm.cu
index 4872ddcef9b..58f01824954 100644
--- a/src/math/bcknd/device/cuda/ax_helm.cu
+++ b/src/math/bcknd/device/cuda/ax_helm.cu
@@ -137,16 +137,21 @@ extern "C" {
       CASE_KSTEP(LX);                                                           \
       break
 
+#define CASE_LARGE_PADDED(LX)                                                   \
+    case LX:                                                                    \
+      CASE_KSTEP_PADDED(LX);                                                    \
+      break
+
 
     if ((*lx) < 12) {      
       switch(*lx) {
         CASE(2);
         CASE(3);
-        CASE(4);
+        CASE_PADDED(4);
         CASE(5);
         CASE(6);
         CASE(7);
-        CASE(8);
+        CASE_PADDED(8);
         CASE(9);
         CASE(10);
         CASE(11);
@@ -163,7 +168,7 @@ extern "C" {
         CASE_LARGE(13);
         CASE_LARGE(14);
         CASE_LARGE(15);
-        CASE_LARGE(16);
+        CASE_LARGE_PADDED(16);
       default:
         {
           fprintf(stderr, __FILE__ ": size not supported: %d\n", *lx);
diff --git a/src/math/bcknd/device/cuda/ax_helm_kernel.h b/src/math/bcknd/device/cuda/ax_helm_kernel.h
index 9b0ab0fe557..57ece6bf669 100644
--- a/src/math/bcknd/device/cuda/ax_helm_kernel.h
+++ b/src/math/bcknd/device/cuda/ax_helm_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_AX_HELM_KERNEL_H__
+#define __MATH_AX_HELM_KERNEL_H__
 /*
  Copyright (c) 2021-2023, The Neko Authors
  All rights reserved.
@@ -344,3 +346,5 @@ __global__ void ax_helm_kernel_kstep_padded(T * __restrict__ w,
     w[ij + k*LX*LX + ele] = rw[k]; 
   }
 }
+
+#endif // __MATH_AX_HELM_KERNEL_H__
diff --git a/src/math/bcknd/device/cuda/cdtp_kernel.h b/src/math/bcknd/device/cuda/cdtp_kernel.h
index a8d41681256..8f79d5da6e5 100644
--- a/src/math/bcknd/device/cuda/cdtp_kernel.h
+++ b/src/math/bcknd/device/cuda/cdtp_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_CDTP_KERNEL_H__
+#define __MATH_CDTP_KERNEL_H__
 /*
  Copyright (c) 2021-2023, The Neko Authors
  All rights reserved.
@@ -124,7 +126,6 @@ __global__ void __launch_bounds__(LX*LX,3)
   T rtar[LX];
   T rtas[LX];
   T rtat[LX];
-  T rjac[LX];
 
   const int e = blockIdx.x;
   const int j = threadIdx.y;
@@ -174,3 +175,5 @@ __global__ void __launch_bounds__(LX*LX,3)
   }
 }
 
+
+#endif // __MATH_CDTP_KERNEL_H__
diff --git a/src/math/bcknd/device/cuda/cfl_kernel.h b/src/math/bcknd/device/cuda/cfl_kernel.h
index 436e9ed0365..5daf2416b45 100644
--- a/src/math/bcknd/device/cuda/cfl_kernel.h
+++ b/src/math/bcknd/device/cuda/cfl_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_CFL_KERNEL_H__
+#define __MATH_CFL_KERNEL_H__
 /*
  Copyright (c) 2022, The Neko Authors
  All rights reserved.
@@ -179,3 +181,5 @@ __global__ void cfl_kernel(const T dt,
     cfl_h[blockIdx.x] = cfl_tmp;
 }
 
+
+#endif // __MATH_CFL_KERNEL_H__
diff --git a/src/math/bcknd/device/cuda/conv1_kernel.h b/src/math/bcknd/device/cuda/conv1_kernel.h
index c094ab98fe1..d77c1cde6aa 100644
--- a/src/math/bcknd/device/cuda/conv1_kernel.h
+++ b/src/math/bcknd/device/cuda/conv1_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_CONV1_KERNEL_H__
+#define __MATH_CONV1_KERNEL_H__
 /*
  Copyright (c) 2021-2023, The Neko Authors
  All rights reserved.
@@ -210,3 +212,5 @@ __global__ void __launch_bounds__(LX*LX,3)
   }
 }
 
+
+#endif // __MATH_CONV1_KERNEL_H__
diff --git a/src/math/bcknd/device/cuda/dudxyz_kernel.h b/src/math/bcknd/device/cuda/dudxyz_kernel.h
index a14123f5f6c..b42d111a217 100644
--- a/src/math/bcknd/device/cuda/dudxyz_kernel.h
+++ b/src/math/bcknd/device/cuda/dudxyz_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_DUDXYZ_KERNEL_H__
+#define __MATH_DUDXYZ_KERNEL_H__
 /*
  Copyright (c) 2021-2023, The Neko Authors
  All rights reserved.
@@ -173,3 +175,5 @@ __global__ void __launch_bounds__(LX*LX,3)
     __syncthreads();
   }
 }
+
+#endif // __MATH_DUDXYZ_KERNEL_H__
diff --git a/src/math/bcknd/device/cuda/fdm_kernel.h b/src/math/bcknd/device/cuda/fdm_kernel.h
index 5c5d82675a5..bcdcbdc963c 100644
--- a/src/math/bcknd/device/cuda/fdm_kernel.h
+++ b/src/math/bcknd/device/cuda/fdm_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_FDM_KERNEL_H__
+#define __MATH_FDM_KERNEL_H__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -135,3 +137,5 @@ __global__ void fdm_do_fast_kernel(T  * __restrict__ e,
 }
 
 
+
+#endif // __MATH_FDM_KERNEL_H__
diff --git a/src/math/bcknd/device/cuda/lambda2_kernel.h b/src/math/bcknd/device/cuda/lambda2_kernel.h
index b6598316e38..327f947fca2 100644
--- a/src/math/bcknd/device/cuda/lambda2_kernel.h
+++ b/src/math/bcknd/device/cuda/lambda2_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_LAMBDA2_KERNEL_H__
+#define __MATH_LAMBDA2_KERNEL_H__
 /*
  Copyright (c) 2021-2023, The Neko Authors
  All rights reserved.
@@ -358,3 +360,5 @@ lambda2_kernel_kstep(T * __restrict__ lambda2,
   }
 }
 
+
+#endif // __MATH_LAMBDA2_KERNEL_H__
diff --git a/src/math/bcknd/device/cuda/math.cu b/src/math/bcknd/device/cuda/math.cu
index 67ae4c8159c..f71ef3869c8 100644
--- a/src/math/bcknd/device/cuda/math.cu
+++ b/src/math/bcknd/device/cuda/math.cu
@@ -52,6 +52,21 @@ extern "C" {
                                (cudaStream_t) glb_cmd_queue));
   }
 
+  /** Fortran wrapper for masked copy
+   * Copy a vector \f$ a(mask) = b(mask) \f$
+   */
+  void cuda_masked_copy(void *a, void *b, void *mask, int *n, int *m) {
+
+    const dim3 nthrds(1024, 1, 1);
+    const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
+
+    masked_copy_kernel<real><<<nblcks, nthrds, 0,
+      (cudaStream_t) glb_cmd_queue>>>((real *) a, (real*) b,(int*) mask, *n, *m);
+    CUDA_CHECK(cudaGetLastError());
+
+  }
+
+
   /** Fortran wrapper for rzero
    * Zero a real vector
    */
@@ -371,6 +386,40 @@ extern "C" {
   real * bufred = NULL;
   real * bufred_d = NULL;
 
+  /**
+   * Fortran wrapper vlsc3
+   * Compute multiplication sum \f$ dot = u \cdot v \cdot w \f$
+   */
+  real cuda_vlsc3(void *u, void *v, void *w, int *n) {
+        
+    const dim3 nthrds(1024, 1, 1);
+    const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
+    const int nb = ((*n) + 1024 - 1)/ 1024;
+    const cudaStream_t stream = (cudaStream_t) glb_cmd_queue;      
+    
+    if ( nb > red_s){
+      red_s = nb;
+      if (bufred != NULL) {
+        CUDA_CHECK(cudaFreeHost(bufred));
+        CUDA_CHECK(cudaFree(bufred_d));        
+      }
+      CUDA_CHECK(cudaMallocHost(&bufred,nb*sizeof(real)));
+      CUDA_CHECK(cudaMalloc(&bufred_d, nb*sizeof(real)));
+    }
+     
+    glsc3_kernel<real><<<nblcks, nthrds, 0, stream>>>
+      ((real *) u, (real *) v, (real *) w, bufred_d, *n);
+    CUDA_CHECK(cudaGetLastError());
+    reduce_kernel<real><<<1, 1024, 0, stream>>> (bufred_d, nb);
+    CUDA_CHECK(cudaGetLastError());
+
+    CUDA_CHECK(cudaMemcpyAsync(bufred, bufred_d, sizeof(real),
+                               cudaMemcpyDeviceToHost, stream));
+    cudaStreamSynchronize(stream);
+
+    return bufred[0];
+  }
+
   /**
    * Fortran wrapper glsc3
    * Weighted inner product \f$ a^T b c \f$
diff --git a/src/math/bcknd/device/cuda/math_kernel.h b/src/math/bcknd/device/cuda/math_kernel.h
index 9f355d82bbf..f1d14405016 100644
--- a/src/math/bcknd/device/cuda/math_kernel.h
+++ b/src/math/bcknd/device/cuda/math_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_MATH_KERNEL_H__
+#define __MATH_MATH_KERNEL_H__
 /*
  Copyright (c) 2021-2023, The Neko Authors
  All rights reserved.
@@ -48,6 +50,25 @@ __global__ void cmult_kernel(T * __restrict__ a,
   }
 }
 
+/**
+ * Device kernel for masked copy
+ */
+template< typename T >
+__global__ void masked_copy_kernel(T * __restrict__ a,
+                                   T * __restrict__ b,
+                                   int * __restrict__ mask,                    
+                                   const int n,
+                                   const int m) {
+
+  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const int str = blockDim.x * gridDim.x;
+
+  for (int i = idx; i < m; i += str) {
+    a[mask[i+1]-1] = b[mask[i+1]-1];
+  }
+}
+
+
 /**
  * Device kernel for cmult2
  */
@@ -424,7 +445,6 @@ __global__ void reduce_kernel(T * bufred, const int n) {
     bufred[blockIdx.x] = sum;
 }
 
-
 /**
  * Reduction kernel for glsc3
  */
@@ -606,3 +626,5 @@ __global__ void glsum_kernel(const T * a,
     buf_h[blockIdx.x] = sum;
   
 }
+
+#endif // __MATH_MATH_KERNEL_H__
diff --git a/src/math/bcknd/device/cuda/mathops_kernel.h b/src/math/bcknd/device/cuda/mathops_kernel.h
index d691e3613a7..0341aa6f63d 100644
--- a/src/math/bcknd/device/cuda/mathops_kernel.h
+++ b/src/math/bcknd/device/cuda/mathops_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_MATHOPS_KERNEL_H__
+#define __MATH_MATHOPS_KERNEL_H__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -192,3 +194,5 @@ __global__ void opadd2col_kernel(T * __restrict__ a1,
 
 }
 
+
+#endif // __MATH_MATHOPS_KERNEL_H__
diff --git a/src/math/bcknd/device/cuda/opgrad_kernel.h b/src/math/bcknd/device/cuda/opgrad_kernel.h
index 1667a4ed00b..1eda4cfd4de 100644
--- a/src/math/bcknd/device/cuda/opgrad_kernel.h
+++ b/src/math/bcknd/device/cuda/opgrad_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_OPGRAD_KERNEL_H__
+#define __MATH_OPGRAD_KERNEL_H__
 /*
  Copyright (c) 2021-2023, The Neko Authors
  All rights reserved.
@@ -197,3 +199,5 @@ opgrad_kernel_kstep(T * __restrict__ ux,
   }
 }
 
+
+#endif // __MATH_OPGRAD_KERNEL_H__
diff --git a/src/math/bcknd/device/cuda/schwarz_kernel.h b/src/math/bcknd/device/cuda/schwarz_kernel.h
index e54d868d44f..e3a25ccdbb6 100644
--- a/src/math/bcknd/device/cuda/schwarz_kernel.h
+++ b/src/math/bcknd/device/cuda/schwarz_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_SCHWARZ_KERNEL_H__
+#define __MATH_SCHWARZ_KERNEL_H__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -126,3 +128,5 @@ __global__ void schwarz_toreg3d_kernel(T * __restrict__ b,
     b[ijk+el] = a[(i+1)+(j+1)*nx2+(k+1)*nx2*nx2+el2];
   }
 }
+
+#endif // __MATH_SCHWARZ_KERNEL_H__
diff --git a/src/math/bcknd/device/cuda/tensor_kernel.h b/src/math/bcknd/device/cuda/tensor_kernel.h
index 2ae5b80f08f..4e7b58b49b4 100644
--- a/src/math/bcknd/device/cuda/tensor_kernel.h
+++ b/src/math/bcknd/device/cuda/tensor_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_TENSOR_KERNEL_H__
+#define __MATH_TENSOR_KERNEL_H__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -206,3 +208,5 @@ __global__ void tnsr3d_kernel_large(T  * __restrict__  v,
 }
 
 
+
+#endif // __MATH_TENSOR_KERNEL_H__
diff --git a/src/math/bcknd/device/device_math.F90 b/src/math/bcknd/device/device_math.F90
index ae9b9605a2a..2c034602078 100644
--- a/src/math/bcknd/device/device_math.F90
+++ b/src/math/bcknd/device/device_math.F90
@@ -33,7 +33,7 @@
 module device_math
   use comm
   use utils, only : neko_error
-  use num_types, only : rp, c_rp    
+  use num_types, only : rp, c_rp
   use, intrinsic :: iso_c_binding
   implicit none
   private
@@ -48,6 +48,15 @@ subroutine hip_copy(a_d, b_d, n) &
      end subroutine hip_copy
   end interface
 
+  interface
+     subroutine hip_masked_copy(a_d, b_d, mask_d, n, m) &
+          bind(c, name='hip_masked_copy')
+       use, intrinsic :: iso_c_binding
+       type(c_ptr), value :: a_d, b_d, mask_d
+       integer(c_int) :: n, m
+     end subroutine hip_masked_copy
+  end interface
+  
   interface
      subroutine hip_cmult(a_d, c, n) &
           bind(c, name='hip_cmult')
@@ -91,7 +100,7 @@ subroutine hip_cfill(a_d, c, n) &
        integer(c_int) :: n
      end subroutine hip_cfill
   end interface
-  
+
   interface
      subroutine hip_rzero(a_d, n) &
           bind(c, name='hip_rzero')
@@ -100,23 +109,23 @@ subroutine hip_rzero(a_d, n) &
        integer(c_int) :: n
      end subroutine hip_rzero
   end interface
-  
+
   interface
      subroutine hip_add2(a_d, b_d, n) &
           bind(c, name='hip_add2')
        use, intrinsic :: iso_c_binding
-       import c_rp                     
+       import c_rp
        implicit none
        type(c_ptr), value :: a_d, b_d
        integer(c_int) :: n
      end subroutine hip_add2
   end interface
-  
+
   interface
      subroutine hip_add2s1(a_d, b_d, c1, n) &
           bind(c, name='hip_add2s1')
        use, intrinsic :: iso_c_binding
-       import c_rp                     
+       import c_rp
        implicit none
        type(c_ptr), value :: a_d, b_d
        real(c_rp) :: c1
@@ -135,7 +144,7 @@ subroutine hip_add2s2(a_d, b_d, c1, n) &
        integer(c_int) :: n
      end subroutine hip_add2s2
   end interface
-  
+
   interface
      subroutine hip_add2s2_many(y_d,x_d_d,a_d,j,n) &
           bind(c, name='hip_add2s2_many')
@@ -163,7 +172,7 @@ end subroutine hip_addsqr2s2
      subroutine hip_add3s2(a_d, b_d, c_d, c1, c2, n) &
           bind(c, name='hip_add3s2')
        use, intrinsic :: iso_c_binding
-       import c_rp                     
+       import c_rp
        implicit none
        type(c_ptr), value :: a_d, b_d, c_d
        real(c_rp) :: c1, c2
@@ -190,7 +199,7 @@ subroutine hip_invcol2(a_d, b_d, n) &
        integer(c_int) :: n
      end subroutine hip_invcol2
   end interface
-  
+
   interface
      subroutine hip_col2(a_d, b_d, n) &
           bind(c, name='hip_col2')
@@ -200,7 +209,7 @@ subroutine hip_col2(a_d, b_d, n) &
        integer(c_int) :: n
      end subroutine hip_col2
   end interface
-  
+
   interface
      subroutine hip_col3(a_d, b_d, c_d, n) &
           bind(c, name='hip_col3')
@@ -230,7 +239,7 @@ subroutine hip_sub2(a_d, b_d, n) &
        integer(c_int) :: n
      end subroutine hip_sub2
   end interface
-  
+
   interface
      subroutine hip_sub3(a_d, b_d, c_d, n) &
           bind(c, name='hip_sub3')
@@ -270,7 +279,18 @@ subroutine hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
        integer(c_int) :: n
      end subroutine hip_vdot3
   end interface
-    
+
+  interface
+     real(c_rp) function hip_vlsc3(u_d, v_d, w_d, n) &
+          bind(c, name='hip_vlsc3')
+       use, intrinsic :: iso_c_binding
+       import c_rp
+       implicit none
+       type(c_ptr), value :: u_d, v_d, w_d
+       integer(c_int) :: n
+     end function hip_vlsc3
+  end interface
+
   interface
      real(c_rp) function hip_glsc3(a_d, b_d, c_d, n) &
           bind(c, name='hip_glsc3')
@@ -281,7 +301,7 @@ real(c_rp) function hip_glsc3(a_d, b_d, c_d, n) &
        integer(c_int) :: n
      end function hip_glsc3
   end interface
-  
+
   interface
      subroutine hip_glsc3_many(h,w_d,v_d_d,mult_d,j,n) &
           bind(c, name='hip_glsc3_many')
@@ -324,7 +344,14 @@ subroutine cuda_copy(a_d, b_d, n) &
        integer(c_int) :: n
      end subroutine cuda_copy
   end interface
-
+  interface
+     subroutine cuda_masked_copy(a_d, b_d, mask_d, n, m) &
+          bind(c, name='cuda_masked_copy')
+       use, intrinsic :: iso_c_binding
+       type(c_ptr), value :: a_d, b_d, mask_d
+       integer(c_int) :: n, m
+     end subroutine cuda_masked_copy
+  end interface
   interface
      subroutine cuda_cmult(a_d, c, n) &
           bind(c, name='cuda_cmult')
@@ -378,7 +405,7 @@ subroutine cuda_rzero(a_d, n) &
        integer(c_int) :: n
      end subroutine cuda_rzero
   end interface
-  
+
   interface
      subroutine cuda_add2(a_d, b_d, n) &
           bind(c, name='cuda_add2')
@@ -389,7 +416,7 @@ subroutine cuda_add2(a_d, b_d, n) &
        integer(c_int) :: n
      end subroutine cuda_add2
   end interface
-  
+
   interface
      subroutine cuda_add2s1(a_d, b_d, c1, n) &
           bind(c, name='cuda_add2s1')
@@ -430,7 +457,7 @@ end subroutine cuda_addsqr2s2
      subroutine cuda_add3s2(a_d, b_d, c_d, c1, c2, n) &
           bind(c, name='cuda_add3s2')
        use, intrinsic :: iso_c_binding
-       import c_rp                     
+       import c_rp
        implicit none
        type(c_ptr), value :: a_d, b_d, c_d
        real(c_rp) :: c1, c2
@@ -451,13 +478,13 @@ end subroutine cuda_invcol1
   interface
      subroutine cuda_invcol2(a_d, b_d, n) &
           bind(c, name='cuda_invcol2')
-       use, intrinsic :: iso_c_binding       
+       use, intrinsic :: iso_c_binding
        implicit none
        type(c_ptr), value :: a_d, b_d
        integer(c_int) :: n
      end subroutine cuda_invcol2
   end interface
-  
+
   interface
      subroutine cuda_col2(a_d, b_d, n) &
           bind(c, name='cuda_col2')
@@ -467,7 +494,7 @@ subroutine cuda_col2(a_d, b_d, n) &
        integer(c_int) :: n
      end subroutine cuda_col2
   end interface
-  
+
   interface
      subroutine cuda_col3(a_d, b_d, c_d, n) &
           bind(c, name='cuda_col3')
@@ -497,7 +524,7 @@ subroutine cuda_sub2(a_d, b_d, n) &
        integer(c_int) :: n
      end subroutine cuda_sub2
   end interface
-  
+
   interface
      subroutine cuda_sub3(a_d, b_d, c_d, n) &
           bind(c, name='cuda_sub3')
@@ -537,7 +564,18 @@ subroutine cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) &
        integer(c_int) :: n
      end subroutine cuda_vdot3
   end interface
-  
+
+  interface
+     real(c_rp) function cuda_vlsc3(u_d, v_d, w_d, n) &
+          bind(c, name='cuda_vlsc3')
+       use, intrinsic :: iso_c_binding
+       import c_rp
+       implicit none
+       type(c_ptr), value :: u_d, v_d, w_d
+       integer(c_int) :: n
+     end function cuda_vlsc3
+  end interface
+
   interface
      subroutine cuda_add2s2_many(y_d,x_d_d,a_d,j,n) &
           bind(c, name='cuda_add2s2_many')
@@ -593,7 +631,7 @@ real(c_rp) function cuda_glsum(a_d, n) &
      end function cuda_glsum
   end interface
 #elif HAVE_OPENCL
-    interface
+  interface
      subroutine opencl_copy(a_d, b_d, n) &
           bind(c, name='opencl_copy')
        use, intrinsic :: iso_c_binding
@@ -602,6 +640,15 @@ subroutine opencl_copy(a_d, b_d, n) &
      end subroutine opencl_copy
   end interface
 
+  interface
+     subroutine opencl_masked_copy(a_d, b_d, mask_d, n, m) &
+          bind(c, name='opencl_masked_copy')
+       use, intrinsic :: iso_c_binding
+       type(c_ptr), value :: a_d, b_d, mask_d
+       integer(c_int) :: n, m
+     end subroutine opencl_masked_copy
+  end interface
+
   interface
      subroutine opencl_cmult(a_d, c, n) &
           bind(c, name='opencl_cmult')
@@ -672,12 +719,12 @@ subroutine opencl_add2(a_d, b_d, n) &
        integer(c_int) :: n
      end subroutine opencl_add2
   end interface
-  
+
   interface
      subroutine opencl_add2s1(a_d, b_d, c1, n) &
           bind(c, name='opencl_add2s1')
        use, intrinsic :: iso_c_binding
-       import c_rp                     
+       import c_rp
        implicit none
        type(c_ptr), value :: a_d, b_d
        real(c_rp) :: c1
@@ -689,7 +736,7 @@ end subroutine opencl_add2s1
      subroutine opencl_add2s2(a_d, b_d, c1, n) &
           bind(c, name='opencl_add2s2')
        use, intrinsic :: iso_c_binding
-       import c_rp                     
+       import c_rp
        implicit none
        type(c_ptr), value :: a_d, b_d
        real(c_rp) :: c1
@@ -712,7 +759,7 @@ end subroutine opencl_add2s2_many
      subroutine opencl_addsqr2s2(a_d, b_d, c1, n) &
           bind(c, name='opencl_addsqr2s2')
        use, intrinsic :: iso_c_binding
-       import c_rp                     
+       import c_rp
        implicit none
        type(c_ptr), value :: a_d, b_d
        real(c_rp) :: c1
@@ -724,7 +771,7 @@ end subroutine opencl_addsqr2s2
      subroutine opencl_add3s2(a_d, b_d, c_d, c1, c2, n) &
           bind(c, name='opencl_add3s2')
        use, intrinsic :: iso_c_binding
-       import c_rp                     
+       import c_rp
        implicit none
        type(c_ptr), value :: a_d, b_d, c_d
        real(c_rp) :: c1, c2
@@ -745,13 +792,13 @@ end subroutine opencl_invcol1
   interface
      subroutine opencl_invcol2(a_d, b_d, n) &
           bind(c, name='opencl_invcol2')
-       use, intrinsic :: iso_c_binding       
+       use, intrinsic :: iso_c_binding
        implicit none
        type(c_ptr), value :: a_d, b_d
        integer(c_int) :: n
      end subroutine opencl_invcol2
   end interface
-  
+
   interface
      subroutine opencl_col2(a_d, b_d, n) &
           bind(c, name='opencl_col2')
@@ -791,7 +838,7 @@ subroutine opencl_sub2(a_d, b_d, n) &
        integer(c_int) :: n
      end subroutine opencl_sub2
   end interface
-  
+
   interface
      subroutine opencl_sub3(a_d, b_d, c_d, n) &
           bind(c, name='opencl_sub3')
@@ -811,7 +858,7 @@ subroutine opencl_addcol3(a_d, b_d, c_d, n) &
        integer(c_int) :: n
      end subroutine opencl_addcol3
   end interface
-  
+
   interface
      subroutine opencl_addcol4(a_d, b_d, c_d, d_d, n) &
           bind(c, name='opencl_addcol4')
@@ -882,8 +929,9 @@ end function opencl_glsum
        device_cadd, device_cfill, device_add2, device_add2s1, device_add2s2, &
        device_addsqr2s2, device_add3s2, device_invcol1, device_invcol2, &
        device_col2, device_col3, device_subcol3, device_sub2, device_sub3, &
-       device_addcol3, device_addcol4, device_vdot3, device_glsc3, &
-       device_glsc3_many, device_add2s2_many, device_glsc2, device_glsum
+       device_addcol3, device_addcol4, device_vdot3, device_vlsc3, device_glsc3, &
+       device_glsc3_many, device_add2s2_many, device_glsc2, device_glsum, &
+       device_masked_copy
   
 contains
 
@@ -897,10 +945,25 @@ subroutine device_copy(a_d, b_d, n)
 #elif HAVE_OPENCL
     call opencl_copy(a_d, b_d, n)
 #else
-    call neko_error('No device backend configured')
+    call neko_error('no device backend configured')
 #endif
   end subroutine device_copy
 
+  subroutine device_masked_copy(a_d, b_d, mask_d, n, m)
+    type(c_ptr) :: a_d, b_d, mask_d
+    integer :: n, m
+#ifdef HAVE_HIP
+    call hip_masked_copy(a_d, b_d, mask_d, n, m)
+#elif HAVE_CUDA
+    call cuda_masked_copy(a_d, b_d, mask_d, n, m)
+#elif HAVE_OPENCL
+    call opencl_masked_copy(a_d, b_d, mask_d, n, m)
+#else
+    call neko_error('no device backend configured')
+#endif
+  end subroutine device_masked_copy
+
+
   subroutine device_rzero(a_d, n)
     type(c_ptr) :: a_d
     integer :: n
@@ -1002,7 +1065,7 @@ subroutine device_add2(a_d, b_d, n)
     call neko_error('No device backend configured')
 #endif
   end subroutine device_add2
-  
+
   subroutine device_add2s1(a_d, b_d, c1, n)
     type(c_ptr) :: a_d, b_d
     real(kind=rp) :: c1
@@ -1032,7 +1095,7 @@ subroutine device_add2s2(a_d, b_d, c1, n)
     call neko_error('No device backend configured')
 #endif
   end subroutine device_add2s2
-  
+
   subroutine device_addsqr2s2(a_d, b_d, c1, n)
     type(c_ptr) :: a_d, b_d
     real(kind=rp) :: c1
@@ -1104,7 +1167,7 @@ subroutine device_col2(a_d, b_d, n)
     call neko_error('No device backend configured')
 #endif
   end subroutine device_col2
-  
+
   subroutine device_col3(a_d, b_d, c_d, n)
     type(c_ptr) :: a_d, b_d, c_d
     integer :: n
@@ -1146,7 +1209,7 @@ subroutine device_sub2(a_d, b_d, n)
     call neko_error('No device backend configured')
 #endif
   end subroutine device_sub2
-  
+
   subroutine device_sub3(a_d, b_d, c_d, n)
     type(c_ptr) :: a_d, b_d, c_d
     integer :: n
@@ -1202,7 +1265,24 @@ subroutine device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n)
     call neko_error('No device backend configured')
 #endif
   end subroutine device_vdot3
-  
+
+  function device_vlsc3(u_d, v_d, w_d, n) result(res)
+    type(c_ptr) :: u_d, v_d, w_d
+    integer :: n
+    real(kind=rp) :: res
+    res = 0.0_rp
+#ifdef HAVE_HIP
+    res = hip_vlsc3(u_d, v_d, w_d, n)
+#elif HAVE_CUDA
+    res = cuda_vlsc3(u_d, v_d, w_d, n)
+#elif HAVE_OPENCL
+    ! Same kernel as glsc3 (currently no device MPI for OpenCL)
+    res = opencl_glsc3(u_d, v_d, w_d, n)
+#else
+    call neko_error('No device backend configured')
+#endif
+  end function device_vlsc3
+
   function device_glsc3(a_d, b_d, c_d, n) result(res)
     type(c_ptr) :: a_d, b_d, c_d
     integer :: n, ierr
@@ -1224,7 +1304,7 @@ function device_glsc3(a_d, b_d, c_d, n) result(res)
     end if
 #endif
   end function device_glsc3
-  
+
   subroutine device_glsc3_many(h,w_d,v_d_d,mult_d,j,n)
     type(c_ptr), value :: w_d, v_d_d, mult_d
     integer(c_int) :: j, n
@@ -1247,7 +1327,7 @@ subroutine device_glsc3_many(h,w_d,v_d_d,mult_d,j,n)
     end if
 #endif
   end subroutine device_glsc3_many
-  
+
   subroutine device_add2s2_many(y_d,x_d_d,a_d,j,n)
     type(c_ptr), value :: y_d, x_d_d, a_d
     integer(c_int) :: j, n
@@ -1261,7 +1341,7 @@ subroutine device_add2s2_many(y_d,x_d_d,a_d,j,n)
     call neko_error('No device backend configured')
 #endif
   end subroutine device_add2s2_many
- 
+
   function device_glsc2(a_d, b_d, n) result(res)
     type(c_ptr) :: a_d, b_d
     integer :: n, ierr
@@ -1305,6 +1385,6 @@ function device_glsum(a_d, n) result(res)
     end if
 #endif
   end function device_glsum
-  
- 
+
+
 end module device_math
diff --git a/src/math/bcknd/device/device_mathops.F90 b/src/math/bcknd/device/device_mathops.F90
index 3ad0a70e9a4..a7cda7896c9 100644
--- a/src/math/bcknd/device/device_mathops.F90
+++ b/src/math/bcknd/device/device_mathops.F90
@@ -36,7 +36,7 @@ module device_mathops
   use, intrinsic :: iso_c_binding, only : c_int, c_ptr
   implicit none
   private
-  
+
 #ifdef HAVE_HIP
   interface
      subroutine hip_opchsign(a1_d, a2_d, a3_d, gdim, n) &
@@ -188,7 +188,7 @@ end subroutine opencl_opadd2col
 
   public :: device_opchsign, device_opcolv, device_opcolv3c, &
        device_opadd2cm, device_opadd2col
-  
+
 contains
 
   !> \f$ a = -a \f$
@@ -221,7 +221,7 @@ subroutine device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
 #endif
   end subroutine device_opcolv
 
-  !> \f$ a(i) = b(i) * c(i) * d \f$ 
+  !> \f$ a(i) = b(i) * c(i) * d \f$
   subroutine device_opcolv3c(a1_d, a2_d, a3_d, &
                              b1_d, b2_d, b3_d, c_d, d, n, gdim)
     type(c_ptr) :: a1_d, a2_d, a3_d, b1_d, b2_d, b3_d, c_d
@@ -238,7 +238,7 @@ subroutine device_opcolv3c(a1_d, a2_d, a3_d, &
 #endif
   end subroutine device_opcolv3c
 
-  !> \f$ a(i) = a + b(i) * c \f$ 
+  !> \f$ a(i) = a + b(i) * c \f$
   subroutine device_opadd2cm (a1_d, a2_d, a3_d, b1_d, b2_d, b3_d, c, n, gdim)
     type(c_ptr) :: a1_d, a2_d, a3_d, b1_d, b2_d, b3_d
     real(kind=rp) :: c
diff --git a/src/math/bcknd/device/device_mpi_op.h b/src/math/bcknd/device/device_mpi_op.h
index 248f75cd423..b575647c61a 100644
--- a/src/math/bcknd/device/device_mpi_op.h
+++ b/src/math/bcknd/device/device_mpi_op.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_DEVICE_MPI_OP_H__
+#define __MATH_DEVICE_MPI_OP_H__
 /** 
  * Dummy defintion of device MPI reduce operations
  * @note we can't put this in device_mpi_reduce.h and include it in device_mpi_reduce.c 
@@ -6,3 +8,5 @@
 
 #define DEVICE_MPI_SUM 0
 #define DEVICE_MPI_MAX 1
+
+#endif // __MATH_DEVICE_MPI_OP_H__
diff --git a/src/math/bcknd/device/device_mpi_reduce.h b/src/math/bcknd/device/device_mpi_reduce.h
index 4b2be3528c6..e871267318c 100644
--- a/src/math/bcknd/device/device_mpi_reduce.h
+++ b/src/math/bcknd/device/device_mpi_reduce.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_DEVICE_MPI_REDUCE_H__
+#define __MATH_DEVICE_MPI_REDUCE_H__
 
 /**
  * C wrapper for MPI calls, until we integrate with NCCL/RCCL
@@ -10,3 +12,5 @@ extern "C" {
   void device_mpi_allreduce_inplace(void *buf_d, int count,
                                     size_t nbytes, int op);
 }
+
+#endif // __MATH_DEVICE_MPI_REDUCE_H__
diff --git a/src/math/bcknd/device/device_schwarz.F90 b/src/math/bcknd/device/device_schwarz.F90
index bee04432246..da09500671d 100644
--- a/src/math/bcknd/device/device_schwarz.F90
+++ b/src/math/bcknd/device/device_schwarz.F90
@@ -37,7 +37,7 @@ module device_schwarz
   use, intrinsic :: iso_c_binding, only : c_ptr, c_int
   implicit none
   private
-  
+
 #ifdef HAVE_HIP
   interface
      subroutine hip_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx, nelv, stream) &
@@ -55,7 +55,7 @@ subroutine hip_schwarz_toext3d(a_d,b_d,nx, nelv, stream) &
        use, intrinsic :: iso_c_binding
        import c_rp
        implicit none
-       type(c_ptr), value :: a_d, b_d, stream 
+       type(c_ptr), value :: a_d, b_d, stream
        integer(c_int) :: nx, nelv
      end subroutine hip_schwarz_toext3d
      subroutine hip_schwarz_toreg3d(b_d,a_d,nx, nelv, stream) &
@@ -63,7 +63,7 @@ subroutine hip_schwarz_toreg3d(b_d,a_d,nx, nelv, stream) &
        use, intrinsic :: iso_c_binding
        import c_rp
        implicit none
-       type(c_ptr), value :: a_d, b_d, stream 
+       type(c_ptr), value :: a_d, b_d, stream
        integer(c_int) :: nx, nelv
      end subroutine hip_schwarz_toreg3d
   end interface
@@ -74,7 +74,7 @@ subroutine cuda_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx, nelv, stream) &
        use, intrinsic :: iso_c_binding
        import c_rp
        implicit none
-       type(c_ptr), value :: arr1_d, arr2_d, stream 
+       type(c_ptr), value :: arr1_d, arr2_d, stream
 
        integer(c_int) :: l1, l2, nx, nelv
        real(c_rp) :: f1, f2
@@ -92,35 +92,35 @@ subroutine cuda_schwarz_toreg3d(b_d,a_d,nx, nelv, stream) &
        use, intrinsic :: iso_c_binding
        import c_rp
        implicit none
-       type(c_ptr), value :: a_d, b_d, stream 
+       type(c_ptr), value :: a_d, b_d, stream
        integer(c_int) :: nx, nelv
      end subroutine cuda_schwarz_toreg3d
   end interface
 #elif HAVE_OPENCL
   interface
-     subroutine opencl_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx, nelv) &
+     subroutine opencl_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx, nelv, stream) &
           bind(c, name='opencl_schwarz_extrude')
        use, intrinsic :: iso_c_binding
        import c_rp
        implicit none
-       type(c_ptr), value :: arr1_d, arr2_d
+       type(c_ptr), value :: arr1_d, arr2_d, stream
        integer(c_int) :: l1, l2, nx, nelv
        real(c_rp) :: f1, f2
      end subroutine opencl_schwarz_extrude
-     subroutine opencl_schwarz_toext3d(a_d,b_d,nx, nelv) &
+     subroutine opencl_schwarz_toext3d(a_d,b_d,nx, nelv, stream) &
           bind(c, name='opencl_schwarz_toext3d')
        use, intrinsic :: iso_c_binding
        import c_rp
        implicit none
-       type(c_ptr), value :: a_d, b_d
+       type(c_ptr), value :: a_d, b_d, stream
        integer(c_int) :: nx, nelv
      end subroutine opencl_schwarz_toext3d
-     subroutine opencl_schwarz_toreg3d(b_d,a_d,nx, nelv) &
+     subroutine opencl_schwarz_toreg3d(b_d,a_d,nx, nelv, stream) &
           bind(c, name='opencl_schwarz_toreg3d')
        use, intrinsic :: iso_c_binding
        import c_rp
        implicit none
-       type(c_ptr), value :: a_d, b_d
+       type(c_ptr), value :: a_d, b_d, stream
        integer(c_int) :: nx, nelv
      end subroutine opencl_schwarz_toreg3d
   end interface
@@ -128,7 +128,7 @@ end subroutine opencl_schwarz_toreg3d
 
   public :: device_schwarz_extrude, device_schwarz_toext3d, &
        device_schwarz_toreg3d
-  
+
 contains
   subroutine device_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx,ny,nz, nelv, stream)
     integer, intent(in) :: l1,l2,nx,ny,nz, nelv
@@ -143,7 +143,7 @@ subroutine device_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx,ny,nz, nelv, stre
 #elif HAVE_CUDA
     call cuda_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx,nelv, stream)
 #elif HAVE_OPENCL
-    call opencl_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx,nelv)
+    call opencl_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx,nelv, stream)
 #else
     call neko_error('No device backend configured')
 #endif
@@ -162,7 +162,7 @@ subroutine device_schwarz_toext3d(a_d,b_d,nx, nelv, stream)
 #elif HAVE_CUDA
     call cuda_schwarz_toext3d(a_d,b_d,nx,nelv, stream)
 #elif HAVE_OPENCL
-    call opencl_schwarz_toext3d(a_d,b_d,nx,nelv)
+    call opencl_schwarz_toext3d(a_d,b_d,nx,nelv, stream)
 #else
     call neko_error('No device backend configured')
 #endif
@@ -180,7 +180,7 @@ subroutine device_schwarz_toreg3d(b_d,a_d,nx, nelv, stream)
 #elif HAVE_CUDA
     call cuda_schwarz_toreg3d(b_d,a_d,nx,nelv, stream)
 #elif HAVE_OPENCL
-    call opencl_schwarz_toreg3d(b_d,a_d,nx,nelv)
+    call opencl_schwarz_toreg3d(b_d,a_d,nx,nelv, stream)
 #else
     call neko_error('No device backend configured')
 #endif
diff --git a/src/math/bcknd/device/fdm_device.F90 b/src/math/bcknd/device/fdm_device.F90
index 2750e8097d0..a88ba3e1be9 100644
--- a/src/math/bcknd/device/fdm_device.F90
+++ b/src/math/bcknd/device/fdm_device.F90
@@ -37,7 +37,7 @@ module fdm_device
   use, intrinsic :: iso_c_binding, only : c_ptr, c_int
   implicit none
   private
-  
+
 #ifdef HAVE_HIP
   interface
      subroutine hip_fdm_do_fast(e_d, r_d, s_d, d_d, nl, nelv, stream) &
@@ -68,7 +68,7 @@ end subroutine opencl_fdm_do_fast
 #endif
 
   public :: fdm_do_fast_device
-  
+
 contains
 
   subroutine fdm_do_fast_device(e, r, s, d, nl, ldim, nelv, stream)
@@ -76,7 +76,7 @@ subroutine fdm_do_fast_device(e, r, s, d, nl, ldim, nelv, stream)
     real(kind=rp), intent(inout) :: e(nl**ldim, nelv)
     real(kind=rp), intent(inout) :: r(nl**ldim, nelv)
     real(kind=rp), intent(inout) :: s(nl*nl,2,ldim, nelv)
-    real(kind=rp), intent(inout) :: d(nl**ldim, nelv)    
+    real(kind=rp), intent(inout) :: d(nl**ldim, nelv)
     type(c_ptr) :: e_d, r_d, s_d, d_d
     type(c_ptr), optional :: stream
 
diff --git a/src/math/bcknd/device/hip/ax_helm_kernel.h b/src/math/bcknd/device/hip/ax_helm_kernel.h
index 5336e02fcf1..67dec5a935c 100644
--- a/src/math/bcknd/device/hip/ax_helm_kernel.h
+++ b/src/math/bcknd/device/hip/ax_helm_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_AX_HELM_KERNEL_H__
+#define __MATH_AX_HELM_KERNEL_H__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -346,3 +348,5 @@ __global__ void __launch_bounds__(LX*LX,3)
     w[ij + k*LX*LX + ele] = rw[k]; 
   }
 }
+
+#endif // __MATH_AX_HELM_KERNEL_H__
diff --git a/src/math/bcknd/device/hip/cdtp_kernel.h b/src/math/bcknd/device/hip/cdtp_kernel.h
index 1858ecebef6..6c3e958e404 100644
--- a/src/math/bcknd/device/hip/cdtp_kernel.h
+++ b/src/math/bcknd/device/hip/cdtp_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_CDTP_KERNEL_H__
+#define __MATH_CDTP_KERNEL_H__
 /*
  Copyright (c) 2021-2023, The Neko Authors
  All rights reserved.
@@ -124,7 +126,6 @@ __global__ void __launch_bounds__(LX*LX,3)
   T rtar[LX];
   T rtas[LX];
   T rtat[LX];
-  T rjac[LX];
 
   const int e = blockIdx.x;
   const int j = threadIdx.y;
@@ -174,3 +175,5 @@ __global__ void __launch_bounds__(LX*LX,3)
   }
 }
 
+
+#endif // __MATH_CDTP_KERNEL_H__
diff --git a/src/math/bcknd/device/hip/cfl_kernel.h b/src/math/bcknd/device/hip/cfl_kernel.h
index 239242e90b7..b51fded435e 100644
--- a/src/math/bcknd/device/hip/cfl_kernel.h
+++ b/src/math/bcknd/device/hip/cfl_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_CFL_KERNEL_H__
+#define __MATH_CFL_KERNEL_H__
 /*
  Copyright (c) 2022, The Neko Authors
  All rights reserved.
@@ -180,3 +182,5 @@ __global__ void cfl_kernel(const T dt,
     cfl_h[blockIdx.x] = cfl_tmp;  
 }
 
+
+#endif // __MATH_CFL_KERNEL_H__
diff --git a/src/math/bcknd/device/hip/conv1_kernel.h b/src/math/bcknd/device/hip/conv1_kernel.h
index c094ab98fe1..d77c1cde6aa 100644
--- a/src/math/bcknd/device/hip/conv1_kernel.h
+++ b/src/math/bcknd/device/hip/conv1_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_CONV1_KERNEL_H__
+#define __MATH_CONV1_KERNEL_H__
 /*
  Copyright (c) 2021-2023, The Neko Authors
  All rights reserved.
@@ -210,3 +212,5 @@ __global__ void __launch_bounds__(LX*LX,3)
   }
 }
 
+
+#endif // __MATH_CONV1_KERNEL_H__
diff --git a/src/math/bcknd/device/hip/dudxyz_kernel.h b/src/math/bcknd/device/hip/dudxyz_kernel.h
index 6265d28d574..d2c4ef25d20 100644
--- a/src/math/bcknd/device/hip/dudxyz_kernel.h
+++ b/src/math/bcknd/device/hip/dudxyz_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_DUDXYZ_KERNEL_H__
+#define __MATH_DUDXYZ_KERNEL_H__
 /*
  Copyright (c) 2021-2023, The Neko Authors
  All rights reserved.
@@ -174,3 +176,5 @@ __global__ void __launch_bounds__(LX*LX,3)
   }
 }
 
+
+#endif // __MATH_DUDXYZ_KERNEL_H__
diff --git a/src/math/bcknd/device/hip/fdm_kernel.h b/src/math/bcknd/device/hip/fdm_kernel.h
index 0db84fbed01..f4baf90a89f 100644
--- a/src/math/bcknd/device/hip/fdm_kernel.h
+++ b/src/math/bcknd/device/hip/fdm_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_FDM_KERNEL_H__
+#define __MATH_FDM_KERNEL_H__
 /*
  Copyright (c) 2022, The Neko Authors
  All rights reserved.
@@ -135,3 +137,5 @@ __global__ void fdm_do_fast_kernel(T  * __restrict__ e,
 }
 
 
+
+#endif // __MATH_FDM_KERNEL_H__
diff --git a/src/math/bcknd/device/hip/lambda2_kernel.h b/src/math/bcknd/device/hip/lambda2_kernel.h
index 983589b2b0a..600b13e09ef 100644
--- a/src/math/bcknd/device/hip/lambda2_kernel.h
+++ b/src/math/bcknd/device/hip/lambda2_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_LAMBDA2_KERNEL_H__
+#define __MATH_LAMBDA2_KERNEL_H__
 /*
  Copyright (c) 2021-2023, The Neko Authors
  All rights reserved.
@@ -357,3 +359,5 @@ lambda2_kernel_kstep(T * __restrict__ lambda2,
   }
 }
 
+
+#endif // __MATH_LAMBDA2_KERNEL_H__
diff --git a/src/math/bcknd/device/hip/math.hip b/src/math/bcknd/device/hip/math.hip
index 9dcb13a4216..7f217f1e759 100644
--- a/src/math/bcknd/device/hip/math.hip
+++ b/src/math/bcknd/device/hip/math.hip
@@ -51,6 +51,21 @@ extern "C" {
                              (hipStream_t) glb_cmd_queue));
   }
 
+  /** Fortran wrapper for masked copy
+   * Copy a vector \f$ a(mask) = b(mask) \f$
+   */
+  void hip_masked_copy(void *a, void *b, void *mask, int *n, int *m) {
+
+    const dim3 nthrds(1024, 1, 1);
+    const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1);
+
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(masked_copy_kernel<real>),
+                       nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue,
+                       (real *) a, (real *) b, (int *) mask, *n, *m);
+
+    HIP_CHECK(hipGetLastError());
+
+  }
   /** Fortran wrapper for rzero
    * Zero a real vector
    */
@@ -358,6 +373,44 @@ extern "C" {
   real * bufred = NULL;
   real * bufred_d = NULL;
   
+  /**
+   * Fortran wrapper vlsc3
+   * Compute multiplication sum \f$ dot = u \cdot v \cdot w \f$
+   */
+  real hip_vlsc3(void *u, void *v, void *w, int *n) {
+        
+    const dim3 nthrds(1024, 1, 1);
+    const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1);
+    const int nb = ((*n) + 1024 - 1)/ 1024;
+    const hipStream_t stream = (hipStream_t) glb_cmd_queue;
+    
+    if ( nb > red_s){
+      red_s = nb;
+      if (bufred != NULL) {
+        HIP_CHECK(hipHostFree(bufred));
+        HIP_CHECK(hipFree(bufred_d));        
+      }
+      HIP_CHECK(hipHostMalloc(&bufred,nb*sizeof(real),hipHostMallocDefault));
+      HIP_CHECK(hipMalloc(&bufred_d, nb*sizeof(real)));
+    }
+     
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(glsc3_kernel<real>),
+                       nblcks, nthrds, 0, stream,
+                       (real *) u, (real *) v,
+                       (real *) w, bufred_d, *n);
+    HIP_CHECK(hipGetLastError());
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(reduce_kernel<real>),
+                       1, 1024, 0, stream, bufred_d, nb);
+    HIP_CHECK(hipGetLastError());
+
+    HIP_CHECK(hipMemcpyAsync(bufred, bufred_d, sizeof(real),
+                             hipMemcpyDeviceToHost, stream));
+    hipStreamSynchronize(stream);
+
+    return bufred[0];
+  }
+  
+
   /**
    * Fortran wrapper glsc3
    * Weighted inner product \f$ a^T b c \f$
diff --git a/src/math/bcknd/device/hip/math_kernel.h b/src/math/bcknd/device/hip/math_kernel.h
index 04bdd1573e8..42673b026c8 100644
--- a/src/math/bcknd/device/hip/math_kernel.h
+++ b/src/math/bcknd/device/hip/math_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_MATH_KERNEL_H__
+#define __MATH_MATH_KERNEL_H__
 /*
  Copyright (c) 2021-2023, The Neko Authors
  All rights reserved.
@@ -48,6 +50,24 @@ __global__ void cmult_kernel(T * __restrict__ a,
   }
 }
 
+/**
+ * Device kernel for masked copy
+ */
+template< typename T >
+__global__ void masked_copy_kernel(T * __restrict__ a,
+                                   T * __restrict__ b,
+                                   int * __restrict__ mask,                    
+                                   const int n,
+                                   const int m) {
+
+  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const int str = blockDim.x * gridDim.x;
+
+  for (int i = idx; i < m; i += str) {
+    a[mask[i+1]-1] = b[mask[i+1]-1];
+  }
+}
+
 /**
  * Device kernel for cmult2
  */
@@ -605,3 +625,5 @@ __global__ void glsum_kernel(const T * a,
     buf_h[blockIdx.x] = sum;
 
 }
+
+#endif // __MATH_MATH_KERNEL_H__
diff --git a/src/math/bcknd/device/hip/mathops_kernel.h b/src/math/bcknd/device/hip/mathops_kernel.h
index 0df7a1a31fa..b7be1d88022 100644
--- a/src/math/bcknd/device/hip/mathops_kernel.h
+++ b/src/math/bcknd/device/hip/mathops_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_MATHOPS_KERNEL_H__
+#define __MATH_MATHOPS_KERNEL_H__
 /*
  Copyright (c) 2021, The Neko Authors
  All rights reserved.
@@ -192,3 +194,5 @@ __global__ void opadd2col_kernel(T * __restrict__ a1,
 
 }
 
+
+#endif // __MATH_MATHOPS_KERNEL_H__
diff --git a/src/math/bcknd/device/hip/opgrad_kernel.h b/src/math/bcknd/device/hip/opgrad_kernel.h
index 1667a4ed00b..1eda4cfd4de 100644
--- a/src/math/bcknd/device/hip/opgrad_kernel.h
+++ b/src/math/bcknd/device/hip/opgrad_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_OPGRAD_KERNEL_H__
+#define __MATH_OPGRAD_KERNEL_H__
 /*
  Copyright (c) 2021-2023, The Neko Authors
  All rights reserved.
@@ -197,3 +199,5 @@ opgrad_kernel_kstep(T * __restrict__ ux,
   }
 }
 
+
+#endif // __MATH_OPGRAD_KERNEL_H__
diff --git a/src/math/bcknd/device/hip/schwarz_kernel.h b/src/math/bcknd/device/hip/schwarz_kernel.h
index c3079d6231a..2cba07ab9b5 100644
--- a/src/math/bcknd/device/hip/schwarz_kernel.h
+++ b/src/math/bcknd/device/hip/schwarz_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_SCHWARZ_KERNEL_H__
+#define __MATH_SCHWARZ_KERNEL_H__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -159,3 +161,5 @@ __global__ void schwarz_toreg3d_kernel(T * __restrict__ b,
     b[ijk+el] = a[(i+1)+(j+1)*nx2+(k+1)*nx2*nx2+el2];
   }
 }
+
+#endif // __MATH_SCHWARZ_KERNEL_H__
diff --git a/src/math/bcknd/device/hip/tensor_kernel.h b/src/math/bcknd/device/hip/tensor_kernel.h
index c785f778d3f..7e56b6dc484 100644
--- a/src/math/bcknd/device/hip/tensor_kernel.h
+++ b/src/math/bcknd/device/hip/tensor_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __MATH_TENSOR_KERNEL_H__
+#define __MATH_TENSOR_KERNEL_H__
 /*
  Copyright (c) 2022, The Neko Authors
  All rights reserved.
@@ -150,3 +152,5 @@ __global__ void tnsr3d_kernel(T  * __restrict__  v,
 }
 
 
+
+#endif // __MATH_TENSOR_KERNEL_H__
diff --git a/src/math/bcknd/device/opencl/ax_helm_kernel.cl b/src/math/bcknd/device/opencl/ax_helm_kernel.cl
index d095ab14590..c08e4ce15ad 100644
--- a/src/math/bcknd/device/opencl/ax_helm_kernel.cl
+++ b/src/math/bcknd/device/opencl/ax_helm_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __MATH_AX_HELM_KERNEL_CL__
+#define __MATH_AX_HELM_KERNEL_CL__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -152,3 +154,5 @@ DEFINE_AX_HELM_KERNEL(7, 256)
 DEFINE_AX_HELM_KERNEL(8, 256)
 DEFINE_AX_HELM_KERNEL(9, 256)
 
+
+#endif // __MATH_AX_HELM_KERNEL_CL__
diff --git a/src/math/bcknd/device/opencl/cdtp_kernel.cl b/src/math/bcknd/device/opencl/cdtp_kernel.cl
index 7a6cfabda75..4c1349471e3 100644
--- a/src/math/bcknd/device/opencl/cdtp_kernel.cl
+++ b/src/math/bcknd/device/opencl/cdtp_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __MATH_CDTP_KERNEL_CL__
+#define __MATH_CDTP_KERNEL_CL__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -117,3 +119,5 @@ DEFINE_CDTP_KERNEL(10, 256)
 
 
 
+
+#endif // __MATH_CDTP_KERNEL_CL__
diff --git a/src/math/bcknd/device/opencl/cfl_kernel.cl b/src/math/bcknd/device/opencl/cfl_kernel.cl
index 3a6cf5e98fe..2cc1496755c 100644
--- a/src/math/bcknd/device/opencl/cfl_kernel.cl
+++ b/src/math/bcknd/device/opencl/cfl_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __MATH_CFL_KERNEL_CL__
+#define __MATH_CFL_KERNEL_CL__
 /*
  Copyright (c) 2022, The Neko Authors
  All rights reserved.
@@ -145,3 +147,5 @@ DEFINE_CFL_KERNEL(7, 256)
 DEFINE_CFL_KERNEL(8, 256)
 DEFINE_CFL_KERNEL(9, 256)
 
+
+#endif // __MATH_CFL_KERNEL_CL__
diff --git a/src/math/bcknd/device/opencl/conv1_kernel.cl b/src/math/bcknd/device/opencl/conv1_kernel.cl
index 782e239088b..648ca2765d0 100644
--- a/src/math/bcknd/device/opencl/conv1_kernel.cl
+++ b/src/math/bcknd/device/opencl/conv1_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __MATH_CONV1_KERNEL_CL__
+#define __MATH_CONV1_KERNEL_CL__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -135,3 +137,5 @@ DEFINE_CONV1_KERNEL(7, 256)
 DEFINE_CONV1_KERNEL(8, 256)
 DEFINE_CONV1_KERNEL(9, 256)
 
+
+#endif // __MATH_CONV1_KERNEL_CL__
diff --git a/src/math/bcknd/device/opencl/dudxyz_kernel.cl b/src/math/bcknd/device/opencl/dudxyz_kernel.cl
index cff01a0bdd1..6326b3bb584 100644
--- a/src/math/bcknd/device/opencl/dudxyz_kernel.cl
+++ b/src/math/bcknd/device/opencl/dudxyz_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __MATH_DUDXYZ_KERNEL_CL__
+#define __MATH_DUDXYZ_KERNEL_CL__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -115,3 +117,5 @@ DEFINE_DUDXYZ_KERNEL(7, 256)
 DEFINE_DUDXYZ_KERNEL(8, 256)
 DEFINE_DUDXYZ_KERNEL(9, 256)
 
+
+#endif // __MATH_DUDXYZ_KERNEL_CL__
diff --git a/src/math/bcknd/device/opencl/fdm_kernel.cl b/src/math/bcknd/device/opencl/fdm_kernel.cl
index c81957ba49f..9d1578ffbf2 100644
--- a/src/math/bcknd/device/opencl/fdm_kernel.cl
+++ b/src/math/bcknd/device/opencl/fdm_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __MATH_FDM_KERNEL_CL__
+#define __MATH_FDM_KERNEL_CL__
 /*
  Copyright (c) 2022, The Neko Authors
  All rights reserved.
@@ -161,3 +163,5 @@ DEFINE_FDM_DO_FAST_KERNEL(13)
 DEFINE_FDM_DO_FAST_KERNEL(14)
 DEFINE_FDM_DO_FAST_KERNEL(15)
 
+
+#endif // __MATH_FDM_KERNEL_CL__
diff --git a/src/math/bcknd/device/opencl/lambda2_kernel.cl b/src/math/bcknd/device/opencl/lambda2_kernel.cl
index 8b8e5468296..652765931b3 100644
--- a/src/math/bcknd/device/opencl/lambda2_kernel.cl
+++ b/src/math/bcknd/device/opencl/lambda2_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __MATH_LAMBDA2_KERNEL_CL__
+#define __MATH_LAMBDA2_KERNEL_CL__
 /*
  Copyright (c) 2023, The Neko Authors
  All rights reserved.
@@ -250,3 +252,5 @@ DEFINE_LAMBDA2_KERNEL(10, 256)
 DEFINE_LAMBDA2_KERNEL(11, 256)
 DEFINE_LAMBDA2_KERNEL(12, 256)
 
+
+#endif // __MATH_LAMBDA2_KERNEL_CL__
diff --git a/src/math/bcknd/device/opencl/math.c b/src/math/bcknd/device/opencl/math.c
index 7b7da7f7c88..b0885c8388d 100644
--- a/src/math/bcknd/device/opencl/math.c
+++ b/src/math/bcknd/device/opencl/math.c
@@ -1,5 +1,5 @@
 /*
- Copyright (c) 2021-2022, The Neko Authors
+ Copyright (c) 2021-2024, The Neko Authors
  All rights reserved.
 
  Redistribution and use in source and binary forms, with or without
@@ -39,6 +39,7 @@
 #endif
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <device/device_config.h>
 #include <device/opencl/jit.h>
 #include <device/opencl/prgm_lib.h>
@@ -55,6 +56,34 @@ void opencl_copy(void *a, void *b, int *n) {
                                0, NULL, NULL));
 }
 
+/** Fortran wrapper for masked copy
+ * Copy a vector \f$ a = b \f$
+ */
+void opencl_masked_copy(void *a, void *b, void *mask, int *n, int *m) {
+  cl_int err;
+
+  if (math_program == NULL)
+    opencl_kernel_jit(math_kernel, (cl_program *) &math_program);
+  
+  cl_kernel kernel = clCreateKernel(math_program, "masked_copy_kernel", &err);
+  CL_CHECK(err);
+
+  CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &a));
+  CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &b));
+  CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &mask));
+  CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), n));
+  CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), m));
+  
+  const int nb = ((*n) + 256 - 1) / 256;
+  const size_t global_item_size = 256 * nb;
+  const size_t local_item_size = 256;
+
+  CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, kernel, 1,
+                                  NULL, &global_item_size, &local_item_size,
+                                  0, NULL, NULL));  
+
+}
+
 /** Fortran wrapper for rzero
  * Zero a real vector
  */
diff --git a/src/math/bcknd/device/opencl/math_kernel.cl b/src/math/bcknd/device/opencl/math_kernel.cl
index c264f6c30d2..b6becea870c 100644
--- a/src/math/bcknd/device/opencl/math_kernel.cl
+++ b/src/math/bcknd/device/opencl/math_kernel.cl
@@ -1,5 +1,7 @@
+#ifndef __MATH_MATH_KERNEL_CL__
+#define __MATH_MATH_KERNEL_CL__
 /*
- Copyright (c) 2021-2022, The Neko Authors
+ Copyright (c) 2021-2024, The Neko Authors
  All rights reserved.
 
  Redistribution and use in source and binary forms, with or without
@@ -32,6 +34,23 @@
  POSSIBILITY OF SUCH DAMAGE.
 */
 
+/**
+ * Device kernel for masked copy
+ */
+__kernel void masked_copy_kernel(__global real * __restrict__ a,
+                                 __global real * __restrict__ b,
+                                 __global int * __restrict__ mask,
+                                 const int n,
+                                 const int m) {
+
+  const int idx = get_global_id(0);
+  const int str = get_global_size(0);
+
+  for (int i = idx; i < n; i += str) {
+    a[mask[i+1]-1] = b[mask[i+1]-1];
+  }  
+}
+
 /**
  * Device kernel for cmult
  */
@@ -503,3 +522,5 @@ __kernel void glsum_kernel(__global const real * __restrict__ a,
   }
 
 }
+
+#endif // __MATH_MATH_KERNEL_CL__
diff --git a/src/math/bcknd/device/opencl/mathops_kernel.cl b/src/math/bcknd/device/opencl/mathops_kernel.cl
index e23866dc52e..a33959cb0f6 100644
--- a/src/math/bcknd/device/opencl/mathops_kernel.cl
+++ b/src/math/bcknd/device/opencl/mathops_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __MATH_MATHOPS_KERNEL_CL__
+#define __MATH_MATHOPS_KERNEL_CL__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -187,3 +189,5 @@ __kernel void opadd2col_kernel(__global real *a1,
 
 }
 
+
+#endif // __MATH_MATHOPS_KERNEL_CL__
diff --git a/src/math/bcknd/device/opencl/opgrad_kernel.cl b/src/math/bcknd/device/opencl/opgrad_kernel.cl
index 3db362f955f..9f901d078b2 100644
--- a/src/math/bcknd/device/opencl/opgrad_kernel.cl
+++ b/src/math/bcknd/device/opencl/opgrad_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __MATH_OPGRAD_KERNEL_CL__
+#define __MATH_OPGRAD_KERNEL_CL__
 /*
  Copyright (c) 2021-2022, The Neko Authors
  All rights reserved.
@@ -127,3 +129,5 @@ DEFINE_OPGRAD_KERNEL(5, 256)
 DEFINE_OPGRAD_KERNEL(4, 256)
 DEFINE_OPGRAD_KERNEL(3, 256)
 DEFINE_OPGRAD_KERNEL(2, 256)
+
+#endif // __MATH_OPGRAD_KERNEL_CL__
diff --git a/src/math/bcknd/device/opencl/opr_cfl.c b/src/math/bcknd/device/opencl/opr_cfl.c
index 9481b8fc7c5..701a077bb83 100644
--- a/src/math/bcknd/device/opencl/opr_cfl.c
+++ b/src/math/bcknd/device/opencl/opr_cfl.c
@@ -39,6 +39,7 @@
 #endif
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <math.h>
 #include <device/device_config.h>
 #include <device/opencl/jit.h>
diff --git a/src/math/bcknd/device/opencl/opr_opgrad.c b/src/math/bcknd/device/opencl/opr_opgrad.c
index 86b421f53a3..035c9c20c2c 100644
--- a/src/math/bcknd/device/opencl/opr_opgrad.c
+++ b/src/math/bcknd/device/opencl/opr_opgrad.c
@@ -39,6 +39,7 @@
 #endif
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <device/device_config.h>
 #include <device/opencl/jit.h>
 #include <device/opencl/prgm_lib.h>
diff --git a/src/math/bcknd/device/opencl/schwarz_kernel.cl b/src/math/bcknd/device/opencl/schwarz_kernel.cl
index 59911952dca..73e2343c909 100644
--- a/src/math/bcknd/device/opencl/schwarz_kernel.cl
+++ b/src/math/bcknd/device/opencl/schwarz_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __MATH_SCHWARZ_KERNEL_CL__
+#define __MATH_SCHWARZ_KERNEL_CL__
 /*
  Copyright (c) 2022-2023, The Neko Authors
  All rights reserved.
@@ -175,3 +177,5 @@ __kernel void schwarz_toreg3d_kernel(__global real * __restrict__ b,
     b[ijk+el] = a[(i+1)+(j+1)*nx2+(k+1)*nx2*nx2+el2];
   }
 }
+
+#endif // __MATH_SCHWARZ_KERNEL_CL__
diff --git a/src/math/bcknd/device/opencl/tensor_kernel.cl b/src/math/bcknd/device/opencl/tensor_kernel.cl
index 1c1ed56108a..be04719536d 100644
--- a/src/math/bcknd/device/opencl/tensor_kernel.cl
+++ b/src/math/bcknd/device/opencl/tensor_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __MATH_TENSOR_KERNEL_CL__
+#define __MATH_TENSOR_KERNEL_CL__
 /*
  Copyright (c) 2022-2023, The Neko Authors
  All rights reserved.
@@ -147,3 +149,5 @@ __kernel void tnsr3d_el_kernel(__global real  * __restrict__  v,
 }
 
 
+
+#endif // __MATH_TENSOR_KERNEL_CL__
diff --git a/src/math/bcknd/device/opr_device.F90 b/src/math/bcknd/device/opr_device.F90
index 2ffaeeb2761..6c9a08ccf97 100644
--- a/src/math/bcknd/device/opr_device.F90
+++ b/src/math/bcknd/device/opr_device.F90
@@ -31,12 +31,12 @@
 ! POSSIBILITY OF SUCH DAMAGE.
 !
 !> Operators accelerator backends
-module opr_device  
+module opr_device
   use gather_scatter
   use num_types, only : rp, c_rp
   use device_math
   use device_mathops
-  use device, only : device_get_ptr 
+  use device, only : device_get_ptr
   use space, only : space_t
   use coefs, only : coef_t
   use mesh, only : mesh_t
@@ -46,7 +46,7 @@ module opr_device
   use, intrinsic :: iso_c_binding
   implicit none
   private
-  
+
   public :: opr_device_dudxyz, opr_device_opgrad, opr_device_cdtp, &
        opr_device_conv1, opr_device_curl, opr_device_cfl, opr_device_lambda2
 
@@ -105,7 +105,7 @@ subroutine hip_opgrad(ux_d, uy_d, uz_d, u_d, &
        integer(c_int) :: nel, lx
      end subroutine hip_opgrad
   end interface
- 
+
   interface
      subroutine hip_lambda2(lambda2_d, u_d, v_d, w_d, &
           dx_d, dy_d, dz_d, &
@@ -131,7 +131,7 @@ real(c_rp) function hip_cfl(dt, u_d, v_d, w_d, &
           jacinv_d, nel, lx) &
           bind(c, name='hip_cfl')
        use, intrinsic :: iso_c_binding
-       import c_rp       
+       import c_rp
        type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
        type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
        type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
@@ -176,7 +176,7 @@ subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
        integer(c_int) :: nel, gdim, lx
      end subroutine cuda_conv1
   end interface
-  
+
   interface
      subroutine cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
           dx_d, dy_d, dz_d, &
@@ -194,7 +194,7 @@ subroutine cuda_opgrad(ux_d, uy_d, uz_d, u_d, &
        integer(c_int) :: nel, lx
      end subroutine cuda_opgrad
   end interface
- 
+
   interface
      subroutine cuda_lambda2(lambda2_d, u_d, v_d, w_d, &
           dx_d, dy_d, dz_d, &
@@ -221,7 +221,7 @@ real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, &
           jacinv_d, nel, lx) &
           bind(c, name='cuda_cfl')
        use, intrinsic :: iso_c_binding
-       import c_rp       
+       import c_rp
        type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
        type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
        type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
@@ -292,7 +292,7 @@ real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
           jacinv_d, nel, lx) &
           bind(c, name='opencl_cfl')
        use, intrinsic :: iso_c_binding
-       import c_rp       
+       import c_rp
        type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d
        type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d
        type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d
@@ -301,7 +301,7 @@ real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, &
      end function opencl_cfl
   end interface
 
-    interface
+  interface
      subroutine opencl_lambda2(lambda2_d, u_d, v_d, w_d, &
           dx_d, dy_d, dz_d, &
           drdx_d, dsdx_d, dtdx_d, &
@@ -318,8 +318,8 @@ subroutine opencl_lambda2(lambda2_d, u_d, v_d, w_d, &
        integer(c_int) :: nel, lx
      end subroutine opencl_lambda2
   end interface
-#endif  
-  
+#endif
+
 contains
 
   subroutine opr_device_dudxyz(du, u, dr, ds, dt, coef)
@@ -337,7 +337,7 @@ subroutine opr_device_dudxyz(du, u, dr, ds, dt, coef)
     ds_d = device_get_ptr(ds)
     dt_d = device_get_ptr(dt)
 
-    associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof)    
+    associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof)
 #ifdef HAVE_HIP
       call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, &
            Xh%dx_d, Xh%dy_d, Xh%dz_d, coef%jacinv_d, &
@@ -354,11 +354,11 @@ subroutine opr_device_dudxyz(du, u, dr, ds, dt, coef)
       call neko_error('No device backend configured')
 #endif
     end associate
-  
+
   end subroutine opr_device_dudxyz
 
-  subroutine opr_device_opgrad(ux, uy, uz, u, coef) 
-    type(coef_t), intent(in) :: coef  
+  subroutine opr_device_opgrad(ux, uy, uz, u, coef)
+    type(coef_t), intent(in) :: coef
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: ux
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uy
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uz
@@ -370,7 +370,7 @@ subroutine opr_device_opgrad(ux, uy, uz, u, coef)
     uz_d = device_get_ptr(uz)
 
     u_d = device_get_ptr(u)
-    
+
     associate(Xh => coef%Xh, msh => coef%msh)
 #ifdef HAVE_HIP
       call hip_opgrad(ux_d, uy_d, uz_d, u_d, &
@@ -397,35 +397,35 @@ subroutine opr_device_opgrad(ux, uy, uz, u, coef)
       call neko_error('No device backend configured')
 #endif
     end associate
-    
+
   end subroutine opr_device_opgrad
   subroutine opr_device_lambda2(lambda2, u, v, w, coef)
-    type(coef_t), intent(in) :: coef  
-    type(field_t), intent(inout) :: lambda2 
+    type(coef_t), intent(in) :: coef
+    type(field_t), intent(inout) :: lambda2
     type(field_t), intent(in) :: u, v, w
 #ifdef HAVE_HIP
-      call hip_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
+    call hip_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
            coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
            coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
            coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
            coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
            coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
 #elif HAVE_CUDA
-      call cuda_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
+    call cuda_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
            coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
            coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
            coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
            coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
            coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
 #elif HAVE_OPENCL
-      call opencl_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
+    call opencl_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, &
            coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, &
            coef%drdx_d, coef%dsdx_d, coef%dtdx_d, &
            coef%drdy_d, coef%dsdy_d, coef%dtdy_d, &
            coef%drdz_d, coef%dsdz_d, coef%dtdz_d, &
            coef%jacinv_d, coef%msh%nelv, coef%Xh%lx)
 #else
-      call neko_error('No device backend configured')
+    call neko_error('No device backend configured')
 #endif
   end subroutine opr_device_lambda2
 
@@ -444,8 +444,8 @@ subroutine opr_device_cdtp(dtx, x, dr,ds, dt, coef)
     dr_d = device_get_ptr(dr)
     ds_d = device_get_ptr(ds)
     dt_d = device_get_ptr(dt)
-    
-    associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof)    
+
+    associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof)
 #ifdef HAVE_HIP
       call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, &
            Xh%dxt_d, Xh%dyt_d, Xh%dzt_d, coef%B_d, &
@@ -461,11 +461,11 @@ subroutine opr_device_cdtp(dtx, x, dr,ds, dt, coef)
 #else
       call neko_error('No device backend configured')
 #endif
-  end associate
+    end associate
 
   end subroutine opr_device_cdtp
 
-  subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)  
+  subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
     type(space_t), intent(in) :: Xh
     type(coef_t), intent(in) :: coef
     integer, intent(in) :: nelv, gdim
@@ -482,8 +482,8 @@ subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
     vx_d = device_get_ptr(vx)
     vy_d = device_get_ptr(vy)
     vz_d = device_get_ptr(vz)
-    
-    associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof)    
+
+    associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof)
 #ifdef HAVE_HIP
       call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, &
                      Xh%dx_d, Xh%dy_d, Xh%dz_d, &
@@ -509,7 +509,7 @@ subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
       call neko_error('No device backend configured')
 #endif
     end associate
-    
+
   end subroutine opr_device_conv1
 
   subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
@@ -650,16 +650,16 @@ subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
     call device_sub3(w3%x_d, work1%x_d, work2%x_d, n)
     !!    BC dependent, Needs to change if cyclic
 
-    call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_Xh%B_d, gdim, n)    
-    call c_Xh%gs_h%op(w1, GS_OP_ADD) 
-    call c_Xh%gs_h%op(w2, GS_OP_ADD) 
+    call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_Xh%B_d, gdim, n)
+    call c_Xh%gs_h%op(w1, GS_OP_ADD)
+    call c_Xh%gs_h%op(w2, GS_OP_ADD)
     call c_Xh%gs_h%op(w3, GS_OP_ADD)
-    call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_Xh%Binv_d, gdim, n)    
+    call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_Xh%Binv_d, gdim, n)
 
 #else
     call neko_error('No device backend configured')
 #endif
-        
+
   end subroutine opr_device_curl
 
   function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
@@ -667,7 +667,7 @@ function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
     type(coef_t) :: coef
     integer :: nelv, gdim
     real(kind=rp) :: dt
-    real(kind=rp), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) ::  u, v, w  
+    real(kind=rp), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) ::  u, v, w
     real(kind=rp) :: cfl
     type(c_ptr) :: u_d, v_d, w_d
 
diff --git a/src/math/bcknd/device/tensor_device.F90 b/src/math/bcknd/device/tensor_device.F90
index 63a3a179aaf..422209d3ea2 100644
--- a/src/math/bcknd/device/tensor_device.F90
+++ b/src/math/bcknd/device/tensor_device.F90
@@ -37,9 +37,9 @@ module tensor_device
   private
 
   public :: tnsr3d_device, tnsr3d_el_list_device
-  
+
 #ifdef HAVE_HIP
-   interface
+  interface
      subroutine hip_tnsr3d_el_list(v_d, nv, u_d, nu, A_d, Bt_d, Ct_d, elements, n_points) &
           bind(c, name='hip_tnsr3d_el_list')
        use, intrinsic :: iso_c_binding
diff --git a/src/math/bcknd/sx/ax_helm_sx.f90 b/src/math/bcknd/sx/ax_helm_sx.f90
index c64dc5df469..238202ad674 100644
--- a/src/math/bcknd/sx/ax_helm_sx.f90
+++ b/src/math/bcknd/sx/ax_helm_sx.f90
@@ -35,7 +35,6 @@ module ax_helm_sx
   use num_types, only : rp
   use coefs, only : coef_t
   use space, only : space_t
-  use field, only : field_t
   use mesh, only : mesh_t
   use math, only : addcol4
   implicit none
@@ -46,7 +45,7 @@ module ax_helm_sx
      procedure, nopass :: compute => ax_helm_sx_compute
   end type ax_helm_sx_t
 
-contains 
+contains
 
   subroutine ax_helm_sx_compute(w, u, coef, msh, Xh)
     type(mesh_t), intent(inout) :: msh
@@ -104,7 +103,7 @@ subroutine ax_helm_sx_compute(w, u, coef, msh, Xh)
     if (coef%ifh2) call addcol4 (w,coef%h2,coef%B,u,coef%dof%size())
 
   end subroutine ax_helm_sx_compute
-  
+
   subroutine sx_ax_helm_lx(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
        h1, G11, G22, G33, G12, G13, G23, n, lx)
     integer, intent(in) :: n, lx
@@ -223,7 +222,7 @@ subroutine sx_ax_helm_lx(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 do kk = 1, lx
                    wt = wt + dzt(k, kk)*uut(i,j,kk,e)
                 end do
-                w(i,j,k,e) = w(i,j,k,e) + wt 
+                w(i,j,k,e) = w(i,j,k,e) + wt
              end do
           end do
        end do
@@ -231,7 +230,7 @@ subroutine sx_ax_helm_lx(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
 
 
   end subroutine sx_ax_helm_lx
-  
+
   subroutine sx_ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
        h1, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 14
@@ -351,7 +350,7 @@ subroutine sx_ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 do kk = 1, lx
                    wt = wt + dzt(k, kk)*uut(i,j,kk,e)
                 end do
-                w(i,j,k,e) = w(i,j,k,e) + wt 
+                w(i,j,k,e) = w(i,j,k,e) + wt
              end do
           end do
        end do
@@ -479,7 +478,7 @@ subroutine sx_ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 do kk = 1, lx
                    wt = wt + dzt(k, kk)*uut(i,j,kk,e)
                 end do
-                w(i,j,k,e) = w(i,j,k,e) + wt 
+                w(i,j,k,e) = w(i,j,k,e) + wt
              end do
           end do
        end do
@@ -487,7 +486,7 @@ subroutine sx_ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
 
 
   end subroutine sx_ax_helm_lx13
-  
+
   subroutine sx_ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
        h1, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 12
@@ -607,7 +606,7 @@ subroutine sx_ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 do kk = 1, lx
                    wt = wt + dzt(k, kk)*uut(i,j,kk,e)
                 end do
-                w(i,j,k,e) = w(i,j,k,e) + wt 
+                w(i,j,k,e) = w(i,j,k,e) + wt
              end do
           end do
        end do
@@ -635,7 +634,7 @@ subroutine sx_ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     real(kind=rp), intent(in) :: Dxt(lx, lx)
     real(kind=rp), intent(in) :: Dyt(lx, lx)
     real(kind=rp), intent(in) :: Dzt(lx, lx)
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
     real(kind=rp) :: ur(lx, lx, lx, n)
     real(kind=rp) :: us(lx, lx, lx, n)
     real(kind=rp) :: ut(lx, lx, lx, n)
@@ -655,9 +654,9 @@ subroutine sx_ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     end do
 
     do k = 1, lx
-       do i = 1, lx                
-          do j = 1, lx       
-             do e = 1, n                                   
+       do i = 1, lx
+          do j = 1, lx
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -735,7 +734,7 @@ subroutine sx_ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 do kk = 1, lx
                    wt = wt + dzt(k, kk)*uut(i,j,kk,e)
                 end do
-                w(i,j,k,e) = w(i,j,k,e) + wt 
+                w(i,j,k,e) = w(i,j,k,e) + wt
              end do
           end do
        end do
@@ -743,7 +742,7 @@ subroutine sx_ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
 
 
   end subroutine sx_ax_helm_lx11
-  
+
   subroutine sx_ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
        h1, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 10
@@ -763,7 +762,7 @@ subroutine sx_ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     real(kind=rp), intent(in) :: Dxt(lx, lx)
     real(kind=rp), intent(in) :: Dyt(lx, lx)
     real(kind=rp), intent(in) :: Dzt(lx, lx)
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
     real(kind=rp) :: ur(lx, lx, lx, n)
     real(kind=rp) :: us(lx, lx, lx, n)
     real(kind=rp) :: ut(lx, lx, lx, n)
@@ -783,9 +782,9 @@ subroutine sx_ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     end do
 
     do k = 1, lx
-       do i = 1, lx                
-          do j = 1, lx       
-             do e = 1, n                                   
+       do i = 1, lx
+          do j = 1, lx
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -863,7 +862,7 @@ subroutine sx_ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 do kk = 1, lx
                    wt = wt + dzt(k, kk)*uut(i,j,kk,e)
                 end do
-                w(i,j,k,e) = w(i,j,k,e) + wt 
+                w(i,j,k,e) = w(i,j,k,e) + wt
              end do
           end do
        end do
@@ -891,7 +890,7 @@ subroutine sx_ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     real(kind=rp), intent(in) :: Dxt(lx, lx)
     real(kind=rp), intent(in) :: Dyt(lx, lx)
     real(kind=rp), intent(in) :: Dzt(lx, lx)
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
     real(kind=rp) :: ur(lx, lx, lx, n)
     real(kind=rp) :: us(lx, lx, lx, n)
     real(kind=rp) :: ut(lx, lx, lx, n)
@@ -911,9 +910,9 @@ subroutine sx_ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     end do
 
     do k = 1, lx
-       do i = 1, lx                
-          do j = 1, lx       
-             do e = 1, n                                   
+       do i = 1, lx
+          do j = 1, lx
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -991,7 +990,7 @@ subroutine sx_ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 do kk = 1, lx
                    wt = wt + dzt(k, kk)*uut(i,j,kk,e)
                 end do
-                w(i,j,k,e) = w(i,j,k,e) + wt 
+                w(i,j,k,e) = w(i,j,k,e) + wt
              end do
           end do
        end do
@@ -999,7 +998,7 @@ subroutine sx_ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
 
 
   end subroutine sx_ax_helm_lx9
-  
+
   subroutine sx_ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
        h1, G11, G22, G33, G12, G13, G23, n)
     integer, parameter :: lx = 8
@@ -1019,7 +1018,7 @@ subroutine sx_ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     real(kind=rp), intent(in) :: Dxt(lx, lx)
     real(kind=rp), intent(in) :: Dyt(lx, lx)
     real(kind=rp), intent(in) :: Dzt(lx, lx)
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
     real(kind=rp) :: ur(lx, lx, lx, n)
     real(kind=rp) :: us(lx, lx, lx, n)
     real(kind=rp) :: ut(lx, lx, lx, n)
@@ -1039,9 +1038,9 @@ subroutine sx_ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     end do
 
     do k = 1, lx
-       do i = 1, lx                
-          do j = 1, lx       
-             do e = 1, n                                   
+       do i = 1, lx
+          do j = 1, lx
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -1119,7 +1118,7 @@ subroutine sx_ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 do kk = 1, lx
                    wt = wt + dzt(k, kk)*uut(i,j,kk,e)
                 end do
-                w(i,j,k,e) = w(i,j,k,e) + wt 
+                w(i,j,k,e) = w(i,j,k,e) + wt
              end do
           end do
        end do
@@ -1147,7 +1146,7 @@ subroutine sx_ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     real(kind=rp), intent(in) :: Dxt(lx, lx)
     real(kind=rp), intent(in) :: Dyt(lx, lx)
     real(kind=rp), intent(in) :: Dzt(lx, lx)
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
     real(kind=rp) :: ur(lx, lx, lx, n)
     real(kind=rp) :: us(lx, lx, lx, n)
     real(kind=rp) :: ut(lx, lx, lx, n)
@@ -1167,9 +1166,9 @@ subroutine sx_ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     end do
 
     do k = 1, lx
-       do i = 1, lx                
-          do j = 1, lx       
-             do e = 1, n                                   
+       do i = 1, lx
+          do j = 1, lx
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -1247,7 +1246,7 @@ subroutine sx_ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 do kk = 1, lx
                    wt = wt + dzt(k, kk)*uut(i,j,kk,e)
                 end do
-                w(i,j,k,e) = w(i,j,k,e) + wt 
+                w(i,j,k,e) = w(i,j,k,e) + wt
              end do
           end do
        end do
@@ -1275,7 +1274,7 @@ subroutine sx_ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     real(kind=rp), intent(in) :: Dxt(lx, lx)
     real(kind=rp), intent(in) :: Dyt(lx, lx)
     real(kind=rp), intent(in) :: Dzt(lx, lx)
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
     real(kind=rp) :: ur(lx, lx, lx, n)
     real(kind=rp) :: us(lx, lx, lx, n)
     real(kind=rp) :: ut(lx, lx, lx, n)
@@ -1295,9 +1294,9 @@ subroutine sx_ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     end do
 
     do k = 1, lx
-       do i = 1, lx                
-          do j = 1, lx       
-             do e = 1, n                                   
+       do i = 1, lx
+          do j = 1, lx
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -1375,7 +1374,7 @@ subroutine sx_ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 do kk = 1, lx
                    wt = wt + dzt(k, kk)*uut(i,j,kk,e)
                 end do
-                w(i,j,k,e) = w(i,j,k,e) + wt 
+                w(i,j,k,e) = w(i,j,k,e) + wt
              end do
           end do
        end do
@@ -1403,7 +1402,7 @@ subroutine sx_ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     real(kind=rp), intent(in) :: Dxt(lx, lx)
     real(kind=rp), intent(in) :: Dyt(lx, lx)
     real(kind=rp), intent(in) :: Dzt(lx, lx)
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
     real(kind=rp) :: ur(lx, lx, lx, n)
     real(kind=rp) :: us(lx, lx, lx, n)
     real(kind=rp) :: ut(lx, lx, lx, n)
@@ -1423,9 +1422,9 @@ subroutine sx_ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
     end do
 
     do k = 1, lx
-       do i = 1, lx                
-          do j = 1, lx       
-             do e = 1, n                                   
+       do i = 1, lx
+          do j = 1, lx
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -1503,7 +1502,7 @@ subroutine sx_ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 do kk = 1, lx
                    wt = wt + dzt(k, kk)*uut(i,j,kk,e)
                 end do
-                w(i,j,k,e) = w(i,j,k,e) + wt 
+                w(i,j,k,e) = w(i,j,k,e) + wt
              end do
           end do
        end do
@@ -1544,7 +1543,7 @@ subroutine sx_ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
           ur(i,jj,1,1) = Dx(i,1)*u(1,jj,1,1) &
                        + Dx(i,2)*u(2,jj,1,1) &
                        + Dx(i,3)*u(3,jj,1,1) &
-                       + Dx(i,4)*u(4,jj,1,1) 
+                       + Dx(i,4)*u(4,jj,1,1)
        end do
     end do
 
@@ -1556,7 +1555,7 @@ subroutine sx_ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 us(i,j,k,e) = Dy(j,1) * u(i,1,k,e) &
                             + Dy(j,2) * u(i,2,k,e) &
                             + Dy(j,3) * u(i,3,k,e) &
-                            + Dy(j,4) * u(i,4,k,e) 
+                            + Dy(j,4) * u(i,4,k,e)
              end do
           end do
        end do
@@ -1571,7 +1570,7 @@ subroutine sx_ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 ut(i,j,k,e) = Dz(k,1) * u(i,j,1,e) &
                             + Dz(k,2) * u(i,j,2,e) &
                             + Dz(k,3) * u(i,j,3,e) &
-                            + Dz(k,4) * u(i,j,4,e) 
+                            + Dz(k,4) * u(i,j,4,e)
              end do
           end do
        end do
@@ -1599,7 +1598,7 @@ subroutine sx_ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
           w(i,jj,1,1) = Dxt(i,1) * uur(1,jj,1,1) &
                       + Dxt(i,2) * uur(2,jj,1,1) &
                       + Dxt(i,3) * uur(3,jj,1,1) &
-                      + Dxt(i,4) * uur(4,jj,1,1) 
+                      + Dxt(i,4) * uur(4,jj,1,1)
        end do
     end do
 
@@ -1610,7 +1609,7 @@ subroutine sx_ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 w(i,j,k,e) = w(i,j,k,e) + Dyt(j,1) * uus(i,1,k,e) &
                                         + Dyt(j,2) * uus(i,2,k,e) &
                                         + Dyt(j,3) * uus(i,3,k,e) &
-                                        + Dyt(j,4) * uus(i,4,k,e) 
+                                        + Dyt(j,4) * uus(i,4,k,e)
              end do
           end do
        end do
@@ -1623,7 +1622,7 @@ subroutine sx_ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
                 w(i,j,k,e) = w(i,j,k,e) + Dzt(k,1) * uut(i,j,1,e) &
                                         + Dzt(k,2) * uut(i,j,2,e) &
                                         + Dzt(k,3) * uut(i,j,3,e) &
-                                        + Dzt(k,4) * uut(i,j,4,e) 
+                                        + Dzt(k,4) * uut(i,j,4,e)
              end do
           end do
        end do
@@ -1662,7 +1661,7 @@ subroutine sx_ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
        do jj = 1, lx * lx * n
           ur(i,jj,1,1) = Dx(i,1)*u(1,jj,1,1) &
                        + Dx(i,2)*u(2,jj,1,1) &
-                       + Dx(i,3)*u(3,jj,1,1) 
+                       + Dx(i,3)*u(3,jj,1,1)
        end do
     end do
 
@@ -1673,7 +1672,7 @@ subroutine sx_ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
              do e = 1, n
                 us(i,j,k,e) = Dy(j,1) * u(i,1,k,e) &
                             + Dy(j,2) * u(i,2,k,e) &
-                            + Dy(j,3) * u(i,3,k,e) 
+                            + Dy(j,3) * u(i,3,k,e)
              end do
           end do
        end do
@@ -1687,7 +1686,7 @@ subroutine sx_ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
              do e = 1, n
                 ut(i,j,k,e) = Dz(k,1) * u(i,j,1,e) &
                             + Dz(k,2) * u(i,j,2,e) &
-                            + Dz(k,3) * u(i,j,3,e) 
+                            + Dz(k,3) * u(i,j,3,e)
              end do
           end do
        end do
@@ -1714,7 +1713,7 @@ subroutine sx_ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
        do jj = 1, lx * lx * n
           w(i,jj,1,1) = Dxt(i,1) * uur(1,jj,1,1) &
                       + Dxt(i,2) * uur(2,jj,1,1) &
-                      + Dxt(i,3) * uur(3,jj,1,1) 
+                      + Dxt(i,3) * uur(3,jj,1,1)
        end do
     end do
 
@@ -1724,7 +1723,7 @@ subroutine sx_ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
              do e = 1, n
                 w(i,j,k,e) = w(i,j,k,e) + Dyt(j,1) * uus(i,1,k,e) &
                                         + Dyt(j,2) * uus(i,2,k,e) &
-                                        + Dyt(j,3) * uus(i,3,k,e) 
+                                        + Dyt(j,3) * uus(i,3,k,e)
              end do
           end do
        end do
@@ -1736,7 +1735,7 @@ subroutine sx_ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, &
              do e = 1, n
                 w(i,j,k,e) = w(i,j,k,e) + Dzt(k,1) * uut(i,j,1,e) &
                                         + Dzt(k,2) * uut(i,j,2,e) &
-                                        + Dzt(k,3) * uut(i,j,3,e) 
+                                        + Dzt(k,3) * uut(i,j,3,e)
              end do
           end do
        end do
diff --git a/src/math/bcknd/sx/fdm_sx.f90 b/src/math/bcknd/sx/fdm_sx.f90
index e91182e17ba..0c01f44eb27 100644
--- a/src/math/bcknd/sx/fdm_sx.f90
+++ b/src/math/bcknd/sx/fdm_sx.f90
@@ -36,7 +36,7 @@ module fdm_sx
   use tensor_sx
   implicit none
   private
-  
+
   public :: fdm_do_fast_sx
 
 contains
@@ -46,7 +46,7 @@ subroutine fdm_do_fast_sx(e, r, s, d, nl, ldim, nelv)
     real(kind=rp), intent(inout) :: e(nl**ldim, nelv)
     real(kind=rp), intent(inout) :: r(nl**ldim, nelv)
     real(kind=rp), intent(inout) :: s(nl*nl,2,ldim, nelv)
-    real(kind=rp), intent(inout) :: d(nl**ldim, nelv)    
+    real(kind=rp), intent(inout) :: d(nl**ldim, nelv)
     integer ::  ie, nn, i
 
     nn = nl**ldim
@@ -58,7 +58,7 @@ subroutine fdm_do_fast_sx(e, r, s, d, nl, ldim, nelv)
           end do
           call tnsr2d_el_sx(e(1,ie), nl, r(1,ie), nl, s(1,1,1,ie), s(1,2,2,ie))
        end do
-    else       
+    else
        select case (nl)
        case (14)
           call fdm_do_fast_sx_nl14(e, r, s, d, nelv)
@@ -104,7 +104,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n)
     nn = n**2
     nnn = n**3
 
-    
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -117,7 +117,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -133,7 +133,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -141,7 +141,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n)
              tmp = 0.0_rp
              do k = 1, n
                 tmp = tmp + wrk2(i + nn * (k - 1), ie) * s(k, j, 1, 3, ie)
-             end do                     
+             end do
              e(jj,ie) = tmp
           end do
        end do
@@ -150,7 +150,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -163,7 +163,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -179,7 +179,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n)
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -192,7 +192,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n)
           end do
        end do
     end do
-    
+
   end subroutine fdm_do_fast_sx_nl
 
   subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv)
@@ -206,7 +206,7 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv)
     real(kind=rp), intent(inout) :: d(n**3, nelv)
     real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv)
     integer ::  ie, i, j, l, ii, jj
-       
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -224,11 +224,11 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv)
                          + s(i,11,2,1,ie) * r(11 + n * (j - 1), ie) &
                          + s(i,12,2,1,ie) * r(12 + n * (j - 1), ie) &
                          + s(i,13,2,1,ie) * r(13 + n * (j - 1), ie) &
-                         + s(i,14,2,1,ie) * r(14 + n * (j - 1), ie)    
+                         + s(i,14,2,1,ie) * r(14 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -261,12 +261,12 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv)
                             + wrk(l + n * (13 - 1) + nn * (i - 1), ie) &
                               * s(13,j,1,2,ie) &
                             + wrk(l + n * (14 - 1) + nn * (i - 1), ie) &
-                              * s(14,j,1,2,ie) 
+                              * s(14,j,1,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -284,7 +284,7 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv)
                       + wrk2(i + nn * (11 - 1), ie) * s(11, j, 1, 3, ie) &
                       + wrk2(i + nn * (12 - 1), ie) * s(12, j, 1, 3, ie) &
                       + wrk2(i + nn * (13 - 1), ie) * s(13, j, 1, 3, ie) &
-                      + wrk2(i + nn * (14 - 1), ie) * s(14, j, 1, 3, ie) 
+                      + wrk2(i + nn * (14 - 1), ie) * s(14, j, 1, 3, ie)
           end do
        end do
     end do
@@ -292,7 +292,7 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -310,11 +310,11 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv)
                          + s(i,11,1,1,ie) * r(11 + n * (j - 1), ie) &
                          + s(i,12,1,1,ie) * r(12 + n * (j - 1), ie) &
                          + s(i,13,1,1,ie) * r(13 + n * (j - 1), ie) &
-                         + s(i,14,1,1,ie) * r(14 + n * (j - 1), ie)    
+                         + s(i,14,1,1,ie) * r(14 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -347,12 +347,12 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv)
                             + wrk(l + n * (13 - 1) + nn * (i - 1), ie) &
                               * s(13,j,2,2,ie) &
                             + wrk(l + n * (14 - 1) + nn * (i - 1), ie) &
-                              * s(14,j,2,2,ie) 
+                              * s(14,j,2,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -370,11 +370,11 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv)
                       + wrk2(i + nn * (11 - 1), ie) * s(11, j, 2, 3, ie) &
                       + wrk2(i + nn * (12 - 1), ie) * s(12, j, 2, 3, ie) &
                       + wrk2(i + nn * (13 - 1), ie) * s(13, j, 2, 3, ie) &
-                      + wrk2(i + nn * (14 - 1), ie) * s(14, j, 2, 3, ie) 
+                      + wrk2(i + nn * (14 - 1), ie) * s(14, j, 2, 3, ie)
           end do
        end do
     end do
-    
+
 
   end subroutine fdm_do_fast_sx_nl14
 
@@ -389,7 +389,7 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv)
     real(kind=rp), intent(inout) :: d(n**3, nelv)
     real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv)
     integer ::  ie, i, j, l, ii, jj
-       
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -406,11 +406,11 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv)
                          + s(i,10,2,1,ie) * r(10 + n * (j - 1), ie) &
                          + s(i,11,2,1,ie) * r(11 + n * (j - 1), ie) &
                          + s(i,12,2,1,ie) * r(12 + n * (j - 1), ie) &
-                         + s(i,13,2,1,ie) * r(13 + n * (j - 1), ie) 
+                         + s(i,13,2,1,ie) * r(13 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -441,12 +441,12 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv)
                             + wrk(l + n * (12 - 1) + nn * (i - 1), ie) &
                               * s(12,j,1,2,ie) &
                             + wrk(l + n * (13 - 1) + nn * (i - 1), ie) &
-                              * s(13,j,1,2,ie) 
+                              * s(13,j,1,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -463,7 +463,7 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv)
                       + wrk2(i + nn * (10 - 1), ie) * s(10, j, 1, 3, ie) &
                       + wrk2(i + nn * (11 - 1), ie) * s(11, j, 1, 3, ie) &
                       + wrk2(i + nn * (12 - 1), ie) * s(12, j, 1, 3, ie) &
-                      + wrk2(i + nn * (13 - 1), ie) * s(13, j, 1, 3, ie) 
+                      + wrk2(i + nn * (13 - 1), ie) * s(13, j, 1, 3, ie)
           end do
        end do
     end do
@@ -471,7 +471,7 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -488,11 +488,11 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv)
                          + s(i,10,1,1,ie) * r(10 + n * (j - 1), ie) &
                          + s(i,11,1,1,ie) * r(11 + n * (j - 1), ie) &
                          + s(i,12,1,1,ie) * r(12 + n * (j - 1), ie) &
-                         + s(i,13,1,1,ie) * r(13 + n * (j - 1), ie) 
+                         + s(i,13,1,1,ie) * r(13 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -523,12 +523,12 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv)
                             + wrk(l + n * (12 - 1) + nn * (i - 1), ie) &
                               * s(12,j,2,2,ie) &
                             + wrk(l + n * (13 - 1) + nn * (i - 1), ie) &
-                              * s(13,j,2,2,ie) 
+                              * s(13,j,2,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -545,11 +545,11 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv)
                       + wrk2(i + nn * (10 - 1), ie) * s(10, j, 2, 3, ie) &
                       + wrk2(i + nn * (11 - 1), ie) * s(11, j, 2, 3, ie) &
                       + wrk2(i + nn * (12 - 1), ie) * s(12, j, 2, 3, ie) &
-                      + wrk2(i + nn * (13 - 1), ie) * s(13, j, 2, 3, ie) 
+                      + wrk2(i + nn * (13 - 1), ie) * s(13, j, 2, 3, ie)
           end do
        end do
     end do
-    
+
 
   end subroutine fdm_do_fast_sx_nl13
 
@@ -564,7 +564,7 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv)
     real(kind=rp), intent(inout) :: d(n**3, nelv)
     real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv)
     integer ::  ie, i, j, l, ii, jj
-       
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -580,11 +580,11 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv)
                          + s(i,9,2,1,ie) * r(9 + n * (j - 1), ie) &
                          + s(i,10,2,1,ie) * r(10 + n * (j - 1), ie) &
                          + s(i,11,2,1,ie) * r(11 + n * (j - 1), ie) &
-                         + s(i,12,2,1,ie) * r(12 + n * (j - 1), ie) 
+                         + s(i,12,2,1,ie) * r(12 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -613,12 +613,12 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv)
                             + wrk(l + n * (11 - 1) + nn * (i - 1), ie) &
                               * s(11,j,1,2,ie) &
                             + wrk(l + n * (12 - 1) + nn * (i - 1), ie) &
-                              * s(12,j,1,2,ie) 
+                              * s(12,j,1,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -634,7 +634,7 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv)
                       + wrk2(i + nn * (9 - 1), ie) * s(9, j, 1, 3, ie) &
                       + wrk2(i + nn * (10 - 1), ie) * s(10, j, 1, 3, ie) &
                       + wrk2(i + nn * (11 - 1), ie) * s(11, j, 1, 3, ie) &
-                      + wrk2(i + nn * (12 - 1), ie) * s(12, j, 1, 3, ie) 
+                      + wrk2(i + nn * (12 - 1), ie) * s(12, j, 1, 3, ie)
           end do
        end do
     end do
@@ -642,7 +642,7 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -658,11 +658,11 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv)
                          + s(i,9,1,1,ie) * r(9 + n * (j - 1), ie) &
                          + s(i,10,1,1,ie) * r(10 + n * (j - 1), ie) &
                          + s(i,11,1,1,ie) * r(11 + n * (j - 1), ie) &
-                         + s(i,12,1,1,ie) * r(12 + n * (j - 1), ie) 
+                         + s(i,12,1,1,ie) * r(12 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -691,12 +691,12 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv)
                             + wrk(l + n * (11 - 1) + nn * (i - 1), ie) &
                               * s(11,j,2,2,ie) &
                             + wrk(l + n * (12 - 1) + nn * (i - 1), ie) &
-                              * s(12,j,2,2,ie) 
+                              * s(12,j,2,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -712,11 +712,11 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv)
                       + wrk2(i + nn * (9 - 1), ie) * s(9, j, 2, 3, ie) &
                       + wrk2(i + nn * (10 - 1), ie) * s(10, j, 2, 3, ie) &
                       + wrk2(i + nn * (11 - 1), ie) * s(11, j, 2, 3, ie) &
-                      + wrk2(i + nn * (12 - 1), ie) * s(12, j, 2, 3, ie) 
+                      + wrk2(i + nn * (12 - 1), ie) * s(12, j, 2, 3, ie)
           end do
        end do
     end do
-    
+
 
   end subroutine fdm_do_fast_sx_nl12
 
@@ -731,7 +731,7 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv)
     real(kind=rp), intent(inout) :: d(n**3, nelv)
     real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv)
     integer ::  ie, i, j, l, ii, jj
-       
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -746,11 +746,11 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv)
                          + s(i,8,2,1,ie) * r(8 + n * (j - 1), ie) &
                          + s(i,9,2,1,ie) * r(9 + n * (j - 1), ie) &
                          + s(i,10,2,1,ie) * r(10 + n * (j - 1), ie) &
-                         + s(i,11,2,1,ie) * r(11 + n * (j - 1), ie) 
+                         + s(i,11,2,1,ie) * r(11 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -777,12 +777,12 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv)
                             + wrk(l + n * (10 - 1) + nn * (i - 1), ie) &
                               * s(10,j,1,2,ie) &
                             + wrk(l + n * (11 - 1) + nn * (i - 1), ie) &
-                              * s(11,j,1,2,ie) 
+                              * s(11,j,1,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -797,7 +797,7 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv)
                       + wrk2(i + nn * (8 - 1), ie) * s(8, j, 1, 3, ie) &
                       + wrk2(i + nn * (9 - 1), ie) * s(9, j, 1, 3, ie) &
                       + wrk2(i + nn * (10 - 1), ie) * s(10, j, 1, 3, ie) &
-                      + wrk2(i + nn * (11 - 1), ie) * s(11, j, 1, 3, ie) 
+                      + wrk2(i + nn * (11 - 1), ie) * s(11, j, 1, 3, ie)
           end do
        end do
     end do
@@ -805,7 +805,7 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -820,11 +820,11 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv)
                          + s(i,8,1,1,ie) * r(8 + n * (j - 1), ie) &
                          + s(i,9,1,1,ie) * r(9 + n * (j - 1), ie) &
                          + s(i,10,1,1,ie) * r(10 + n * (j - 1), ie) &
-                         + s(i,11,1,1,ie) * r(11 + n * (j - 1), ie) 
+                         + s(i,11,1,1,ie) * r(11 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -851,12 +851,12 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv)
                             + wrk(l + n * (10 - 1) + nn * (i - 1), ie) &
                               * s(10,j,2,2,ie) &
                             + wrk(l + n * (11 - 1) + nn * (i - 1), ie) &
-                              * s(11,j,2,2,ie) 
+                              * s(11,j,2,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -871,11 +871,11 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv)
                       + wrk2(i + nn * (8 - 1), ie) * s(8, j, 2, 3, ie) &
                       + wrk2(i + nn * (9 - 1), ie) * s(9, j, 2, 3, ie) &
                       + wrk2(i + nn * (10 - 1), ie) * s(10, j, 2, 3, ie) &
-                      + wrk2(i + nn * (11 - 1), ie) * s(11, j, 2, 3, ie) 
+                      + wrk2(i + nn * (11 - 1), ie) * s(11, j, 2, 3, ie)
           end do
        end do
     end do
-    
+
 
   end subroutine fdm_do_fast_sx_nl11
 
@@ -890,7 +890,7 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv)
     real(kind=rp), intent(inout) :: d(n**3, nelv)
     real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv)
     integer ::  ie, i, j, l, ii, jj
-       
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -904,11 +904,11 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv)
                          + s(i,7,2,1,ie) * r(7 + n * (j - 1), ie) &
                          + s(i,8,2,1,ie) * r(8 + n * (j - 1), ie) &
                          + s(i,9,2,1,ie) * r(9 + n * (j - 1), ie) &
-                         + s(i,10,2,1,ie) * r(10 + n * (j - 1), ie) 
+                         + s(i,10,2,1,ie) * r(10 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -933,12 +933,12 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv)
                             + wrk(l + n * (9 - 1) + nn * (i - 1), ie) &
                               * s(9,j,1,2,ie) &
                             + wrk(l + n * (10 - 1) + nn * (i - 1), ie) &
-                              * s(10,j,1,2,ie) 
+                              * s(10,j,1,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -952,7 +952,7 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv)
                       + wrk2(i + nn * (7 - 1), ie) * s(7, j, 1, 3, ie) &
                       + wrk2(i + nn * (8 - 1), ie) * s(8, j, 1, 3, ie) &
                       + wrk2(i + nn * (9 - 1), ie) * s(9, j, 1, 3, ie) &
-                      + wrk2(i + nn * (10 - 1), ie) * s(10, j, 1, 3, ie) 
+                      + wrk2(i + nn * (10 - 1), ie) * s(10, j, 1, 3, ie)
           end do
        end do
     end do
@@ -960,7 +960,7 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -974,11 +974,11 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv)
                          + s(i,7,1,1,ie) * r(7 + n * (j - 1), ie) &
                          + s(i,8,1,1,ie) * r(8 + n * (j - 1), ie) &
                          + s(i,9,1,1,ie) * r(9 + n * (j - 1), ie) &
-                         + s(i,10,1,1,ie) * r(10 + n * (j - 1), ie) 
+                         + s(i,10,1,1,ie) * r(10 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1003,12 +1003,12 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv)
                             + wrk(l + n * (9 - 1) + nn * (i - 1), ie) &
                               * s(9,j,2,2,ie) &
                             + wrk(l + n * (10 - 1) + nn * (i - 1), ie) &
-                              * s(10,j,2,2,ie) 
+                              * s(10,j,2,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -1022,14 +1022,14 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv)
                       + wrk2(i + nn * (7 - 1), ie) * s(7, j, 2, 3, ie) &
                       + wrk2(i + nn * (8 - 1), ie) * s(8, j, 2, 3, ie) &
                       + wrk2(i + nn * (9 - 1), ie) * s(9, j, 2, 3, ie) &
-                      + wrk2(i + nn * (10 - 1), ie) * s(10, j, 2, 3, ie) 
+                      + wrk2(i + nn * (10 - 1), ie) * s(10, j, 2, 3, ie)
           end do
        end do
     end do
-    
+
 
   end subroutine fdm_do_fast_sx_nl10
-  
+
   subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv)
     integer, parameter :: n = 9
     integer, parameter :: nn = n**2
@@ -1041,7 +1041,7 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv)
     real(kind=rp), intent(inout) :: d(n**3, nelv)
     real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv)
     integer ::  ie, i, j, l, ii, jj
-       
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -1054,11 +1054,11 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv)
                          + s(i,6,2,1,ie) * r(6 + n * (j - 1), ie) &
                          + s(i,7,2,1,ie) * r(7 + n * (j - 1), ie) &
                          + s(i,8,2,1,ie) * r(8 + n * (j - 1), ie) &
-                         + s(i,9,2,1,ie) * r(9 + n * (j - 1), ie) 
+                         + s(i,9,2,1,ie) * r(9 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1081,12 +1081,12 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv)
                             + wrk(l + n * (8 - 1) + nn * (i - 1), ie) &
                               * s(8,j,1,2,ie) &
                             + wrk(l + n * (9 - 1) + nn * (i - 1), ie) &
-                              * s(9,j,1,2,ie) 
+                              * s(9,j,1,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -1099,7 +1099,7 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv)
                       + wrk2(i + nn * (6 - 1), ie) * s(6, j, 1, 3, ie) &
                       + wrk2(i + nn * (7 - 1), ie) * s(7, j, 1, 3, ie) &
                       + wrk2(i + nn * (8 - 1), ie) * s(8, j, 1, 3, ie) &
-                      + wrk2(i + nn * (9 - 1), ie) * s(9, j, 1, 3, ie) 
+                      + wrk2(i + nn * (9 - 1), ie) * s(9, j, 1, 3, ie)
           end do
        end do
     end do
@@ -1107,7 +1107,7 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -1120,11 +1120,11 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv)
                          + s(i,6,1,1,ie) * r(6 + n * (j - 1), ie) &
                          + s(i,7,1,1,ie) * r(7 + n * (j - 1), ie) &
                          + s(i,8,1,1,ie) * r(8 + n * (j - 1), ie) &
-                         + s(i,9,1,1,ie) * r(9 + n * (j - 1), ie) 
+                         + s(i,9,1,1,ie) * r(9 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1147,12 +1147,12 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv)
                             + wrk(l + n * (8 - 1) + nn * (i - 1), ie) &
                               * s(8,j,2,2,ie) &
                             + wrk(l + n * (9 - 1) + nn * (i - 1), ie) &
-                              * s(9,j,2,2,ie) 
+                              * s(9,j,2,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -1165,11 +1165,11 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv)
                       + wrk2(i + nn * (6 - 1), ie) * s(6, j, 2, 3, ie) &
                       + wrk2(i + nn * (7 - 1), ie) * s(7, j, 2, 3, ie) &
                       + wrk2(i + nn * (8 - 1), ie) * s(8, j, 2, 3, ie) &
-                      + wrk2(i + nn * (9 - 1), ie) * s(9, j, 2, 3, ie) 
+                      + wrk2(i + nn * (9 - 1), ie) * s(9, j, 2, 3, ie)
           end do
        end do
     end do
-    
+
 
   end subroutine fdm_do_fast_sx_nl9
 
@@ -1184,7 +1184,7 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv)
     real(kind=rp), intent(inout) :: d(n**3, nelv)
     real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv)
     integer ::  ie, i, j, l, ii, jj
-       
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -1196,11 +1196,11 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv)
                          + s(i,5,2,1,ie) * r(5 + n * (j - 1), ie) &
                          + s(i,6,2,1,ie) * r(6 + n * (j - 1), ie) &
                          + s(i,7,2,1,ie) * r(7 + n * (j - 1), ie) &
-                         + s(i,8,2,1,ie) * r(8 + n * (j - 1), ie) 
+                         + s(i,8,2,1,ie) * r(8 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1221,12 +1221,12 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv)
                             + wrk(l + n * (7 - 1) + nn * (i - 1), ie) &
                               * s(7,j,1,2,ie) &
                             + wrk(l + n * (8 - 1) + nn * (i - 1), ie) &
-                              * s(8,j,1,2,ie) 
+                              * s(8,j,1,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -1238,7 +1238,7 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv)
                       + wrk2(i + nn * (5 - 1), ie) * s(5, j, 1, 3, ie) &
                       + wrk2(i + nn * (6 - 1), ie) * s(6, j, 1, 3, ie) &
                       + wrk2(i + nn * (7 - 1), ie) * s(7, j, 1, 3, ie) &
-                      + wrk2(i + nn * (8 - 1), ie) * s(8, j, 1, 3, ie) 
+                      + wrk2(i + nn * (8 - 1), ie) * s(8, j, 1, 3, ie)
           end do
        end do
     end do
@@ -1246,7 +1246,7 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -1258,11 +1258,11 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv)
                          + s(i,5,1,1,ie) * r(5 + n * (j - 1), ie) &
                          + s(i,6,1,1,ie) * r(6 + n * (j - 1), ie) &
                          + s(i,7,1,1,ie) * r(7 + n * (j - 1), ie) &
-                         + s(i,8,1,1,ie) * r(8 + n * (j - 1), ie) 
+                         + s(i,8,1,1,ie) * r(8 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1283,12 +1283,12 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv)
                             + wrk(l + n * (7 - 1) + nn * (i - 1), ie) &
                               * s(7,j,2,2,ie) &
                             + wrk(l + n * (8 - 1) + nn * (i - 1), ie) &
-                              * s(8,j,2,2,ie) 
+                              * s(8,j,2,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -1300,11 +1300,11 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv)
                       + wrk2(i + nn * (5 - 1), ie) * s(5, j, 2, 3, ie) &
                       + wrk2(i + nn * (6 - 1), ie) * s(6, j, 2, 3, ie) &
                       + wrk2(i + nn * (7 - 1), ie) * s(7, j, 2, 3, ie) &
-                      + wrk2(i + nn * (8 - 1), ie) * s(8, j, 2, 3, ie) 
+                      + wrk2(i + nn * (8 - 1), ie) * s(8, j, 2, 3, ie)
           end do
        end do
     end do
-    
+
 
   end subroutine fdm_do_fast_sx_nl8
 
@@ -1319,7 +1319,7 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv)
     real(kind=rp), intent(inout) :: d(n**3, nelv)
     real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv)
     integer ::  ie, i, j, l, ii, jj
-       
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -1330,11 +1330,11 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv)
                          + s(i,4,2,1,ie) * r(4 + n * (j - 1), ie) &
                          + s(i,5,2,1,ie) * r(5 + n * (j - 1), ie) &
                          + s(i,6,2,1,ie) * r(6 + n * (j - 1), ie) &
-                         + s(i,7,2,1,ie) * r(7 + n * (j - 1), ie) 
+                         + s(i,7,2,1,ie) * r(7 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1353,12 +1353,12 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv)
                             + wrk(l + n * (6 - 1) + nn * (i - 1), ie) &
                               * s(6,j,1,2,ie) &
                             + wrk(l + n * (7 - 1) + nn * (i - 1), ie) &
-                              * s(7,j,1,2,ie) 
+                              * s(7,j,1,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -1369,7 +1369,7 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv)
                       + wrk2(i + nn * (4 - 1), ie) * s(4, j, 1, 3, ie) &
                       + wrk2(i + nn * (5 - 1), ie) * s(5, j, 1, 3, ie) &
                       + wrk2(i + nn * (6 - 1), ie) * s(6, j, 1, 3, ie) &
-                      + wrk2(i + nn * (7 - 1), ie) * s(7, j, 1, 3, ie) 
+                      + wrk2(i + nn * (7 - 1), ie) * s(7, j, 1, 3, ie)
           end do
        end do
     end do
@@ -1377,7 +1377,7 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -1388,11 +1388,11 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv)
                          + s(i,4,1,1,ie) * r(4 + n * (j - 1), ie) &
                          + s(i,5,1,1,ie) * r(5 + n * (j - 1), ie) &
                          + s(i,6,1,1,ie) * r(6 + n * (j - 1), ie) &
-                         + s(i,7,1,1,ie) * r(7 + n * (j - 1), ie) 
+                         + s(i,7,1,1,ie) * r(7 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1411,12 +1411,12 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv)
                             + wrk(l + n * (6 - 1) + nn * (i - 1), ie) &
                               * s(6,j,2,2,ie) &
                             + wrk(l + n * (7 - 1) + nn * (i - 1), ie) &
-                              * s(7,j,2,2,ie) 
+                              * s(7,j,2,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -1427,11 +1427,11 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv)
                       + wrk2(i + nn * (4 - 1), ie) * s(4, j, 2, 3, ie) &
                       + wrk2(i + nn * (5 - 1), ie) * s(5, j, 2, 3, ie) &
                       + wrk2(i + nn * (6 - 1), ie) * s(6, j, 2, 3, ie) &
-                      + wrk2(i + nn * (7 - 1), ie) * s(7, j, 2, 3, ie) 
+                      + wrk2(i + nn * (7 - 1), ie) * s(7, j, 2, 3, ie)
           end do
        end do
     end do
-    
+
 
   end subroutine fdm_do_fast_sx_nl7
 
@@ -1446,7 +1446,7 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv)
     real(kind=rp), intent(inout) :: d(n**3, nelv)
     real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv)
     integer ::  ie, i, j, l, ii, jj
-       
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -1456,11 +1456,11 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv)
                          + s(i,3,2,1,ie) * r(3 + n * (j - 1), ie) &
                          + s(i,4,2,1,ie) * r(4 + n * (j - 1), ie) &
                          + s(i,5,2,1,ie) * r(5 + n * (j - 1), ie) &
-                         + s(i,6,2,1,ie) * r(6 + n * (j - 1), ie) 
+                         + s(i,6,2,1,ie) * r(6 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1477,12 +1477,12 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv)
                             + wrk(l + n * (5 - 1) + nn * (i - 1), ie) &
                               * s(5,j,1,2,ie) &
                             + wrk(l + n * (6 - 1) + nn * (i - 1), ie) &
-                              * s(6,j,1,2,ie) 
+                              * s(6,j,1,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -1492,7 +1492,7 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv)
                       + wrk2(i + nn * (3 - 1), ie) * s(3, j, 1, 3, ie) &
                       + wrk2(i + nn * (4 - 1), ie) * s(4, j, 1, 3, ie) &
                       + wrk2(i + nn * (5 - 1), ie) * s(5, j, 1, 3, ie) &
-                      + wrk2(i + nn * (6 - 1), ie) * s(6, j, 1, 3, ie) 
+                      + wrk2(i + nn * (6 - 1), ie) * s(6, j, 1, 3, ie)
           end do
        end do
     end do
@@ -1500,7 +1500,7 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -1510,11 +1510,11 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv)
                          + s(i,3,1,1,ie) * r(3 + n * (j - 1), ie) &
                          + s(i,4,1,1,ie) * r(4 + n * (j - 1), ie) &
                          + s(i,5,1,1,ie) * r(5 + n * (j - 1), ie) &
-                         + s(i,6,1,1,ie) * r(6 + n * (j - 1), ie) 
+                         + s(i,6,1,1,ie) * r(6 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1531,12 +1531,12 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv)
                             + wrk(l + n * (5 - 1) + nn * (i - 1), ie) &
                               * s(5,j,2,2,ie) &
                             + wrk(l + n * (6 - 1) + nn * (i - 1), ie) &
-                              * s(6,j,2,2,ie) 
+                              * s(6,j,2,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -1546,11 +1546,11 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv)
                       + wrk2(i + nn * (3 - 1), ie) * s(3, j, 2, 3, ie) &
                       + wrk2(i + nn * (4 - 1), ie) * s(4, j, 2, 3, ie) &
                       + wrk2(i + nn * (5 - 1), ie) * s(5, j, 2, 3, ie) &
-                      + wrk2(i + nn * (6 - 1), ie) * s(6, j, 2, 3, ie) 
+                      + wrk2(i + nn * (6 - 1), ie) * s(6, j, 2, 3, ie)
           end do
        end do
     end do
-    
+
 
   end subroutine fdm_do_fast_sx_nl6
 
@@ -1565,7 +1565,7 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv)
     real(kind=rp), intent(inout) :: d(n**3, nelv)
     real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv)
     integer ::  ie, i, j, l, ii, jj
-       
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -1574,11 +1574,11 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv)
                          + s(i,2,2,1,ie) * r(2 + n * (j - 1), ie) &
                          + s(i,3,2,1,ie) * r(3 + n * (j - 1), ie) &
                          + s(i,4,2,1,ie) * r(4 + n * (j - 1), ie) &
-                         + s(i,5,2,1,ie) * r(5 + n * (j - 1), ie) 
+                         + s(i,5,2,1,ie) * r(5 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1593,12 +1593,12 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv)
                             + wrk(l + n * (4 - 1) + nn * (i - 1), ie) &
                               * s(4,j,1,2,ie) &
                             + wrk(l + n * (5 - 1) + nn * (i - 1), ie) &
-                              * s(5,j,1,2,ie) 
+                              * s(5,j,1,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -1607,7 +1607,7 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv)
                       + wrk2(i + nn * (2 - 1), ie) * s(2, j, 1, 3, ie) &
                       + wrk2(i + nn * (3 - 1), ie) * s(3, j, 1, 3, ie) &
                       + wrk2(i + nn * (4 - 1), ie) * s(4, j, 1, 3, ie) &
-                      + wrk2(i + nn * (5 - 1), ie) * s(5, j, 1, 3, ie) 
+                      + wrk2(i + nn * (5 - 1), ie) * s(5, j, 1, 3, ie)
           end do
        end do
     end do
@@ -1615,7 +1615,7 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -1624,11 +1624,11 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv)
                          + s(i,2,1,1,ie) * r(2 + n * (j - 1), ie) &
                          + s(i,3,1,1,ie) * r(3 + n * (j - 1), ie) &
                          + s(i,4,1,1,ie) * r(4 + n * (j - 1), ie) &
-                         + s(i,5,1,1,ie) * r(5 + n * (j - 1), ie) 
+                         + s(i,5,1,1,ie) * r(5 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1643,12 +1643,12 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv)
                             + wrk(l + n * (4 - 1) + nn * (i - 1), ie) &
                               * s(4,j,2,2,ie) &
                             + wrk(l + n * (5 - 1) + nn * (i - 1), ie) &
-                              * s(5,j,2,2,ie) 
+                              * s(5,j,2,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -1657,11 +1657,11 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv)
                       + wrk2(i + nn * (2 - 1), ie) * s(2, j, 2, 3, ie) &
                       + wrk2(i + nn * (3 - 1), ie) * s(3, j, 2, 3, ie) &
                       + wrk2(i + nn * (4 - 1), ie) * s(4, j, 2, 3, ie) &
-                      + wrk2(i + nn * (5 - 1), ie) * s(5, j, 2, 3, ie) 
+                      + wrk2(i + nn * (5 - 1), ie) * s(5, j, 2, 3, ie)
           end do
        end do
     end do
-    
+
 
   end subroutine fdm_do_fast_sx_nl5
 
@@ -1676,7 +1676,7 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv)
     real(kind=rp), intent(inout) :: d(n**3, nelv)
     real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv)
     integer ::  ie, i, j, l, ii, jj
-       
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -1684,11 +1684,11 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv)
              wrk(ii, ie) = s(i,1,2,1,ie) * r(1 + n * (j - 1), ie) &
                          + s(i,2,2,1,ie) * r(2 + n * (j - 1), ie) &
                          + s(i,3,2,1,ie) * r(3 + n * (j - 1), ie) &
-                         + s(i,4,2,1,ie) * r(4 + n * (j - 1), ie) 
+                         + s(i,4,2,1,ie) * r(4 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1701,12 +1701,12 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv)
                             + wrk(l + n * (3 - 1) + nn * (i - 1), ie) &
                               * s(3,j,1,2,ie) &
                             + wrk(l + n * (4 - 1) + nn * (i - 1), ie) &
-                              * s(4,j,1,2,ie) 
+                              * s(4,j,1,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -1714,7 +1714,7 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv)
              e(jj,ie) = wrk2(i + nn * (1 - 1), ie) * s(1, j, 1, 3, ie) &
                       + wrk2(i + nn * (2 - 1), ie) * s(2, j, 1, 3, ie) &
                       + wrk2(i + nn * (3 - 1), ie) * s(3, j, 1, 3, ie) &
-                      + wrk2(i + nn * (4 - 1), ie) * s(4, j, 1, 3, ie) 
+                      + wrk2(i + nn * (4 - 1), ie) * s(4, j, 1, 3, ie)
           end do
        end do
     end do
@@ -1722,7 +1722,7 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
@@ -1730,11 +1730,11 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv)
              wrk(ii, ie) = s(i,1,1,1,ie) * r(1 + n * (j - 1), ie) &
                          + s(i,2,1,1,ie) * r(2 + n * (j - 1), ie) &
                          + s(i,3,1,1,ie) * r(3 + n * (j - 1), ie) &
-                         + s(i,4,1,1,ie) * r(4 + n * (j - 1), ie) 
+                         + s(i,4,1,1,ie) * r(4 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1747,12 +1747,12 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv)
                             + wrk(l + n * (3 - 1) + nn * (i - 1), ie) &
                               * s(3,j,2,2,ie) &
                             + wrk(l + n * (4 - 1) + nn * (i - 1), ie) &
-                              * s(4,j,2,2,ie) 
+                              * s(4,j,2,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
@@ -1760,11 +1760,11 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv)
              e(jj,ie) = wrk2(i + nn * (1 - 1), ie) * s(1, j, 2, 3, ie) &
                       + wrk2(i + nn * (2 - 1), ie) * s(2, j, 2, 3, ie) &
                       + wrk2(i + nn * (3 - 1), ie) * s(3, j, 2, 3, ie) &
-                      + wrk2(i + nn * (4 - 1), ie) * s(4, j, 2, 3, ie) 
+                      + wrk2(i + nn * (4 - 1), ie) * s(4, j, 2, 3, ie)
           end do
        end do
     end do
-    
+
 
   end subroutine fdm_do_fast_sx_nl4
 
@@ -1779,18 +1779,18 @@ subroutine fdm_do_fast_sx_nl3(e, r, s, d, nelv)
     real(kind=rp), intent(inout) :: d(n**3, nelv)
     real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv)
     integer ::  ie, i, j, l, ii, jj
-       
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
              ii = i + n * (j - 1)
              wrk(ii, ie) = s(i,1,2,1,ie) * r(1 + n * (j - 1), ie) &
                          + s(i,2,2,1,ie) * r(2 + n * (j - 1), ie) &
-                         + s(i,3,2,1,ie) * r(3 + n * (j - 1), ie) 
+                         + s(i,3,2,1,ie) * r(3 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1801,19 +1801,19 @@ subroutine fdm_do_fast_sx_nl3(e, r, s, d, nelv)
                             + wrk(l + n * (2 - 1) + nn * (i - 1), ie) &
                               * s(2,j,1,2,ie) &
                             + wrk(l + n * (3 - 1) + nn * (i - 1), ie) &
-                              * s(3,j,1,2,ie) 
+                              * s(3,j,1,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
              jj = i + nn * (j - 1)
              e(jj,ie) = wrk2(i + nn * (1 - 1), ie) * s(1, j, 1, 3, ie) &
                       + wrk2(i + nn * (2 - 1), ie) * s(2, j, 1, 3, ie) &
-                      + wrk2(i + nn * (3 - 1), ie) * s(3, j, 1, 3, ie) 
+                      + wrk2(i + nn * (3 - 1), ie) * s(3, j, 1, 3, ie)
           end do
        end do
     end do
@@ -1821,18 +1821,18 @@ subroutine fdm_do_fast_sx_nl3(e, r, s, d, nelv)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
              ii = i + n * (j - 1)
              wrk(ii, ie) = s(i,1,1,1,ie) * r(1 + n * (j - 1), ie) &
                          + s(i,2,1,1,ie) * r(2 + n * (j - 1), ie) &
-                         + s(i,3,1,1,ie) * r(3 + n * (j - 1), ie) 
+                         + s(i,3,1,1,ie) * r(3 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1843,23 +1843,23 @@ subroutine fdm_do_fast_sx_nl3(e, r, s, d, nelv)
                             + wrk(l + n * (2 - 1) + nn * (i - 1), ie) &
                               * s(2,j,2,2,ie) &
                             + wrk(l + n * (3 - 1) + nn * (i - 1), ie) &
-                              * s(3,j,2,2,ie) 
+                              * s(3,j,2,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
              jj = i + nn * (j - 1)
              e(jj,ie) = wrk2(i + nn * (1 - 1), ie) * s(1, j, 2, 3, ie) &
                       + wrk2(i + nn * (2 - 1), ie) * s(2, j, 2, 3, ie) &
-                      + wrk2(i + nn * (3 - 1), ie) * s(3, j, 2, 3, ie) 
+                      + wrk2(i + nn * (3 - 1), ie) * s(3, j, 2, 3, ie)
           end do
        end do
     end do
-    
+
 
   end subroutine fdm_do_fast_sx_nl3
 
@@ -1874,17 +1874,17 @@ subroutine fdm_do_fast_sx_nl2(e, r, s, d, nelv)
     real(kind=rp), intent(inout) :: d(n**3, nelv)
     real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv)
     integer ::  ie, i, j, l, ii, jj
-       
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
              ii = i + n * (j - 1)
              wrk(ii, ie) = s(i,1,2,1,ie) * r(1 + n * (j - 1), ie) &
-                         + s(i,2,2,1,ie) * r(2 + n * (j - 1), ie) 
+                         + s(i,2,2,1,ie) * r(2 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1893,18 +1893,18 @@ subroutine fdm_do_fast_sx_nl2(e, r, s, d, nelv)
                 wrk2(ii,ie) = wrk(l + n * (1 - 1) + nn * (i - 1), ie) &
                               * s(1,j,1,2,ie) &
                             + wrk(l + n * (2 - 1) + nn * (i - 1), ie) &
-                              * s(2,j,1,2,ie) 
+                              * s(2,j,1,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
              jj = i + nn * (j - 1)
              e(jj,ie) = wrk2(i + nn * (1 - 1), ie) * s(1, j, 1, 3, ie) &
-                      + wrk2(i + nn * (2 - 1), ie) * s(2, j, 1, 3, ie) 
+                      + wrk2(i + nn * (2 - 1), ie) * s(2, j, 1, 3, ie)
           end do
        end do
     end do
@@ -1912,17 +1912,17 @@ subroutine fdm_do_fast_sx_nl2(e, r, s, d, nelv)
     do i = 1, nnn * nelv
        r(i,1) = d(i,1) * e(i,1)
     end do
-   
+
     do j = 1, nn
        do i = 1, n
           do ie = 1, nelv
              ii = i + n * (j - 1)
              wrk(ii, ie) = s(i,1,1,1,ie) * r(1 + n * (j - 1), ie) &
-                         + s(i,2,1,1,ie) * r(2 + n * (j - 1), ie) 
+                         + s(i,2,1,1,ie) * r(2 + n * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, n
        do j = 1, n
           do l = 1, n
@@ -1931,23 +1931,23 @@ subroutine fdm_do_fast_sx_nl2(e, r, s, d, nelv)
                 wrk2(ii,ie) = wrk(l + n * (1 - 1) + nn * (i - 1), ie) &
                               * s(1,j,2,2,ie) &
                             + wrk(l + n * (2 - 1) + nn * (i - 1), ie) &
-                              * s(2,j,2,2,ie) 
+                              * s(2,j,2,2,ie)
              end do
           end do
        end do
     end do
-     
+
     do j = 1, n
        do i = 1, nn
           do ie = 1, nelv
              jj = i + nn * (j - 1)
              e(jj,ie) = wrk2(i + nn * (1 - 1), ie) * s(1, j, 2, 3, ie) &
-                      + wrk2(i + nn * (2 - 1), ie) * s(2, j, 2, 3, ie) 
+                      + wrk2(i + nn * (2 - 1), ie) * s(2, j, 2, 3, ie)
           end do
        end do
     end do
-    
+
 
   end subroutine fdm_do_fast_sx_nl2
-    
+
 end module fdm_sx
diff --git a/src/math/bcknd/sx/opr_sx.f90 b/src/math/bcknd/sx/opr_sx.f90
index 075bce1bb84..dc5bd99e2b0 100644
--- a/src/math/bcknd/sx/opr_sx.f90
+++ b/src/math/bcknd/sx/opr_sx.f90
@@ -5,19 +5,19 @@ module opr_sx
   use sx_conv1
   use sx_cdtp
   use sx_cfl
+  use sx_lambda2
   use gather_scatter
   use num_types, only : rp
   use space, only : space_t
   use coefs, only : coef_t
   use math
-  use mesh, only : mesh_t
   use field, only : field_t
   use mathops
   implicit none
   private
 
   public :: opr_sx_dudxyz, opr_sx_opgrad, opr_sx_cdtp, opr_sx_conv1, &
-       opr_sx_curl, opr_sx_cfl
+       opr_sx_curl, opr_sx_cfl, opr_sx_lambda2
 
 contains
 
@@ -75,8 +75,8 @@ subroutine opr_sx_dudxyz(du, u, dr, ds, dt, coef)
 
   end subroutine opr_sx_dudxyz
 
-  subroutine opr_sx_opgrad(ux,uy,uz,u,coef) 
-    type(coef_t), intent(in) :: coef  
+  subroutine opr_sx_opgrad(ux,uy,uz,u,coef)
+    type(coef_t), intent(in) :: coef
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: ux
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uy
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uz
@@ -286,7 +286,7 @@ subroutine opr_sx_cdtp(dtx,x,dr,ds,dt, coef)
 
   end subroutine opr_sx_cdtp
 
-  subroutine opr_sx_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim)  
+  subroutine opr_sx_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim)
     type(space_t), intent(inout) :: Xh
     type(coef_t), intent(inout) :: coef
     integer, intent(in) :: nelv, gdim
@@ -308,7 +308,7 @@ subroutine opr_sx_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim)
             coef%drdx, coef%dsdx, coef%dtdx, &
             coef%drdy, coef%dsdy, coef%dtdy, &
             coef%drdz, coef%dsdz, coef%dtdz, &
-            coef%jacinv, nelv, gdim)   
+            coef%jacinv, nelv, gdim)
     case(12)
        call sx_conv1_lx12(du, u, vx, vy, vz, Xh%dx, Xh%dy, Xh%dz, &
             coef%drdx, coef%dsdx, coef%dtdx, &
@@ -425,9 +425,9 @@ subroutine opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
     !!    BC dependent, Needs to change if cyclic
 
     call opcolv(w1%x,w2%x,w3%x,c_Xh%B, gdim, n)
-    call c_Xh%gs_h%op(w1, GS_OP_ADD) 
-    call c_Xh%gs_h%op(w2, GS_OP_ADD) 
-    call c_Xh%gs_h%op(w3, GS_OP_ADD) 
+    call c_Xh%gs_h%op(w1, GS_OP_ADD)
+    call c_Xh%gs_h%op(w2, GS_OP_ADD)
+    call c_Xh%gs_h%op(w3, GS_OP_ADD)
     call opcolv(w1%x,w2%x,w3%x,c_Xh%Binv, gdim, n)
 
   end subroutine opr_sx_curl
@@ -440,107 +440,245 @@ function opr_sx_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl)
     real(kind=rp), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) ::  u, v, w
     real(kind=rp) :: cfl
 
-   select case(Xh%lx)
-   case (14)
-      cfl = sx_cfl_lx14(dt, u, v, w, &
+    select case(Xh%lx)
+    case (14)
+       cfl = sx_cfl_lx14(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim)
-   case (13)
-      cfl = sx_cfl_lx13(dt, u, v, w, &
+    case (13)
+       cfl = sx_cfl_lx13(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim)
-   case (12)
-      cfl = sx_cfl_lx12(dt, u, v, w, &
+    case (12)
+       cfl = sx_cfl_lx12(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim)
-   case (11)
-      cfl = sx_cfl_lx11(dt, u, v, w, &
+    case (11)
+       cfl = sx_cfl_lx11(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim)
-   case (10)
-      cfl = sx_cfl_lx10(dt, u, v, w, &
+    case (10)
+       cfl = sx_cfl_lx10(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim)
-   case (9)
-      cfl = sx_cfl_lx9(dt, u, v, w, &
+    case (9)
+       cfl = sx_cfl_lx9(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim)
-   case (8)
-      cfl = sx_cfl_lx8(dt, u, v, w, &
+    case (8)
+       cfl = sx_cfl_lx8(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim)
-   case (7)
-      cfl = sx_cfl_lx7(dt, u, v, w, &
+    case (7)
+       cfl = sx_cfl_lx7(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim)
-   case (6)
-      cfl = sx_cfl_lx6(dt, u, v, w, &
+    case (6)
+       cfl = sx_cfl_lx6(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim)
-   case (5)
-      cfl = sx_cfl_lx5(dt, u, v, w, &
+    case (5)
+       cfl = sx_cfl_lx5(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim)
-   case (4)
-      cfl = sx_cfl_lx4(dt, u, v, w, &
+    case (4)
+       cfl = sx_cfl_lx4(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim)
-   case (3)
-      cfl = sx_cfl_lx3(dt, u, v, w, &
+    case (3)
+       cfl = sx_cfl_lx3(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim)
-   case (2)
-      cfl = sx_cfl_lx2(dt, u, v, w, &
+    case (2)
+       cfl = sx_cfl_lx2(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim)
-   case default
-      cfl = sx_cfl_lx(dt, u, v, w, &
+    case default
+       cfl = sx_cfl_lx(dt, u, v, w, &
            coef%drdx, coef%dsdx, coef%dtdx, &
            coef%drdy, coef%dsdy, coef%dtdy, &
            coef%drdz, coef%dsdz, coef%dtdz, &
            Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, &
            coef%jacinv, nelv, gdim, Xh%lx)
-   end select
+    end select
 
   end function opr_sx_cfl
 
+  subroutine opr_sx_lambda2(lambda2, u, v, w, coef)
+    type(coef_t), intent(in) :: coef
+    type(field_t), intent(inout) :: lambda2
+    type(field_t), intent(in) :: u, v, w
+
+    associate(Xh => coef%Xh, msh => coef%msh)
+      select case(Xh%lx)
+      case (18)
+         call sx_lambda2_lx18(lambda2%x, u%x, v%x, w%x, &
+                              Xh%dx, Xh%dy, Xh%dz, &
+                              coef%drdx, coef%dsdx, coef%dtdx, &
+                              coef%drdy, coef%dsdy, coef%dtdy, &
+                              coef%drdz, coef%dsdz, coef%dtdz, &
+                              Xh%w3, coef%B, msh%nelv)
+      case (17)
+         call sx_lambda2_lx17(lambda2%x, u%x, v%x, w%x, &
+                              Xh%dx, Xh%dy, Xh%dz, &
+                              coef%drdx, coef%dsdx, coef%dtdx, &
+                              coef%drdy, coef%dsdy, coef%dtdy, &
+                              coef%drdz, coef%dsdz, coef%dtdz, &
+                              Xh%w3, coef%B, msh%nelv)
+      case (16)
+         call sx_lambda2_lx16(lambda2%x, u%x, v%x, w%x, &
+                              Xh%dx, Xh%dy, Xh%dz, &
+                              coef%drdx, coef%dsdx, coef%dtdx, &
+                              coef%drdy, coef%dsdy, coef%dtdy, &
+                              coef%drdz, coef%dsdz, coef%dtdz, &
+                              Xh%w3, coef%B, msh%nelv)
+      case (15)
+         call sx_lambda2_lx15(lambda2%x, u%x, v%x, w%x, &
+                              Xh%dx, Xh%dy, Xh%dz, &
+                              coef%drdx, coef%dsdx, coef%dtdx, &
+                              coef%drdy, coef%dsdy, coef%dtdy, &
+                              coef%drdz, coef%dsdz, coef%dtdz, &
+                              Xh%w3, coef%B, msh%nelv)
+      case (14)
+         call sx_lambda2_lx14(lambda2%x, u%x, v%x, w%x, &
+                              Xh%dx, Xh%dy, Xh%dz, &
+                              coef%drdx, coef%dsdx, coef%dtdx, &
+                              coef%drdy, coef%dsdy, coef%dtdy, &
+                              coef%drdz, coef%dsdz, coef%dtdz, &
+                              Xh%w3, coef%B, msh%nelv)
+      case (13)
+         call sx_lambda2_lx13(lambda2%x, u%x, v%x, w%x, &
+                              Xh%dx, Xh%dy, Xh%dz, &
+                              coef%drdx, coef%dsdx, coef%dtdx, &
+                              coef%drdy, coef%dsdy, coef%dtdy, &
+                              coef%drdz, coef%dsdz, coef%dtdz, &
+                              Xh%w3, coef%B, msh%nelv)
+      case (12)
+         call sx_lambda2_lx12(lambda2%x, u%x, v%x, w%x, &
+                              Xh%dx, Xh%dy, Xh%dz, &
+                              coef%drdx, coef%dsdx, coef%dtdx, &
+                              coef%drdy, coef%dsdy, coef%dtdy, &
+                              coef%drdz, coef%dsdz, coef%dtdz, &
+                              Xh%w3, coef%B, msh%nelv)
+      case (11)
+         call sx_lambda2_lx11(lambda2%x, u%x, v%x, w%x, &
+                              Xh%dx, Xh%dy, Xh%dz, &
+                              coef%drdx, coef%dsdx, coef%dtdx, &
+                              coef%drdy, coef%dsdy, coef%dtdy, &
+                              coef%drdz, coef%dsdz, coef%dtdz, &
+                              Xh%w3, coef%B, msh%nelv)
+      case (10)
+         call sx_lambda2_lx10(lambda2%x, u%x, v%x, w%x, &
+                              Xh%dx, Xh%dy, Xh%dz, &
+                              coef%drdx, coef%dsdx, coef%dtdx, &
+                              coef%drdy, coef%dsdy, coef%dtdy, &
+                              coef%drdz, coef%dsdz, coef%dtdz, &
+                              Xh%w3, coef%B, msh%nelv)
+      case (9)
+         call sx_lambda2_lx9(lambda2%x, u%x, v%x, w%x, &
+                             Xh%dx, Xh%dy, Xh%dz, &
+                             coef%drdx, coef%dsdx, coef%dtdx, &
+                             coef%drdy, coef%dsdy, coef%dtdy, &
+                             coef%drdz, coef%dsdz, coef%dtdz, &
+                             Xh%w3, coef%B, msh%nelv)
+      case (8)
+         call sx_lambda2_lx8(lambda2%x, u%x, v%x, w%x, &
+                             Xh%dx, Xh%dy, Xh%dz, &
+                             coef%drdx, coef%dsdx, coef%dtdx, &
+                             coef%drdy, coef%dsdy, coef%dtdy, &
+                             coef%drdz, coef%dsdz, coef%dtdz, &
+                             Xh%w3, coef%B, msh%nelv)
+      case (7)
+         call sx_lambda2_lx7(lambda2%x, u%x, v%x, w%x, &
+                             Xh%dx, Xh%dy, Xh%dz, &
+                             coef%drdx, coef%dsdx, coef%dtdx, &
+                             coef%drdy, coef%dsdy, coef%dtdy, &
+                             coef%drdz, coef%dsdz, coef%dtdz, &
+                             Xh%w3, coef%B, msh%nelv)
+      case (6)
+         call sx_lambda2_lx6(lambda2%x, u%x, v%x, w%x, &
+                             Xh%dx, Xh%dy, Xh%dz, &
+                             coef%drdx, coef%dsdx, coef%dtdx, &
+                             coef%drdy, coef%dsdy, coef%dtdy, &
+                             coef%drdz, coef%dsdz, coef%dtdz, &
+                             Xh%w3, coef%B, msh%nelv)
+      case (5)
+         call sx_lambda2_lx5(lambda2%x, u%x, v%x, w%x, &
+                             Xh%dx, Xh%dy, Xh%dz, &
+                             coef%drdx, coef%dsdx, coef%dtdx, &
+                             coef%drdy, coef%dsdy, coef%dtdy, &
+                             coef%drdz, coef%dsdz, coef%dtdz, &
+                             Xh%w3, coef%B, msh%nelv)
+      case (4)
+         call sx_lambda2_lx4(lambda2%x, u%x, v%x, w%x, &
+                             Xh%dx, Xh%dy, Xh%dz, &
+                             coef%drdx, coef%dsdx, coef%dtdx, &
+                             coef%drdy, coef%dsdy, coef%dtdy, &
+                             coef%drdz, coef%dsdz, coef%dtdz, &
+                             Xh%w3, coef%B, msh%nelv)
+      case (3)
+         call sx_lambda2_lx3(lambda2%x, u%x, v%x, w%x, &
+                             Xh%dx, Xh%dy, Xh%dz, &
+                             coef%drdx, coef%dsdx, coef%dtdx, &
+                             coef%drdy, coef%dsdy, coef%dtdy, &
+                             coef%drdz, coef%dsdz, coef%dtdz, &
+                             Xh%w3, coef%B, msh%nelv)
+      case (2)
+         call sx_lambda2_lx2(lambda2%x, u%x, v%x, w%x, &
+                             Xh%dx, Xh%dy, Xh%dz, &
+                             coef%drdx, coef%dsdx, coef%dtdx, &
+                             coef%drdy, coef%dsdy, coef%dtdy, &
+                             coef%drdz, coef%dsdz, coef%dtdz, &
+                             Xh%w3, coef%B, msh%nelv)
+      case default
+         call sx_lambda2_lx(lambda2%x, u%x, v%x, w%x, &
+                            Xh%dx, Xh%dy, Xh%dz, &
+                            coef%drdx, coef%dsdx, coef%dtdx, &
+                            coef%drdy, coef%dsdy, coef%dtdy, &
+                            coef%drdz, coef%dsdz, coef%dtdz, &
+                            Xh%w3, coef%B, msh%nelv, Xh%lx)
+      end select
+    end associate
+    
+  end subroutine opr_sx_lambda2  
+
 end module opr_sx
diff --git a/src/math/bcknd/sx/sx_cdtp.f90 b/src/math/bcknd/sx/sx_cdtp.f90
index b34208829f4..ab0e1d14853 100644
--- a/src/math/bcknd/sx/sx_cdtp.f90
+++ b/src/math/bcknd/sx/sx_cdtp.f90
@@ -48,7 +48,7 @@ subroutine sx_cdtp_lx(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd, lx)
     integer, intent(in) :: nel, nd, lx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
@@ -108,7 +108,7 @@ subroutine sx_cdtp_lx14(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     integer, intent(in) :: nel, nd
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
@@ -162,13 +162,13 @@ subroutine sx_cdtp_lx14(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     end do
 
   end subroutine sx_cdtp_lx14
-  
+
   subroutine sx_cdtp_lx13(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     integer, parameter :: lx = 13
     integer, intent(in) :: nel, nd
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
@@ -222,13 +222,13 @@ subroutine sx_cdtp_lx13(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     end do
 
   end subroutine sx_cdtp_lx13
-  
+
   subroutine sx_cdtp_lx12(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     integer, parameter :: lx = 12
     integer, intent(in) :: nel, nd
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
@@ -288,7 +288,7 @@ subroutine sx_cdtp_lx11(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     integer, intent(in) :: nel, nd
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
@@ -348,7 +348,7 @@ subroutine sx_cdtp_lx10(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     integer, intent(in) :: nel, nd
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
@@ -408,7 +408,7 @@ subroutine sx_cdtp_lx9(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     integer, intent(in) :: nel, nd
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
@@ -468,7 +468,7 @@ subroutine sx_cdtp_lx8(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     integer, intent(in) :: nel, nd
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
@@ -528,7 +528,7 @@ subroutine sx_cdtp_lx7(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     integer, intent(in) :: nel, nd
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
@@ -588,7 +588,7 @@ subroutine sx_cdtp_lx6(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     integer, intent(in) :: nel, nd
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
@@ -648,7 +648,7 @@ subroutine sx_cdtp_lx5(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     integer, intent(in) :: nel, nd
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
@@ -708,7 +708,7 @@ subroutine sx_cdtp_lx4(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     integer, intent(in) :: nel, nd
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
@@ -768,7 +768,7 @@ subroutine sx_cdtp_lx3(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     integer, intent(in) :: nel, nd
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
@@ -828,7 +828,7 @@ subroutine sx_cdtp_lx2(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd)
     integer, intent(in) :: nel, nd
     real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx
     real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B
-    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)   
+    real(kind=rp), intent(in)  :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx)
     real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1
     real(kind=rp) :: tmp
     integer :: e, i, j, k, kk, jj
diff --git a/src/math/bcknd/sx/sx_cfl.f90 b/src/math/bcknd/sx/sx_cfl.f90
index 64518fc829b..7014497468e 100644
--- a/src/math/bcknd/sx/sx_cfl.f90
+++ b/src/math/bcknd/sx/sx_cfl.f90
@@ -53,7 +53,7 @@ function sx_cfl_lx(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
@@ -73,11 +73,11 @@ function sx_cfl_lx(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
@@ -86,7 +86,7 @@ function sx_cfl_lx(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     end do
 
   end function sx_cfl_lx
-  
+
   function sx_cfl_lx14(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
        drdz, dsdz, dtdz, dr_inv, ds_inv, dt_inv, &
        jacinv,nelv, gdim) result(cfl)
@@ -98,13 +98,13 @@ function sx_cfl_lx14(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
     integer :: i, j, k, e
     cfl = 0d0
-    
+
     do k = 1, lx
        do j = 1, lx
           do i = 1, lx
@@ -118,11 +118,11 @@ function sx_cfl_lx14(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
@@ -143,13 +143,13 @@ function sx_cfl_lx13(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
     integer :: i, j, k, e
     cfl = 0d0
-    
+
     do k = 1, lx
        do j = 1, lx
           do i = 1, lx
@@ -163,11 +163,11 @@ function sx_cfl_lx13(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
@@ -188,13 +188,13 @@ function sx_cfl_lx12(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
     integer :: i, j, k, e
     cfl = 0d0
-    
+
     do k = 1, lx
        do j = 1, lx
           do i = 1, lx
@@ -208,11 +208,11 @@ function sx_cfl_lx12(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
@@ -233,13 +233,13 @@ function sx_cfl_lx11(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
     integer :: i, j, k, e
     cfl = 0d0
-    
+
     do k = 1, lx
        do j = 1, lx
           do i = 1, lx
@@ -253,11 +253,11 @@ function sx_cfl_lx11(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
@@ -278,13 +278,13 @@ function sx_cfl_lx10(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
     integer :: i, j, k, e
     cfl = 0d0
-    
+
     do k = 1, lx
        do j = 1, lx
           do i = 1, lx
@@ -298,11 +298,11 @@ function sx_cfl_lx10(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
@@ -323,13 +323,13 @@ function sx_cfl_lx9(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
     integer :: i, j, k, e
     cfl = 0d0
-    
+
     do k = 1, lx
        do j = 1, lx
           do i = 1, lx
@@ -343,11 +343,11 @@ function sx_cfl_lx9(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
@@ -368,13 +368,13 @@ function sx_cfl_lx8(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
     integer :: i, j, k, e
     cfl = 0d0
-    
+
     do k = 1, lx
        do j = 1, lx
           do i = 1, lx
@@ -388,11 +388,11 @@ function sx_cfl_lx8(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
@@ -413,13 +413,13 @@ function sx_cfl_lx7(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
     integer :: i, j, k, e
     cfl = 0d0
-    
+
     do k = 1, lx
        do j = 1, lx
           do i = 1, lx
@@ -433,11 +433,11 @@ function sx_cfl_lx7(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
@@ -458,13 +458,13 @@ function sx_cfl_lx6(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
     integer :: i, j, k, e
     cfl = 0d0
-    
+
     do k = 1, lx
        do j = 1, lx
           do i = 1, lx
@@ -478,11 +478,11 @@ function sx_cfl_lx6(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
@@ -503,13 +503,13 @@ function sx_cfl_lx5(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
     integer :: i, j, k, e
     cfl = 0d0
-    
+
     do k = 1, lx
        do j = 1, lx
           do i = 1, lx
@@ -523,11 +523,11 @@ function sx_cfl_lx5(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
@@ -548,13 +548,13 @@ function sx_cfl_lx4(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
     integer :: i, j, k, e
     cfl = 0d0
-    
+
     do k = 1, lx
        do j = 1, lx
           do i = 1, lx
@@ -568,11 +568,11 @@ function sx_cfl_lx4(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
@@ -593,13 +593,13 @@ function sx_cfl_lx3(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
     integer :: i, j, k, e
     cfl = 0d0
-    
+
     do k = 1, lx
        do j = 1, lx
           do i = 1, lx
@@ -613,11 +613,11 @@ function sx_cfl_lx3(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
@@ -638,13 +638,13 @@ function sx_cfl_lx2(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv
-    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv    
+    real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
     real(kind=rp) :: cflr, cfls, cflt, cflm
     real(kind=rp) :: ur, us, ut
     real(kind=rp) :: cfl
     integer :: i, j, k, e
     cfl = 0d0
-    
+
     do k = 1, lx
        do j = 1, lx
           do i = 1, lx
@@ -658,11 +658,11 @@ function sx_cfl_lx2(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, &
                 ut = ( u(i,j,k,e)*dtdx(i,j,k,e) &
                    +   v(i,j,k,e)*dtdy(i,j,k,e) &
                    +   w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e)
-                
+
                 cflr = abs(dt*ur*dr_inv(i))
                 cfls = abs(dt*us*ds_inv(j))
                 cflt = abs(dt*ut*dt_inv(k))
- 
+
                 cflm = cflr + cfls + cflt
                 cfl  = max(cfl,cflm)
              end do
diff --git a/src/math/bcknd/sx/sx_conv1.f90 b/src/math/bcknd/sx/sx_conv1.f90
index 084cd4ad6a2..95230bfe623 100644
--- a/src/math/bcknd/sx/sx_conv1.f90
+++ b/src/math/bcknd/sx/sx_conv1.f90
@@ -53,15 +53,15 @@ subroutine sx_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -73,7 +73,7 @@ subroutine sx_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -88,7 +88,7 @@ subroutine sx_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, &
     do j = 1, lx
        do i = 1, lx
           do k = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -115,9 +115,9 @@ subroutine sx_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx
-  
+
   subroutine sx_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv, gdim)
@@ -129,15 +129,15 @@ subroutine sx_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -149,7 +149,7 @@ subroutine sx_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -164,7 +164,7 @@ subroutine sx_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, &
     do j = 1, lx
        do i = 1, lx
           do k = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -191,7 +191,7 @@ subroutine sx_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx14
 
   subroutine sx_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, &
@@ -205,15 +205,15 @@ subroutine sx_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -225,7 +225,7 @@ subroutine sx_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -240,7 +240,7 @@ subroutine sx_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, &
     do j = 1, lx
        do i = 1, lx
           do k = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -267,9 +267,9 @@ subroutine sx_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx13
-  
+
   subroutine sx_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv, gdim)
@@ -281,15 +281,15 @@ subroutine sx_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -301,7 +301,7 @@ subroutine sx_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -316,7 +316,7 @@ subroutine sx_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, &
     do j = 1, lx
        do i = 1, lx
           do k = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -343,7 +343,7 @@ subroutine sx_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx12
 
   subroutine sx_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, &
@@ -357,15 +357,15 @@ subroutine sx_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -377,7 +377,7 @@ subroutine sx_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -392,7 +392,7 @@ subroutine sx_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, &
     do j = 1, lx
        do i = 1, lx
           do k = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -419,9 +419,9 @@ subroutine sx_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx11
-  
+
   subroutine sx_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv, gdim)
@@ -433,15 +433,15 @@ subroutine sx_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -453,7 +453,7 @@ subroutine sx_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -468,7 +468,7 @@ subroutine sx_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, &
     do j = 1, lx
        do i = 1, lx
           do k = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -495,7 +495,7 @@ subroutine sx_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx10
 
   subroutine sx_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, &
@@ -509,15 +509,15 @@ subroutine sx_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -529,7 +529,7 @@ subroutine sx_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -544,7 +544,7 @@ subroutine sx_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, &
     do j = 1, lx
        do i = 1, lx
           do k = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -571,9 +571,9 @@ subroutine sx_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx9
-  
+
   subroutine sx_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv, gdim)
@@ -585,15 +585,15 @@ subroutine sx_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -605,7 +605,7 @@ subroutine sx_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -620,7 +620,7 @@ subroutine sx_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, &
     do j = 1, lx
        do i = 1, lx
           do k = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -647,7 +647,7 @@ subroutine sx_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx8
 
   subroutine sx_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, &
@@ -661,15 +661,15 @@ subroutine sx_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -681,7 +681,7 @@ subroutine sx_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -696,7 +696,7 @@ subroutine sx_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, &
     do j = 1, lx
        do i = 1, lx
           do k = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -723,9 +723,9 @@ subroutine sx_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx7
-  
+
   subroutine sx_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv, gdim)
@@ -737,15 +737,15 @@ subroutine sx_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -757,7 +757,7 @@ subroutine sx_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -772,7 +772,7 @@ subroutine sx_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, &
     do j = 1, lx
        do i = 1, lx
           do k = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -799,9 +799,9 @@ subroutine sx_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx6
-  
+
   subroutine sx_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, &
        jacinv, nelv, gdim)
@@ -813,15 +813,15 @@ subroutine sx_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -833,7 +833,7 @@ subroutine sx_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -848,7 +848,7 @@ subroutine sx_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, &
     do j = 1, lx
        do i = 1, lx
           do k = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -875,7 +875,7 @@ subroutine sx_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx5
 
   subroutine sx_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, &
@@ -889,15 +889,15 @@ subroutine sx_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -909,7 +909,7 @@ subroutine sx_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -924,7 +924,7 @@ subroutine sx_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, &
     do j = 1, lx
        do i = 1, lx
           do k = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -951,7 +951,7 @@ subroutine sx_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx4
 
   subroutine sx_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, &
@@ -965,15 +965,15 @@ subroutine sx_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -985,7 +985,7 @@ subroutine sx_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -1000,7 +1000,7 @@ subroutine sx_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, &
     do j=1, lx
        do i=1, lx
           do k=1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk=1, lx
@@ -1027,7 +1027,7 @@ subroutine sx_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx3
 
   subroutine sx_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, &
@@ -1041,15 +1041,15 @@ subroutine sx_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz
     real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv
-    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz        
+    real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr
     real(kind=rp), dimension(lx,lx,lx,nelv) :: duds
     real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt
     real(kind=rp) :: wr, ws, wt
-    integer :: e, i, j, k, jj, kk    
+    integer :: e, i, j, k, jj, kk
 
     do i = 1, lx
-       do jj = 1, lx * lx * nelv     
+       do jj = 1, lx * lx * nelv
           wr = 0d0
           do kk = 1, lx
              wr = wr + dx(i,kk)*u(kk,jj,1,1)
@@ -1061,7 +1061,7 @@ subroutine sx_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, &
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -1076,7 +1076,7 @@ subroutine sx_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, &
     do j = 1, lx
        do i = 1, lx
           do k = 1, lx
-             do e = 1, nelv     
+             do e = 1, nelv
                 wt = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -1103,7 +1103,7 @@ subroutine sx_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, &
             + dsdz(i,1,1,1)*duds(i,1,1,1) &
             + dtdz(i,1,1,1)*dudt(i,1,1,1)))
     end do
-    
+
   end subroutine sx_conv1_lx2
-  
+
 end module sx_conv1
diff --git a/src/math/bcknd/sx/sx_dudxyz.f90 b/src/math/bcknd/sx/sx_dudxyz.f90
index 787f831adea..ccdcedc868b 100644
--- a/src/math/bcknd/sx/sx_dudxyz.f90
+++ b/src/math/bcknd/sx/sx_dudxyz.f90
@@ -52,7 +52,7 @@ subroutine sx_dudxyz_lx(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd, lx)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -70,7 +70,7 @@ subroutine sx_dudxyz_lx(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd, lx)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -102,7 +102,7 @@ subroutine sx_dudxyz_lx(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd, lx)
     call addcol3(du, drst, dt, nd)
     call col2 (du, jacinv, nd)
   end subroutine sx_dudxyz_lx
-  
+
   subroutine sx_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     integer, parameter :: lx = 14
     integer, intent(in) :: nel, nd
@@ -112,7 +112,7 @@ subroutine sx_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -130,7 +130,7 @@ subroutine sx_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -162,7 +162,7 @@ subroutine sx_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     call addcol3(du, drst, dt, nd)
     call col2 (du, jacinv, nd)
   end subroutine sx_dudxyz_lx14
-  
+
   subroutine sx_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     integer, parameter :: lx = 13
     integer, intent(in) :: nel, nd
@@ -172,7 +172,7 @@ subroutine sx_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -190,7 +190,7 @@ subroutine sx_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -232,7 +232,7 @@ subroutine sx_dudxyz_lx12(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -250,7 +250,7 @@ subroutine sx_dudxyz_lx12(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -292,7 +292,7 @@ subroutine sx_dudxyz_lx11(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -310,7 +310,7 @@ subroutine sx_dudxyz_lx11(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -352,7 +352,7 @@ subroutine sx_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -370,7 +370,7 @@ subroutine sx_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -412,7 +412,7 @@ subroutine sx_dudxyz_lx9(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -430,7 +430,7 @@ subroutine sx_dudxyz_lx9(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -472,7 +472,7 @@ subroutine sx_dudxyz_lx8(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -490,7 +490,7 @@ subroutine sx_dudxyz_lx8(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -532,7 +532,7 @@ subroutine sx_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -550,7 +550,7 @@ subroutine sx_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -592,7 +592,7 @@ subroutine sx_dudxyz_lx6(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -610,7 +610,7 @@ subroutine sx_dudxyz_lx6(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -652,7 +652,7 @@ subroutine sx_dudxyz_lx5(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -670,7 +670,7 @@ subroutine sx_dudxyz_lx5(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -712,7 +712,7 @@ subroutine sx_dudxyz_lx4(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -730,7 +730,7 @@ subroutine sx_dudxyz_lx4(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -772,7 +772,7 @@ subroutine sx_dudxyz_lx3(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -790,7 +790,7 @@ subroutine sx_dudxyz_lx3(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -832,7 +832,7 @@ subroutine sx_dudxyz_lx2(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,nel) :: drst
     integer :: e, k
-    integer :: i, j, jj, kk 
+    integer :: i, j, jj, kk
     real(kind=rp) :: wr, ws, wt
 
     do i = 1, lx
@@ -850,7 +850,7 @@ subroutine sx_dudxyz_lx2(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd)
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, nel     
+             do e = 1, nel
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
diff --git a/src/math/bcknd/sx/sx_lambda2.f90 b/src/math/bcknd/sx/sx_lambda2.f90
new file mode 100644
index 00000000000..eb49aa00fec
--- /dev/null
+++ b/src/math/bcknd/sx/sx_lambda2.f90
@@ -0,0 +1,3282 @@
+! Copyright (c) 2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Lambda2 kernels for SX-Aurora
+module sx_lambda2
+  use num_types, only : rp
+  use math, only : pi
+  implicit none
+
+contains
+
+  subroutine sx_lambda2_lx(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n, lx)
+    integer, intent(in) :: n, lx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx
+
+  subroutine sx_lambda2_lx18(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 18
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx18
+
+  subroutine sx_lambda2_lx17(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 17
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx17
+
+  subroutine sx_lambda2_lx16(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 16
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx16
+
+  subroutine sx_lambda2_lx15(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 15
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx15
+
+  subroutine sx_lambda2_lx14(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 14
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx14
+
+  subroutine sx_lambda2_lx13(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 13
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx13
+
+  subroutine sx_lambda2_lx12(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 12
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx12
+
+  subroutine sx_lambda2_lx11(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 11
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx11
+
+  subroutine sx_lambda2_lx10(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 10
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx10
+
+  subroutine sx_lambda2_lx9(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 9
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx9
+
+  subroutine sx_lambda2_lx8(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 8
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx8
+
+  subroutine sx_lambda2_lx7(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 7
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx7
+
+  subroutine sx_lambda2_lx6(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 6
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx6
+
+  subroutine sx_lambda2_lx5(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 5
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx5
+
+  subroutine sx_lambda2_lx4(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 4
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx4
+
+  subroutine sx_lambda2_lx3(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 3
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx3
+
+  subroutine sx_lambda2_lx2(lambda2, u, v, w, dx, dy, dz, &
+       drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n)
+    integer, parameter :: lx = 2
+    integer, intent(in) :: n
+    real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u    
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w
+    real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
+    real(kind=rp) :: grad(lx*lx*lx,3,3)
+    integer :: temp_indices(9), ind_sort(3)
+    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
+    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
+    real(kind=rp) :: a11, a22, a33, a12, a13, a23
+    real(kind=rp) :: msk1, msk2, msk3
+    real(kind=rp) :: ur(lx,lx,lx)
+    real(kind=rp) :: vr(lx,lx,lx)
+    real(kind=rp) :: wr(lx,lx,lx)
+    real(kind=rp) :: us(lx,lx,lx)
+    real(kind=rp) :: vs(lx,lx,lx)
+    real(kind=rp) :: ws(lx,lx,lx)
+    real(kind=rp) :: ut(lx,lx,lx)
+    real(kind=rp) :: vt(lx,lx,lx)
+    real(kind=rp) :: wt(lx,lx,lx)
+    real(kind=rp) :: tmp1, tmp2, tmp3
+    integer :: e, i, j, k, l
+    
+    do e = 1, n
+       do j = 1, lx * lx
+          do i = 1, lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do k = 1, lx
+                tmp1 = tmp1 + dx(i,k) * u(k,j,1,e)
+                tmp2 = tmp2 + dx(i,k) * v(k,j,1,e)
+                tmp3 = tmp3 + dx(i,k) * w(k,j,1,e)
+             end do
+             ur(i,j,1) = tmp1
+             vr(i,j,1) = tmp2
+             wr(i,j,1) = tmp3
+          end do
+       end do
+
+       do k = 1, lx
+          do j = 1, lx
+             do i = 1, lx
+                tmp1 = 0.0_rp
+                tmp2 = 0.0_rp
+                tmp3 = 0.0_rp
+                do l = 1, lx
+                   tmp1 = tmp1 + dy(j,l) * u(i,l,k,e)
+                   tmp2 = tmp2 + dy(j,l) * v(i,l,k,e)
+                   tmp3 = tmp3 + dy(j,l) * w(i,l,k,e)
+                end do
+                us(i,j,k) = tmp1
+                vs(i,j,k) = tmp2
+                ws(i,j,k) = tmp3
+             end do
+          end do
+       end do
+
+       do k = 1, lx
+          do i = 1, lx*lx
+             tmp1 = 0.0_rp
+             tmp2 = 0.0_rp
+             tmp3 = 0.0_rp
+             do l = 1, lx
+                tmp1 = tmp1 + dz(k,l) * u(i,1,l,e)
+                tmp2 = tmp2 + dz(k,l) * v(i,1,l,e)
+                tmp3 = tmp3 + dz(k,l) * w(i,1,l,e)
+             end do
+             ut(i,1,k) = tmp1
+             vt(i,1,k) = tmp2
+             wt(i,1,k) = tmp3
+          end do
+       end do
+
+       do i = 1, lx * lx * lx
+          grad(1,1,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * ur(i,1,1) &
+                        + dsdx(i,1,1,e) * us(i,1,1) &
+                        + dtdx(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * us(i,1,1) &
+                        + drdy(i,1,1,e) * ur(i,1,1) &
+                        + dtdy(i,1,1,e) * ut(i,1,1) )
+          grad(1,1,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * ut(i,1,1) &
+                        + drdz(i,1,1,e) * ur(i,1,1) &
+                        + dsdz(i,1,1,e) * us(i,1,1) )
+
+          grad(1,2,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * vr(i,1,1) &
+                        + dsdx(i,1,1,e) * vs(i,1,1) &
+                        + dtdx(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * vs(i,1,1) &
+                        + drdy(i,1,1,e) * vr(i,1,1) &
+                        + dtdy(i,1,1,e) * vt(i,1,1) )
+          grad(1,2,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * vt(i,1,1) &
+                        + drdz(i,1,1,e) * vr(i,1,1) &
+                        + dsdz(i,1,1,e) * vs(i,1,1) )
+
+          grad(1,3,1) = w3(i,1,1) &
+                      * ( drdx(i,1,1,e) * wr(i,1,1) &
+                        + dsdx(i,1,1,e) * ws(i,1,1) &
+                        + dtdx(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,2) = w3(i,1,1) &
+                      * ( dsdy(i,1,1,e) * ws(i,1,1) &
+                        + drdy(i,1,1,e) * wr(i,1,1) &
+                        + dtdy(i,1,1,e) * wt(i,1,1) )
+          grad(1,3,3) = w3(i,1,1) &
+                      * ( dtdz(i,1,1,e) * wt(i,1,1) &
+                        + drdz(i,1,1,e) * wr(i,1,1) &
+                        + dsdz(i,1,1,e) * ws(i,1,1) )          
+       end do
+       
+
+       do i = 1, lx * lx * lx
+          s11 = grad(i,1,1)
+          s22 = grad(i,2,2)
+          s33 = grad(i,3,3)
+
+          
+          s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
+          s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
+          s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
+          
+          o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
+          o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
+          o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
+
+          a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
+          a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
+          a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
+
+          a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
+          a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
+          a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
+
+
+          B = -(a11 + a22 + a33)
+          C = -(a12*a12 + a13*a13 + a23*a23 &
+              - a11 * a22 - a11 * a33 - a22 * a33)
+          D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
+              - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
+
+
+          q = (3.0 * C - B*B) / 9.0
+          r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
+          theta = acos( r / sqrt(-q*q*q) )
+          
+          eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
+          eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
+          eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
+
+          msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) &
+                       .and. eigen(1) .le. eigen(3) .or.  eigen(3) &
+                       .le. eigen(1) .and. eigen(1) .le. eigen(2) )
+          msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) &
+                       .and. eigen(2) .le. eigen(3) .or. eigen(3) &
+                       .le. eigen(2) .and. eigen(2) .le. eigen(1))
+          msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) &
+                       .and. eigen(3) .le. eigen(2) .or. eigen(2) &
+                       .le. eigen(3) .and. eigen(3) .le. eigen(1))
+
+          l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3)
+
+          lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2)
+       end do
+    end do
+  end subroutine sx_lambda2_lx2
+  
+end module sx_lambda2
+
+
diff --git a/src/math/bcknd/sx/sx_opgrad.f90 b/src/math/bcknd/sx/sx_opgrad.f90
index 94a4f17f941..d08e1ce7896 100644
--- a/src/math/bcknd/sx/sx_opgrad.f90
+++ b/src/math/bcknd/sx/sx_opgrad.f90
@@ -45,8 +45,8 @@ subroutine sx_opgrad_lx(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -62,11 +62,11 @@ subroutine sx_opgrad_lx(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1,lx
@@ -92,7 +92,7 @@ subroutine sx_opgrad_lx(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -104,7 +104,7 @@ subroutine sx_opgrad_lx(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -112,7 +112,7 @@ subroutine sx_opgrad_lx(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine sx_opgrad_lx
-  
+
   subroutine sx_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 18
@@ -122,8 +122,8 @@ subroutine sx_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -139,11 +139,11 @@ subroutine sx_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1,lx
@@ -169,7 +169,7 @@ subroutine sx_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -181,7 +181,7 @@ subroutine sx_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -189,7 +189,7 @@ subroutine sx_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine sx_opgrad_lx18
-  
+
   subroutine sx_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 17
@@ -199,8 +199,8 @@ subroutine sx_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -216,11 +216,11 @@ subroutine sx_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1,lx
@@ -246,7 +246,7 @@ subroutine sx_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -258,7 +258,7 @@ subroutine sx_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -266,7 +266,7 @@ subroutine sx_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine sx_opgrad_lx17
-  
+
   subroutine sx_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 16
@@ -276,8 +276,8 @@ subroutine sx_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -293,11 +293,11 @@ subroutine sx_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1,lx
@@ -323,7 +323,7 @@ subroutine sx_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -335,7 +335,7 @@ subroutine sx_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -343,7 +343,7 @@ subroutine sx_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine sx_opgrad_lx16
-  
+
   subroutine sx_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 15
@@ -353,8 +353,8 @@ subroutine sx_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -370,11 +370,11 @@ subroutine sx_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1,lx
@@ -400,7 +400,7 @@ subroutine sx_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -412,7 +412,7 @@ subroutine sx_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -420,7 +420,7 @@ subroutine sx_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine sx_opgrad_lx15
-  
+
   subroutine sx_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 14
@@ -430,8 +430,8 @@ subroutine sx_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -447,11 +447,11 @@ subroutine sx_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1,lx
@@ -477,7 +477,7 @@ subroutine sx_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -489,7 +489,7 @@ subroutine sx_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -497,7 +497,7 @@ subroutine sx_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine sx_opgrad_lx14
-  
+
   subroutine sx_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 13
@@ -507,8 +507,8 @@ subroutine sx_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -524,11 +524,11 @@ subroutine sx_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1,lx
@@ -554,7 +554,7 @@ subroutine sx_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -566,7 +566,7 @@ subroutine sx_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -584,8 +584,8 @@ subroutine sx_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -601,11 +601,11 @@ subroutine sx_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1,lx
@@ -631,7 +631,7 @@ subroutine sx_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -643,7 +643,7 @@ subroutine sx_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -661,8 +661,8 @@ subroutine sx_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -678,11 +678,11 @@ subroutine sx_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -708,7 +708,7 @@ subroutine sx_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -720,7 +720,7 @@ subroutine sx_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -738,8 +738,8 @@ subroutine sx_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -755,11 +755,11 @@ subroutine sx_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -785,7 +785,7 @@ subroutine sx_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -797,7 +797,7 @@ subroutine sx_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -815,8 +815,8 @@ subroutine sx_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -832,11 +832,11 @@ subroutine sx_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -862,7 +862,7 @@ subroutine sx_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -874,7 +874,7 @@ subroutine sx_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -882,7 +882,7 @@ subroutine sx_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine sx_opgrad_lx9
-  
+
   subroutine sx_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 8
@@ -892,8 +892,8 @@ subroutine sx_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -909,11 +909,11 @@ subroutine sx_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -939,7 +939,7 @@ subroutine sx_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -951,7 +951,7 @@ subroutine sx_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -960,7 +960,7 @@ subroutine sx_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, &
     end do
   end subroutine sx_opgrad_lx8
 
-    subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, &
+  subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 7
     integer, intent(in) :: n
@@ -969,8 +969,8 @@ subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -986,11 +986,11 @@ subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -1016,7 +1016,7 @@ subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -1028,7 +1028,7 @@ subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -1036,7 +1036,7 @@ subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine sx_opgrad_lx7
-  
+
   subroutine sx_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 6
@@ -1046,8 +1046,8 @@ subroutine sx_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -1063,11 +1063,11 @@ subroutine sx_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -1093,7 +1093,7 @@ subroutine sx_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -1105,7 +1105,7 @@ subroutine sx_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -1123,8 +1123,8 @@ subroutine sx_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -1140,11 +1140,11 @@ subroutine sx_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -1170,7 +1170,7 @@ subroutine sx_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -1182,7 +1182,7 @@ subroutine sx_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -1200,8 +1200,8 @@ subroutine sx_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -1217,11 +1217,11 @@ subroutine sx_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -1247,7 +1247,7 @@ subroutine sx_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -1259,7 +1259,7 @@ subroutine sx_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -1267,7 +1267,7 @@ subroutine sx_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine sx_opgrad_lx4
-  
+
   subroutine sx_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, &
        drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n)
     integer, parameter :: lx = 3
@@ -1277,8 +1277,8 @@ subroutine sx_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -1294,11 +1294,11 @@ subroutine sx_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -1324,7 +1324,7 @@ subroutine sx_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -1336,7 +1336,7 @@ subroutine sx_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -1354,8 +1354,8 @@ subroutine sx_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, &
     real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx
     real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy
-    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz    
-    real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx)
+    real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz
+    real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3
     real(kind=rp) :: ur(lx,lx,lx, n)
     real(kind=rp) :: us(lx,lx,lx, n)
     real(kind=rp) :: ut(lx,lx,lx, n)
@@ -1371,11 +1371,11 @@ subroutine sx_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, &
           ur(i,jj,1,1) = wr
        end do
     end do
- 
+
     do k = 1, lx
        do i = 1, lx
           do j = 1, lx
-             do e = 1, n  
+             do e = 1, n
                 ws = 0d0
                 !NEC$ unroll_completely
                 do kk = 1, lx
@@ -1401,7 +1401,7 @@ subroutine sx_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, &
           end do
        end do
     end do
-    
+
     do i = 1, lx * lx * lx
        do e = 1, n
           ux(i,1,1,e) = w3(i,1,1) * &
@@ -1413,7 +1413,7 @@ subroutine sx_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, &
                ( dsdy(i,1,1,e) * us(i,1,1,e) &
                + drdy(i,1,1,e) * ur(i,1,1,e) &
                + dtdy(i,1,1,e) * ut(i,1,1,e) )
-          
+
           uz(i,1,1,e) = w3(i,1,1) * &
                ( dtdz(i,1,1,e) * ut(i,1,1,e) &
                + drdz(i,1,1,e) * ur(i,1,1,e) &
@@ -1421,5 +1421,5 @@ subroutine sx_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, &
        end do
     end do
   end subroutine sx_opgrad_lx2
-  
+
 end module sx_opgrad
diff --git a/src/math/bcknd/sx/tensor_sx.f90 b/src/math/bcknd/sx/tensor_sx.f90
index 94a005693bd..beca41e214a 100644
--- a/src/math/bcknd/sx/tensor_sx.f90
+++ b/src/math/bcknd/sx/tensor_sx.f90
@@ -17,9 +17,9 @@ subroutine tnsr2d_el_sx(v, nv, u, nu, A, Bt)
 
     call mxm(A, nv, u, nu, work, nu)
     call mxm(work, nv, Bt, nu, v, nv)
-    
+
   end subroutine tnsr2d_el_sx
-  
+
   subroutine tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct)
     integer, intent(in) :: nv, nu
     real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu)
@@ -29,7 +29,7 @@ subroutine tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct)
     integer :: i, j, k, l, nunu, nvnu, nvnv
     integer :: ii, jj
     nvnu = nv * nu
-    nunu = nu * nu 
+    nunu = nu * nu
     nvnv = nv * nv
 
     do j = 1, nunu
@@ -42,7 +42,7 @@ subroutine tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct)
           work(ii) = tmp
        end do
     end do
-    
+
     do i = 1, nu
        do j = 1, nv
           do l = 1, nv
@@ -56,7 +56,7 @@ subroutine tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct)
           end do
        end do
     end do
-     
+
     do j = 1, nv
        do i = 1, nvnv
           jj = i + nvnv * (j - 1)
@@ -68,7 +68,7 @@ subroutine tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct)
           v(jj) = tmp
        end do
     end do
-    
+
   end subroutine tnsr3d_el_sx
 
   subroutine tnsr3d_sx(v, nv, u, nu, A, Bt, Ct, nelv)
@@ -95,7 +95,7 @@ subroutine tnsr3d_nvnu_sx(v, nv, u, nu, A, Bt, Ct, nelv)
     integer :: nunu, nvnu, nvnv
 
     nvnu = nv * nu
-    nunu = nu * nu 
+    nunu = nu * nu
     nvnv = nv * nv
 
     do ie = 1, nelv
@@ -109,7 +109,7 @@ subroutine tnsr3d_nvnu_sx(v, nv, u, nu, A, Bt, Ct, nelv)
              work(ii) = tmp
           end do
        end do
-          
+
        do i = 1, nu
           do j = 1, nv
              do l = 1, nv
@@ -123,7 +123,7 @@ subroutine tnsr3d_nvnu_sx(v, nv, u, nu, A, Bt, Ct, nelv)
              end do
           end do
        end do
-       
+
        do j = 1, nv
           do i = 1, nvnv
              jj = i + nvnv * (j - 1)
@@ -150,18 +150,18 @@ subroutine tnsr3d_nu2nv4_sx(v, u, A, Bt, Ct, nelv)
     real(kind=rp), intent(inout) :: A(nv,nu), Bt(nu, nv), Ct(nu,nv)
     real(kind=rp) :: work(nu**2*nv,nelv), work2(nu*nv**2,nelv), tmp
     integer :: ie, i, j, k, l, ii, jj
-    
+
 
     do j = 1, nunu
        do i = 1, nv
           do ie = 1, nelv
              ii = i + nv * (j - 1)
              work(ii, ie) = A(i,1) * u(1 + nu * (j - 1), ie) &
-                          + A(i,2) * u(2 + nu * (j - 1), ie)             
+                          + A(i,2) * u(2 + nu * (j - 1), ie)
           end do
        end do
     end do
-    
+
     do i = 1, nu
        do j = 1, nv
           do l = 1, nv
@@ -178,13 +178,13 @@ subroutine tnsr3d_nu2nv4_sx(v, u, A, Bt, Ct, nelv)
           end do
        end do
     end do
-    
+
     do j = 1, nv
        do i = 1, nvnv
           do ie = 1, nelv
              jj = i + nvnv * (j - 1)
              v(jj, ie) = work2(i + nvnv * (1 - 1),ie) * Ct(1, j) &
-                       + work2(i + nvnv * (2 - 1),ie) * Ct(2, j) 
+                       + work2(i + nvnv * (2 - 1),ie) * Ct(2, j)
           end do
        end do
     end do
@@ -203,15 +203,15 @@ subroutine tnsr3d_nu4_sx(v, nv, u, A, Bt, Ct, nelv)
 
     nvnu = nv * nu
     nvnv = nv * nv
-    
+
     do j = 1, nunu
        do i = 1, nv
-          do ie = 1, nelv                 
+          do ie = 1, nelv
              ii = i + nv * (j - 1)
              work(ii, ie) = A(i,1) * u(1 + nu * (j - 1), ie) &
                           + A(i,2) * u(2 + nu * (j - 1), ie) &
                           + A(i,3) * u(3 + nu * (j - 1), ie) &
-                          + A(i,4) * u(4 + nu * (j - 1), ie)             
+                          + A(i,4) * u(4 + nu * (j - 1), ie)
           end do
        end do
     end do
@@ -219,7 +219,7 @@ subroutine tnsr3d_nu4_sx(v, nv, u, A, Bt, Ct, nelv)
     do i = 1, nu
        do j = 1, nv
           do l = 1, nv
-             do ie = 1, nelv                
+             do ie = 1, nelv
                 ii = l + nv * (j - 1) + nvnv * (i - 1)
                 tmp = 0.0_rp
                 !NEC$ unroll_completely
@@ -232,15 +232,15 @@ subroutine tnsr3d_nu4_sx(v, nv, u, A, Bt, Ct, nelv)
           end do
        end do
     end do
-    
+
     do j = 1, nv
        do i = 1, nvnv
-          do ie = 1, nelv                 
+          do ie = 1, nelv
              jj = i + nvnv * (j - 1)
              v(jj, ie) = work2(i + nvnv * (1 - 1),ie) * Ct(1, j) &
                        + work2(i + nvnv * (2 - 1),ie) * Ct(2, j) &
                        + work2(i + nvnv * (3 - 1),ie) * Ct(3, j) &
-                       + work2(i + nvnv * (4 - 1),ie) * Ct(4, j) 
+                       + work2(i + nvnv * (4 - 1),ie) * Ct(4, j)
           end do
        end do
     end do
@@ -258,7 +258,7 @@ subroutine tnsr1_3d_sx(v, nv, nu, A, Bt, Ct, nelv)
     else
        call tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv)
     end if
-    
+
   end subroutine tnsr1_3d_sx
 
   subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv)
@@ -272,26 +272,26 @@ subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv)
     real(kind=rp) :: tmp
 
     nvnu = nv * nu
-    nunu = nu * nu 
+    nunu = nu * nu
     nvnv = nv * nv
-    
+
     e0 = 1
     es = 1
     ee = nelv
-    
+
     if (nv.gt.nu) then
        e0 = nelv
        es = -1
        ee = 1
     endif
-    
+
     nu3 = nu**3
     nv3 = nv**3
-    
+
     do e = e0,ee,es
        iu = (e-1)*nu3
        iv = (e-1)*nv3
-          
+
        do j = 1, nunu
           do i = 1, nv
              ii = i + nv * (j - 1)
@@ -303,7 +303,7 @@ subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv)
              work(ii) = tmp
           end do
        end do
-       
+
        do i = 1, nu
           do j = 1, nv
              do l = 1, nv
@@ -317,7 +317,7 @@ subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv)
              end do
           end do
        end do
-          
+
        do j = 1, nv
           do i = 1, nvnv
              jj = i + nvnv * (j - 1) + iv
@@ -330,7 +330,7 @@ subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv)
           end do
        end do
     end do
-    
+
   end subroutine tnsr1_3d_nvnu_sx
 
   subroutine tnsr1_3d_nu4nv2_sx(v, A, Bt, Ct, nelv)
@@ -361,14 +361,14 @@ subroutine tnsr1_3d_nu4nv2_sx(v, A, Bt, Ct, nelv)
           end do
        end do
     end do
-    
+
     do i = 1, nu
        do j = 1, nv
           do l = 1, nv
              do ie = 1, nelv
                 ii = l + nv * (j - 1) + nvnv * (i - 1)
                 tmp = 0.0_rp
-                !NEC$ unroll_completely                
+                !NEC$ unroll_completely
                 do k = 1, nu
                    jj = l + nv * (k - 1) + nvnu * (i - 1)
                    tmp = tmp + work(jj,ie) * Bt(k,j)
@@ -378,7 +378,7 @@ subroutine tnsr1_3d_nu4nv2_sx(v, A, Bt, Ct, nelv)
           end do
        end do
     end do
-          
+
     do j = 1, nv
        do i = 1, nvnv
           do ie = 1, nelv
@@ -387,11 +387,11 @@ subroutine tnsr1_3d_nu4nv2_sx(v, A, Bt, Ct, nelv)
              v(jj) = work2(i + nvnv * (1 - 1),ie) * Ct(1, j) &
                    + work2(i + nvnv * (2 - 1),ie) * Ct(2, j) &
                    + work2(i + nvnv * (3 - 1),ie) * Ct(3, j) &
-                   + work2(i + nvnv * (4 - 1),ie) * Ct(4, j)                
+                   + work2(i + nvnv * (4 - 1),ie) * Ct(4, j)
           end do
        end do
     end do
-    
+
   end subroutine tnsr1_3d_nu4nv2_sx
-  
+
 end module tensor_sx
diff --git a/src/math/bcknd/xsmm/ax_helm_xsmm.F90 b/src/math/bcknd/xsmm/ax_helm_xsmm.F90
index 198ab6e1a47..38b39d2599e 100644
--- a/src/math/bcknd/xsmm/ax_helm_xsmm.F90
+++ b/src/math/bcknd/xsmm/ax_helm_xsmm.F90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 module ax_helm_xsmm
@@ -62,7 +62,6 @@ module ax_helm_xsmm
   use num_types, only : rp
   use coefs, only : coef_t
   use space, only : space_t
-  use field, only : field_t
   use mesh, only : mesh_t
   use mxm_wrapper
   use num_types
@@ -71,21 +70,21 @@ module ax_helm_xsmm
 #endif
   implicit none
   private
-  
+
   type, public, extends(ax_t) :: ax_helm_xsmm_t
    contains
      procedure, nopass :: compute => ax_helm_xsmm_compute
   end type ax_helm_xsmm_t
 
-contains 
-  
+contains
+
   subroutine ax_helm_xsmm_compute(w, u, coef, msh, Xh)
     type(mesh_t), intent(inout) :: msh
     type(space_t), intent(inout) :: Xh
     type(coef_t), intent(inout) :: coef
     real(kind=rp), intent(inout) :: w(Xh%lx, Xh%ly, Xh%lz, msh%nelv)
     real(kind=rp), intent(inout) :: u(Xh%lx, Xh%ly, Xh%lz, msh%nelv)
-  
+
     real(kind=rp) :: dudr(Xh%lx,Xh%ly,Xh%lz)
     real(kind=rp) :: duds(Xh%lx,Xh%ly,Xh%lz)
     real(kind=rp) :: dudt(Xh%lx,Xh%ly,Xh%lz)
@@ -119,56 +118,56 @@ subroutine ax_helm_xsmm_compute(w, u, coef, msh, Xh)
        ax_helm_xsmm_lx = Xh%lx
     end if
 
-  
+
     do e = 1, msh%nelv
        if(msh%gdim .eq. 2) then
-         call mxm(Xh%dx, Xh%lx,u(1,1,1,e), Xh%lx, dudr, lyz)
-         call mxm(u(1,1,1,e), Xh%lx, Xh%dyt, Xh%ly, duds, Xh%ly)
-         call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
-         call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
-         if (msh%dfrmd_el(e)) then
-            call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
-            call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
-         end if
-         call col2(tmp1, coef%h1(1,1,1,e), lxyz)
-         call col2(tmp2, coef%h1(1,1,1,e), lxyz)
-         call mxm(Xh%dxt, Xh%lx, tmp1, Xh%lx, tm1, lyz)
-         call mxm(tmp2, Xh%lx, Xh%dy, Xh%ly, tm2, Xh%ly)
-         call add3(w(1,1,1,e), tm1, tm2, lxyz)
+          call mxm(Xh%dx, Xh%lx,u(1,1,1,e), Xh%lx, dudr, lyz)
+          call mxm(u(1,1,1,e), Xh%lx, Xh%dyt, Xh%ly, duds, Xh%ly)
+          call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
+          call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
+          if (msh%dfrmd_el(e)) then
+             call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
+             call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
+          end if
+          call col2(tmp1, coef%h1(1,1,1,e), lxyz)
+          call col2(tmp2, coef%h1(1,1,1,e), lxyz)
+          call mxm(Xh%dxt, Xh%lx, tmp1, Xh%lx, tm1, lyz)
+          call mxm(tmp2, Xh%lx, Xh%dy, Xh%ly, tm2, Xh%ly)
+          call add3(w(1,1,1,e), tm1, tm2, lxyz)
 
-       ! 3D evaluation!
-      else
-         call libxsmm_mmcall(ax_helm_xmm1, Xh%dx, u(1,1,1,e), dudr)
-         do k = 1,Xh%lz
-            call libxsmm_mmcall(ax_helm_xmm2, u(1,1,k,e), Xh%dyt, duds(1,1,k))
-         end do
-         call libxsmm_mmcall(ax_helm_xmm3, u(1,1,1,e), Xh%dzt, dudt)
-         call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
-         call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
-         call col3(tmp3, dudt, coef%G33(1,1,1,e), lxyz)
-         if (msh%dfrmd_el(e)) then
-            call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
-            call addcol3(tmp1, dudt, coef%G13(1,1,1,e), lxyz)
-            call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
-            call addcol3(tmp2, dudt, coef%G23(1,1,1,e), lxyz)
-            call addcol3(tmp3, dudr, coef%G13(1,1,1,e), lxyz)
-            call addcol3(tmp3, duds, coef%G23(1,1,1,e), lxyz)
-         end if
-         call col2(tmp1, coef%h1(1,1,1,e), lxyz)
-         call col2(tmp2, coef%h1(1,1,1,e), lxyz)
-         call col2(tmp3, coef%h1(1,1,1,e), lxyz)
-         call libxsmm_mmcall(ax_helm_xmm1, Xh%dxt, tmp1, tm1)
-         do k = 1,Xh%lz
-            call libxsmm_mmcall(ax_helm_xmm2, tmp2(1,1,k), Xh%dy, tm2(1,1,k))
-         end do
-         call libxsmm_mmcall(ax_helm_xmm3, tmp3, Xh%dz, tm3)
-         call add4(w(1,1,1,e), tm1, tm2, tm3, lxyz)
-       end if 
+          ! 3D evaluation!
+       else
+          call libxsmm_mmcall(ax_helm_xmm1, Xh%dx, u(1,1,1,e), dudr)
+          do k = 1,Xh%lz
+             call libxsmm_mmcall(ax_helm_xmm2, u(1,1,k,e), Xh%dyt, duds(1,1,k))
+          end do
+          call libxsmm_mmcall(ax_helm_xmm3, u(1,1,1,e), Xh%dzt, dudt)
+          call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz)
+          call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz)
+          call col3(tmp3, dudt, coef%G33(1,1,1,e), lxyz)
+          if (msh%dfrmd_el(e)) then
+             call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz)
+             call addcol3(tmp1, dudt, coef%G13(1,1,1,e), lxyz)
+             call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz)
+             call addcol3(tmp2, dudt, coef%G23(1,1,1,e), lxyz)
+             call addcol3(tmp3, dudr, coef%G13(1,1,1,e), lxyz)
+             call addcol3(tmp3, duds, coef%G23(1,1,1,e), lxyz)
+          end if
+          call col2(tmp1, coef%h1(1,1,1,e), lxyz)
+          call col2(tmp2, coef%h1(1,1,1,e), lxyz)
+          call col2(tmp3, coef%h1(1,1,1,e), lxyz)
+          call libxsmm_mmcall(ax_helm_xmm1, Xh%dxt, tmp1, tm1)
+          do k = 1,Xh%lz
+             call libxsmm_mmcall(ax_helm_xmm2, tmp2(1,1,k), Xh%dy, tm2(1,1,k))
+          end do
+          call libxsmm_mmcall(ax_helm_xmm3, tmp3, Xh%dz, tm3)
+          call add4(w(1,1,1,e), tm1, tm2, tm3, lxyz)
+       end if
     end do
-    
+
     if (coef%ifh2) call addcol4 (w,coef%h2,coef%B,u,coef%dof%n_dofs)
 #endif
 
   end subroutine ax_helm_xsmm_compute
-  
+
 end module ax_helm_xsmm
diff --git a/src/math/bcknd/xsmm/fdm_xsmm.f90 b/src/math/bcknd/xsmm/fdm_xsmm.f90
index dfe6bbe0c04..74a3e36075b 100644
--- a/src/math/bcknd/xsmm/fdm_xsmm.f90
+++ b/src/math/bcknd/xsmm/fdm_xsmm.f90
@@ -5,13 +5,13 @@ module fdm_xsmm
   implicit none
 
 contains
-  
+
   subroutine fdm_do_fast_xsmm(e, r, s, d, nl, ldim, nelv)
     integer, intent(in) :: nl, nelv, ldim
     real(kind=rp), intent(inout) :: e(nl**ldim, nelv)
     real(kind=rp), intent(inout) :: r(nl**ldim, nelv)
     real(kind=rp), intent(inout) :: s(nl*nl,2,ldim, nelv)
-    real(kind=rp), intent(inout) :: d(nl**ldim, nelv)    
+    real(kind=rp), intent(inout) :: d(nl**ldim, nelv)
     integer ::  ie, nn, i
 
     nn = nl**ldim
@@ -35,5 +35,5 @@ subroutine fdm_do_fast_xsmm(e, r, s, d, nl, ldim, nelv)
        end do
     end if
   end subroutine fdm_do_fast_xsmm
-  
+
 end module fdm_xsmm
diff --git a/src/math/bcknd/xsmm/opr_xsmm.F90 b/src/math/bcknd/xsmm/opr_xsmm.F90
index 4eaefc8d873..c2e00a061ea 100644
--- a/src/math/bcknd/xsmm/opr_xsmm.F90
+++ b/src/math/bcknd/xsmm/opr_xsmm.F90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 !> Operators libxsmm backend
@@ -77,11 +77,11 @@ module opr_xsmm
   public :: opr_xsmm_dudxyz, opr_xsmm_opgrad, opr_xsmm_cdtp, opr_xsmm_conv1, opr_xsmm_curl
 
 #ifdef HAVE_LIBXSMM
-    type(libxsmm_dmmfunction), private :: lgrad_xmm1
-    type(libxsmm_dmmfunction), private :: lgrad_xmm2
-    type(libxsmm_dmmfunction), private :: lgrad_xmm3
+  type(libxsmm_dmmfunction), private :: lgrad_xmm1
+  type(libxsmm_dmmfunction), private :: lgrad_xmm2
+  type(libxsmm_dmmfunction), private :: lgrad_xmm3
 #endif
-    
+
 contains
 
   subroutine opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
@@ -89,7 +89,7 @@ subroutine opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
     real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv), intent(inout) ::  du
     real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv), intent(in) ::  u, dr, ds, dt
     real(kind=rp) :: drst(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz)
-    type(space_t), pointer :: Xh 
+    type(space_t), pointer :: Xh
     type(mesh_t), pointer :: msh
     integer :: e, k, lxy, lyz, lxyz
 #ifdef HAVE_LIBXSMM
@@ -97,9 +97,9 @@ subroutine opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
     type(libxsmm_dmmfunction), save :: dudxyz_xmm2
     type(libxsmm_dmmfunction), save :: dudxyz_xmm3
     logical, save :: dudxyz_xsmm_init = .false.
-  
+
     Xh => coef%Xh
-    msh => coef%msh 
+    msh => coef%msh
     lxy  = Xh%lx*Xh%ly
     lyz  = Xh%ly*Xh%lz
     lxyz = Xh%lx*Xh%ly*Xh%lz
@@ -133,11 +133,11 @@ subroutine opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
     end do
     call col2(du, coef%jacinv, coef%dof%n_dofs)
 
-#endif    
+#endif
   end subroutine opr_xsmm_dudxyz
 
-  subroutine opr_xsmm_opgrad(ux, uy, uz, u, coef) 
-    type(coef_t), intent(in) :: coef  
+  subroutine opr_xsmm_opgrad(ux, uy, uz, u, coef)
+    type(coef_t), intent(in) :: coef
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: ux
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uy
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uz
@@ -215,7 +215,7 @@ subroutine local_grad3_xsmm(ur, us, ut, u, n, D, Dt)
   end subroutine local_grad3_xsmm
 
   subroutine local_grad2(ur, us, u, n, D, Dt)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), intent(inout) :: ur(0:n, 0:n)
     real(kind=rp), intent(inout) :: us(0:n, 0:n)
     real(kind=rp), intent(in) :: u(0:n, 0:n)
@@ -284,7 +284,7 @@ subroutine opr_xsmm_cdtp(dtx,x,dr,ds,dt, coef)
 #endif
   end subroutine opr_xsmm_cdtp
 
-  subroutine opr_xsmm_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim)  
+  subroutine opr_xsmm_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim)
     type(space_t), intent(in) :: Xh
     type(coef_t), intent(in) :: coef
     integer, intent(in) :: nelv, gdim
@@ -301,7 +301,7 @@ subroutine opr_xsmm_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim)
     type(libxsmm_dmmfunction), save :: conv1_xmm2
     type(libxsmm_dmmfunction), save :: conv1_xmm3
     logical, save :: conv1_xsmm_init = .false.
-    
+
     if (.not. conv1_xsmm_init) then
        call libxsmm_dispatch(conv1_xmm1, Xh%lx, Xh%ly*Xh%lx, Xh%lx, &
             alpha=1d0, beta=0d0, prefetch=LIBXSMM_PREFETCH_AUTO)
@@ -352,7 +352,7 @@ subroutine opr_xsmm_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim)
     end do
 
 #endif
-    
+
   end subroutine opr_xsmm_conv1
 
   subroutine opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
@@ -395,9 +395,9 @@ subroutine opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
     !!    BC dependent, Needs to change if cyclic
 
     call opcolv(w1%x,w2%x,w3%x,c_Xh%B, gdim, n)
-    call c_Xh%gs_h%op(w1, GS_OP_ADD) 
-    call c_Xh%gs_h%op(w2, GS_OP_ADD) 
-    call c_Xh%gs_h%op(w3, GS_OP_ADD) 
+    call c_Xh%gs_h%op(w1, GS_OP_ADD)
+    call c_Xh%gs_h%op(w2, GS_OP_ADD)
+    call c_Xh%gs_h%op(w3, GS_OP_ADD)
     call opcolv(w1%x, w2%x, w3%x, c_Xh%Binv, gdim, n)
 
   end subroutine opr_xsmm_curl
diff --git a/src/math/bcknd/xsmm/tensor_xsmm.F90 b/src/math/bcknd/xsmm/tensor_xsmm.F90
index 88d3e8341c4..3ac6f0af5c0 100644
--- a/src/math/bcknd/xsmm/tensor_xsmm.F90
+++ b/src/math/bcknd/xsmm/tensor_xsmm.F90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 !> Tensor operations libxsmm backend
@@ -65,9 +65,9 @@ module tensor_xsmm
   private
 
   public :: tnsr2d_el_xsmm, tnsr3d_el_xsmm, tnsr3d_xsmm, tnsr1_3d_xsmm
-  
+
 contains
-  
+
   subroutine tnsr2d_el_xsmm(v, nv, u, nu, A, Bt)
     integer, intent(in) :: nv, nu
     real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu)
@@ -76,7 +76,7 @@ subroutine tnsr2d_el_xsmm(v, nv, u, nu, A, Bt)
 
     call mxm(A, nv, u, nu, work, nu)
     call mxm(work, nv, Bt, nu, v, nv)
-    
+
   end subroutine tnsr2d_el_xsmm
 
   subroutine tnsr3d_el_xsmm(v, nv, u, nu, A, Bt, Ct)
@@ -87,17 +87,17 @@ subroutine tnsr3d_el_xsmm(v, nv, u, nu, A, Bt, Ct)
     integer :: i, nunu, nvnu, nvnv
 
     nvnu = nv * nu
-    nunu = nu * nu 
+    nunu = nu * nu
     nvnv = nv * nv
-    
+
     call mxm(A, nv, u(1), nu ,work, nunu)
     do i = 0,nu-1
        call mxm(work(nvnu*i), nv, Bt, nu, work2(nv*nv*i), nv)
     end do
     call mxm(work2, nvnv, Ct, nu, v(1), nv)
-    
+
   end subroutine tnsr3d_el_xsmm
- 
+
   subroutine tnsr3d_xsmm(v, nv, u, nu, A, Bt, Ct, nelv)
     integer, intent(inout) :: nv, nu, nelv
     real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
@@ -106,9 +106,9 @@ subroutine tnsr3d_xsmm(v, nv, u, nu, A, Bt, Ct, nelv)
     integer :: ie, i, nunu, nvnu, nvnv
 
     nvnu = nv * nu
-    nunu = nu * nu 
+    nunu = nu * nu
     nvnv = nv * nv
-    
+
     do ie = 1,nelv
        call mxm(A, nv, u(1,ie), nu, work, nunu)
        do i = 0,nu-1
@@ -116,10 +116,10 @@ subroutine tnsr3d_xsmm(v, nv, u, nu, A, Bt, Ct, nelv)
        end do
        call mxm(work2, nvnv, Ct, nu, v(1,ie), nv)
     end do
-    
+
   end subroutine tnsr3d_xsmm
 
-  subroutine tnsr1_3d_xsmm(v, nv, nu, A, Bt, Ct, nelv) 
+  subroutine tnsr1_3d_xsmm(v, nv, nu, A, Bt, Ct, nelv)
     integer, intent(in) :: nv, nu, nelv
     real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv)
     real(kind=rp), intent(inout) :: A(nv,nu), Bt(nu, nv), Ct(nu,nv)
diff --git a/src/math/fast3d.f90 b/src/math/fast3d.f90
index 09c3bdc44d6..45b7fa38e29 100644
--- a/src/math/fast3d.f90
+++ b/src/math/fast3d.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 !> Fast diagonalization methods from NEKTON
@@ -78,7 +78,7 @@ module fast3d
   !!
   !! Given gridpoints \f$ x_0, x_1, \dots x_n \f$ and some point \f$\xi\f$
   !! (not necessarily a grid point!) find weights \f$ c_{j, k} \f$, such that
-  !! the expansions 
+  !! the expansions
   !! \f$ \frac{d^k f}{d x^k}|_{x=\xi} \approx \sum_{j=0}^n c_{j,k} f(x_j)\f$,
   !! \f$k=0, \dots m\f$ are optimal.
   !! Note that finite-difference stencils are exactly such type of expansions.
@@ -93,7 +93,7 @@ module fast3d
   !!
   !! @warning The calculation of the wieghts is numerically stable.
   !! But applying the weights to a function can be ill-conditioned in the case
-  !! of high-order derivatives. 
+  !! of high-order derivatives.
   !!
   !! @param xi Point at which the approximations are to be accurate
   !! @param x The coordinates for the grid points
@@ -121,29 +121,29 @@ subroutine fd_weights_full(xi, x, n, m, c)
 
     c(0,0) = 1d0
 
-    do i = 1, n                                                              
+    do i = 1, n
        mn = min(i,m)
-       c2 = 1d0  
-       c5 = c4                                                       
+       c2 = 1d0
+       c5 = c4
        c4 = x(i) - xi
-       do j = 0, i - 1                                                  
+       do j = 0, i - 1
           c3 = x(i) - x(j)
-          c2 = c2 * c3                                                    
-          do k = mn, 1, -1                    
+          c2 = c2 * c3
+          do k = mn, 1, -1
              c(i,k) = c1 * (k * c(i-1,k-1) - c5 * c(i-1,k)) / c2
           end do
           c(i,0) = -c1 * c5 * c(i-1,0) / c2
-          do k = mn, 1, -1                           
+          do k = mn, 1, -1
              c(j,k) = (c4 * c(j,k) - k * c(j,k-1)) / c3
           end do
           c(j,0) = c4 * c(j,0) / c3
        end do
        c1 = c2
     end do
-    
+
   end subroutine fd_weights_full
-     
-  
+
+
 !>  Generate matrices for single element, 1D operators:
 !!        a    = Laplacian
 !!        b    = diagonal mass matrix
@@ -200,12 +200,12 @@ subroutine semhat(a, b, c, d, z, dgll, jgll, bgl, zgl, dgl, jgl, n, w)
     end do
     call rzero(a, np*np)
     do j = 0,n
-    do i = 0,n
-       do k = 0,n
-          a(i,j) = a(i,j) + d(k,i)*b(k)*d(k,j)
+       do i = 0,n
+          do k = 0,n
+             a(i,j) = a(i,j) + d(k,i)*b(k)*d(k,j)
+          end do
+          c(i,j) = b(i)*d(i,j)
        end do
-       c(i,j) = b(i)*d(i,j)
-    end do
     end do
     call zwgl(zgl, bgl, nm)
     do i = 1,n-1
@@ -219,7 +219,7 @@ end subroutine semhat
 
   !> Compute interpolation weights for points `z_to` using values at points
   !! `z_from`.
-  !! @details 
+  !! @details
   !! This is essentially a wrapper for calling fd_weights_full() for several
   !! points. For each point in `z_to`, we get a set of interpolation weights of
   !! size `n_from`.
@@ -227,7 +227,7 @@ end subroutine semhat
   !! in `z_to` and each column the weight of a point in `z_from`.
   !!
   !! This routine is used for interpolating between elements of different
-  !! polynomial order. In other words, belonging to different 
+  !! polynomial order. In other words, belonging to different
   !! \ref space::space_t . The points are then GL, GLL, etc., depending on the
   !! space.
   !!
diff --git a/src/math/fdm.f90 b/src/math/fdm.f90
index 2f70dc4784b..3e7ee9b4e83 100644
--- a/src/math/fdm.f90
+++ b/src/math/fdm.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 !> Type for the Fast Diagonalization connected with the schwarz overlapping solves.
@@ -69,7 +69,7 @@ module fdm
   use gather_scatter
   use fast3d
   use tensor
-  use fdm_sx    
+  use fdm_sx
   use fdm_xsmm
   use fdm_cpu
   use fdm_device
@@ -94,7 +94,7 @@ module fdm
      type(dofmap_t), pointer :: dof
      type(gs_t), pointer :: gs_h
      type(mesh_t), pointer :: msh
-   contains 
+   contains
      procedure, pass(this) :: init => fdm_init
      procedure, pass(this) :: free => fdm_free
      procedure, pass(this) :: compute => fdm_compute
@@ -105,7 +105,7 @@ module fdm
   end interface sygv
 
 contains
-  
+
   subroutine fdm_init(this, Xh, dm, gs_h)
     class(fdm_t), intent(inout) :: this
     type(space_t), target, intent(inout) :: Xh
@@ -119,7 +119,7 @@ subroutine fdm_init(this, Xh, dm, gs_h)
     n = Xh%lx -1 !Polynomnial degree
     nl = Xh%lx + 2 !Schwarz!
     nelv = dm%msh%nelv
-    call fdm_free(this) 
+    call fdm_free(this)
     allocate(this%s(nl*nl,2,dm%msh%gdim, dm%msh%nelv))
     allocate(this%d(nl**3,dm%msh%nelv))
     allocate(this%swplen(Xh%lx, Xh%lx, Xh%lx,dm%msh%nelv))
@@ -130,11 +130,11 @@ subroutine fdm_init(this, Xh, dm, gs_h)
     ! Zeroing here enables easier debugging since then
     ! MPI messages in GS are deterministic
     call rzero(this%swplen, Xh%lxyz * dm%msh%nelv)
- 
+
     if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_map(this%s, this%s_d,nl*nl*2*dm%msh%gdim*dm%msh%nelv) 
-       call device_map(this%d, this%d_d,nl**dm%msh%gdim*dm%msh%nelv) 
-       call device_map(this%swplen,this%swplen_d, Xh%lxyz*dm%msh%nelv) 
+       call device_map(this%s, this%s_d,nl*nl*2*dm%msh%gdim*dm%msh%nelv)
+       call device_map(this%d, this%d_d,nl**dm%msh%gdim*dm%msh%nelv)
+       call device_map(this%swplen,this%swplen_d, Xh%lxyz*dm%msh%nelv)
     end if
 
     call semhat(ah, bh, ch, dh, zh, dph, jph, bgl, zglhat, dgl, jgl, n, wh)
@@ -149,11 +149,11 @@ subroutine fdm_init(this, Xh, dm, gs_h)
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_memcpy(this%s, this%s_d, &
-            nl*nl*2*dm%msh%gdim*dm%msh%nelv, HOST_TO_DEVICE) 
+            nl*nl*2*dm%msh%gdim*dm%msh%nelv, HOST_TO_DEVICE, sync=.false.)
        call device_memcpy(this%d, this%d_d, &
-            nl**dm%msh%gdim*dm%msh%nelv, HOST_TO_DEVICE) 
+            nl**dm%msh%gdim*dm%msh%nelv, HOST_TO_DEVICE, sync=.false.)
        call device_memcpy(this%swplen, this%swplen_d, &
-            Xh%lxyz*dm%msh%nelv, HOST_TO_DEVICE) 
+            Xh%lxyz*dm%msh%nelv, HOST_TO_DEVICE, sync=.false.)
     end if
   end subroutine fdm_init
 
@@ -193,9 +193,11 @@ subroutine swap_lengths(this, x, y, z, nelv, gdim)
             end do
          end do
          if (NEKO_BCKND_DEVICE .eq. 1) then
-            call device_memcpy(l, this%swplen_d, this%dof%size(), HOST_TO_DEVICE)
+            call device_memcpy(l, this%swplen_d, this%dof%size(), &
+                               HOST_TO_DEVICE, sync=.false.)
             call this%gs_h%op(l, this%dof%size(), GS_OP_ADD)
-            call device_memcpy(l, this%swplen_d, this%dof%size(), DEVICE_TO_HOST)
+            call device_memcpy(l, this%swplen_d, this%dof%size(), &
+                               DEVICE_TO_HOST, sync=.false.)
          else
             call this%gs_h%op(l, this%dof%size(), GS_OP_ADD)
          end if
@@ -219,9 +221,11 @@ subroutine swap_lengths(this, x, y, z, nelv, gdim)
          end do
 
          if (NEKO_BCKND_DEVICE .eq. 1) then
-            call device_memcpy(l, this%swplen_d, this%dof%size(),HOST_TO_DEVICE)
+            call device_memcpy(l, this%swplen_d, this%dof%size(), &
+                               HOST_TO_DEVICE, sync=.false.)
             call this%gs_h%op(l, this%dof%size(), GS_OP_ADD)
-            call device_memcpy(l, this%swplen_d, this%dof%size(),DEVICE_TO_HOST, sync=.true.)
+            call device_memcpy(l, this%swplen_d, this%dof%size(), &
+                               DEVICE_TO_HOST, sync=.true.)
          else
             call this%gs_h%op(l, this%dof%size(), GS_OP_ADD)
          end if
@@ -338,7 +342,7 @@ subroutine fdm_setup_fast(this, ah, bh, nl, n)
     integer :: ie, il, nr, ns, nt
     integer :: lbr, rbr, lbs, rbs, lbt, rbt
     real(kind=rp) :: eps, diag
-    
+
     associate(s => this%s, d => this%d, &
               llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, &
               lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, &
@@ -350,7 +354,7 @@ subroutine fdm_setup_fast(this, ah, bh, nl, n)
          rbs = this%dof%msh%facet_type(4, ie)
          lbt = this%dof%msh%facet_type(5, ie)
          rbt = this%dof%msh%facet_type(6, ie)
-         
+
          nr = nl
          ns = nl
          nt = nl
@@ -398,7 +402,7 @@ subroutine fdm_setup_fast(this, ah, bh, nl, n)
     end associate
 
   end subroutine fdm_setup_fast
-  
+
   subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
     integer, intent(in)  :: nl, lbc, rbc, n
     real(kind=rp), intent(inout) :: s(nl, nl, 2), lam(nl), ll, lm, lr
@@ -408,7 +412,7 @@ subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
 
     lx1 = n + 1
     lxm = lx1 + 2
-     
+
     call fdm_setup_fast1d_a(s, lbc, rbc, ll, lm, lr, ah, n)
     call fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
     call generalev(s, b, lam, nl, lx1)
@@ -416,9 +420,9 @@ subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n)
     if(lbc .eq. 1) call row_zero(s, nl, nl, 2)
     if(rbc .gt. 0) call row_zero(s, nl, nl, nl)
     if(rbc .eq. 1) call row_zero(s, nl, nl, nl-1)
-    
+
     call trsp(s(1,1,2), nl, s, nl)
-    
+
   end subroutine fdm_setup_fast1d
 
   !> Solve the generalized eigenvalue problem /$ A x = lam B x/$
@@ -433,7 +437,7 @@ subroutine generalev(a, b, lam, n, lx)
     lbw = 4*(lx+2)**3
     lw = n*n
     call sygv(a, b, lam, n, lx, bw, lbw)
-    
+
   end subroutine generalev
 
   subroutine sp_sygv(a, b, lam, n, lx, bw, lbw)
@@ -484,7 +488,7 @@ subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
     if(lbc .eq. 1) i0 = 1
     i1 = n
     if(rbc .eq. 1) i1 = n - 1
-    
+
     call rzero(a, (n+3) * (n+3))
 
     fac = 2.0_rp / lm
@@ -496,7 +500,7 @@ subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
           a(i+1,j+1) = fac * ah(i,j)
        enddo
     enddo
-    
+
     if(lbc .eq. 0) then
        fac = 2.0_rp / ll
        a(0,0) = fac * ah(n-1,n-1)
@@ -506,7 +510,7 @@ subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
     else
        a(0,0) = 1.0_rp
     endif
-    
+
     if(rbc .eq. 0) then
        fac = 2.0_rp / lr
        a(n+1,n+1) = a(n+1,n+1) + fac*ah(0,0)
@@ -516,21 +520,21 @@ subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
     else
        a(n+2,n+2) = 1.0_rp
     endif
-    
+
   end subroutine fdm_setup_fast1d_a
 
   subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
     integer, intent(in) :: lbc, rbc, n
     real(kind=rp), intent(inout) :: b(0:n+2, 0:n+2), ll, lm, lr
-    real(kind=rp), intent(inout) :: bh(0:n)    
+    real(kind=rp), intent(inout) :: bh(0:n)
     real(kind=rp) :: fac
     integer :: i, i0, i1
-    
+
     i0 = 0
     if(lbc .eq. 1) i0 = 1
     i1 = n
     if(rbc .eq. 1) i1 = n - 1
-    
+
     call rzero(b, (n + 3) * (n + 3))
 
     fac = 0.5_rp * lm
@@ -556,60 +560,60 @@ subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
     else
        b(n+2,n+2) = 1.0_rp
     end if
-    
+
   end subroutine fdm_setup_fast1d_b
 
   subroutine fdm_free(this)
     class(fdm_t), intent(inout) :: this
-    
+
     if(allocated(this%s)) then
        deallocate(this%s)
     end if
-   
+
     if(allocated(this%d)) then
        deallocate(this%d)
     end if
-    
+
     if(allocated(this%len_lr)) then
        deallocate(this%len_lr)
     end if
-    
+
     if(allocated(this%len_ls)) then
        deallocate(this%len_ls)
     end if
-    
+
     if(allocated(this%len_lt)) then
        deallocate(this%len_lt)
     end if
-    
+
     if(allocated(this%len_mr)) then
        deallocate(this%len_mr)
     end if
-    
+
     if(allocated(this%len_ms)) then
        deallocate(this%len_ms)
     end if
-    
+
     if(allocated(this%len_mt)) then
        deallocate(this%len_mt)
     end if
-    
+
     if(allocated(this%len_rr)) then
        deallocate(this%len_rr)
     end if
-    
+
     if(allocated(this%len_rs)) then
        deallocate(this%len_rs)
     end if
-    
+
     if(allocated(this%len_rt)) then
        deallocate(this%len_rt)
     end if
-    
+
     if(allocated(this%swplen)) then
        deallocate(this%swplen)
     end if
-    
+
     nullify(this%Xh)
     nullify(this%dof)
     nullify(this%gs_h)
@@ -644,6 +648,6 @@ subroutine fdm_compute(this, e, r, stream)
     end if
 
   end subroutine fdm_compute
- 
+
 
 end module fdm
diff --git a/src/math/math.f90 b/src/math/math.f90
index 3290f48645d..75dec60c61b 100644
--- a/src/math/math.f90
+++ b/src/math/math.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 module math
@@ -76,11 +76,16 @@ module math
      module procedure sabscmp, dabscmp, qabscmp
   end interface abscmp
 
+  interface relcmp
+     module procedure srelcmp, drelcmp, qrelcmp
+  end interface relcmp
+
   public :: abscmp, rzero, izero, row_zero, rone, copy, cmult, cadd, cfill, &
-       glsum, glmax, glmin, chsign, vlmax, invcol1, invcol3, invers2, vcross, &
+       glsum, glmax, glmin, chsign, vlmax, vlmin, invcol1, invcol3, invers2, vcross, &
        vdot2, vdot3, vlsc3, vlsc2, add2, add3, add4, sub2, sub3, add2s1, add2s2, &
        addsqr2s2, cmult2, invcol2, col2, col3, subcol3, add3s2, subcol4, addcol3,&
-       addcol4, ascol5, p_update, x_update, glsc2, glsc3, glsc4, sort
+       addcol4, ascol5, p_update, x_update, glsc2, glsc3, glsc4, sort, &
+       masked_copy, relcmp, glimax, glimin
 
 contains
 
@@ -88,7 +93,7 @@ module math
   pure function sabscmp(x, y)
     real(kind=sp), intent(in) :: x
     real(kind=sp), intent(in) :: y
-    logical :: sabscmp 
+    logical :: sabscmp
 
     sabscmp = abs(x - y) .lt. NEKO_EPS
 
@@ -98,28 +103,71 @@ end function sabscmp
   pure function dabscmp(x, y)
     real(kind=dp), intent(in) :: x
     real(kind=dp), intent(in) :: y
-    logical :: dabscmp 
+    logical :: dabscmp
 
     dabscmp = abs(x - y) .lt. NEKO_EPS
-    
+
   end function dabscmp
 
   !> Return double precision absolute comparison \f$ | x - y | < \epsilon \f$
   pure function qabscmp(x, y)
     real(kind=qp), intent(in) :: x
     real(kind=qp), intent(in) :: y
-    logical :: qabscmp 
+    logical :: qabscmp
 
     qabscmp = abs(x - y) .lt. NEKO_EPS
 
   end function qabscmp
 
+  !> Return single precision relative comparison \f$ | x - y |<= \epsilon*|y| \f$
+  pure function srelcmp(x, y, eps)
+    real(kind=sp), intent(in) :: x
+    real(kind=sp), intent(in) :: y
+    real(kind=sp), intent(in), optional :: eps
+    logical :: srelcmp 
+    if (present(eps)) then
+       srelcmp = abs(x - y) .le. eps*abs(y)
+    else
+       srelcmp = abs(x - y) .le. NEKO_EPS*abs(y)
+    end if
+
+  end function srelcmp
+
+  !> Return double precision relative comparison \f$ | x - y |/|y| < \epsilon \f$
+  pure function drelcmp(x, y, eps)
+    real(kind=dp), intent(in) :: x
+    real(kind=dp), intent(in) :: y
+    real(kind=dp), intent(in), optional :: eps
+    logical :: drelcmp 
+    if (present(eps)) then
+       drelcmp = abs(x - y) .le. eps*abs(y)
+    else
+       drelcmp = abs(x - y) .le. NEKO_EPS*abs(y)
+    end if
+
+  end function drelcmp
+
+
+  !> Return quad precision relative comparison \f$ | x - y |/|y| < \epsilon \f$
+  pure function qrelcmp(x, y, eps)
+    real(kind=qp), intent(in) :: x
+    real(kind=qp), intent(in) :: y
+    real(kind=qp), intent(in), optional :: eps
+    logical :: qrelcmp 
+    if (present(eps)) then
+       qrelcmp = abs(x - y)/abs(y) .lt. eps
+    else
+       qrelcmp = abs(x - y)/abs(y) .lt. NEKO_EPS
+    end if
+
+  end function qrelcmp
+
   !> Zero a real vector
   subroutine rzero(a, n)
     integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     integer :: i
-    
+
     do i = 1, n
        a(i) = 0.0_rp
     end do
@@ -130,7 +178,7 @@ subroutine izero(a, n)
     integer, intent(in) :: n
     integer, dimension(n), intent(inout) :: a
     integer :: i
-    
+
     do i = 1, n
        a(i) = 0
     end do
@@ -144,7 +192,7 @@ subroutine row_zero(a, m, n, e)
 
     do j = 1,n
        a(e,j) = 0.0_rp
-    end do    
+    end do
   end subroutine row_zero
 
   !> Set all elements to one
@@ -152,7 +200,7 @@ subroutine rone(a, n)
     integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     integer :: i
-    
+
     do i = 1, n
        a(i) = 1.0_rp
     end do
@@ -170,7 +218,29 @@ subroutine copy(a, b, n)
     end do
 
   end subroutine copy
+
+  !> Copy a masked vector \f$ a(mask) = b(mask) \f$.
+  !! @param a Destination array of size `n`.
+  !! @param b Source array of size `n`.
+  !! @param mask Mask array of length m+1, where `mask(0)=m`
+  !! the length of the mask array.
+  !! @param n Size of the arrays `a` and `b`.
+  !! @param m Size of the mask array `mask`.
+  subroutine masked_copy(a, b, mask, n, m)
+    integer, intent(in) :: n, m
+    real(kind=rp), dimension(n), intent(in) :: b
+    real(kind=rp), dimension(n), intent(inout) :: a
+    integer, dimension(0:m) :: mask
+    integer :: i, j
+
+    do i = 1, m
+       j = mask(i)
+       a(j) = b(j)
+    end do
+
+  end subroutine masked_copy
   
+ 
   !> Multiplication by constant c \f$ a = c \cdot a \f$
   subroutine cmult(a, c, n)
     integer, intent(in) :: n
@@ -182,14 +252,14 @@ subroutine cmult(a, c, n)
        a(i) = c * a(i)
     end do
   end subroutine cmult
-  
+
   !> Add a scalar to vector \f$ a = \sum a_i + s \f$
   subroutine cadd(a, s, n)
     integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), intent(in) :: s
     integer :: i
-    
+
     do i = 1, n
        a(i) = a(i) + s
     end do
@@ -207,8 +277,8 @@ subroutine cfill(a, c, n)
     end do
   end subroutine cfill
 
-  !>Sum a vector of length n 
-  function glsum(a, n) 
+  !>Sum a vector of length n
+  function glsum(a, n)
     integer, intent(in) :: n
     real(kind=rp), dimension(n) :: a
     real(kind=rp) :: tmp, glsum
@@ -219,11 +289,11 @@ function glsum(a, n)
     end do
     call MPI_Allreduce(tmp, glsum, 1, &
          MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
-    
+
   end function glsum
-  
-  !>Max of a vector of length n 
-  function glmax(a, n) 
+
+  !>Max of a vector of length n
+  function glmax(a, n)
     integer, intent(in) :: n
     real(kind=rp), dimension(n) :: a
     real(kind=rp) :: tmp, glmax
@@ -235,9 +305,23 @@ function glmax(a, n)
     call MPI_Allreduce(tmp, glmax, 1, &
          MPI_REAL_PRECISION, MPI_MAX, NEKO_COMM, ierr)
   end function glmax
+   
+  !>Max of an integer vector of length n 
+  function glimax(a, n) 
+    integer, intent(in) :: n
+    integer, dimension(n) :: a
+    integer :: tmp, glimax
+    integer :: i, ierr
+    tmp = a(1)
+    do i = 2, n
+       tmp =  max(tmp,a(i))
+    end do
+    call MPI_Allreduce(tmp, glimax, 1, &
+         MPI_INTEGER, MPI_MAX, NEKO_COMM, ierr)
+  end function glimax
   
-  !>Min of a vector of length n 
-  function glmin(a, n) 
+  !>Min of a vector of length n
+  function glmin(a, n)
     integer, intent(in) :: n
     real(kind=rp), dimension(n) :: a
     real(kind=rp) :: tmp, glmin
@@ -250,6 +334,21 @@ function glmin(a, n)
          MPI_REAL_PRECISION, MPI_MIN, NEKO_COMM, ierr)
   end function glmin
 
+  !>Min of an integer vector of length n 
+  function glimin(a, n) 
+    integer, intent(in) :: n
+    integer, dimension(n) :: a
+    integer :: tmp, glimin
+    integer :: i, ierr
+    tmp = a(1)
+    do i = 2, n
+       tmp =  min(tmp,a(i))
+    end do
+    call MPI_Allreduce(tmp, glimin, 1, &
+         MPI_INTEGER, MPI_MIN, NEKO_COMM, ierr)
+  end function glimin
+
+
 
 
   !> Change sign of vector \f$ a = -a \f$
@@ -261,10 +360,10 @@ subroutine chsign(a, n)
     do i = 1, n
        a(i) = -a(i)
     end do
-    
+
   end subroutine chsign
-  
-  !> Maximum value of a vector of length @a n
+
+  !> maximum value of a vector of length @a n
   function vlmax(vec,n) result(tmax)
     integer :: n, i
     real(kind=rp), intent(in) :: vec(n)
@@ -275,6 +374,18 @@ function vlmax(vec,n) result(tmax)
     end do
   end function vlmax
   
+  !> minimun value of a vector of length @a n
+  function vlmin(vec,n) result(tmin)
+    integer, intent(in) :: n
+    real(kind=rp), intent(in) ::  vec(n)
+    real(kind=rp) :: tmin
+    integer :: i
+    tmin = real(99.0e20, rp)
+    do i=1,n
+         tmin = min(tmin,vec(i))
+    end do
+  end function vlmin
+
   !> Invert a vector \f$ a = 1 / a \f$
   subroutine invcol1(a, n)
     integer, intent(in) :: n
@@ -284,9 +395,9 @@ subroutine invcol1(a, n)
     do i = 1, n
        a(i) = 1.0_rp / a(i)
     end do
-    
+
   end subroutine invcol1
- 
+
   !> Invert a vector \f$ a = b / c \f$
   subroutine invcol3(a, b, c, n)
     integer, intent(in) :: n
@@ -297,9 +408,9 @@ subroutine invcol3(a, b, c, n)
     do i = 1, n
        a(i) = b(i) / c(i)
     end do
-    
+
   end subroutine invcol3
-   
+
   !> Compute inverted vector \f$ a = 1 / b \f$
   subroutine invers2(a, b, n)
     integer, intent(in) :: n
@@ -310,13 +421,13 @@ subroutine invers2(a, b, n)
     do i = 1, n
        a(i) = 1.0_rp / b(i)
     end do
-    
+
   end subroutine invers2
 
   !> Compute a cross product \f$ u = v \times w \f$
   !! assuming vector components \f$ u = (u_1, u_2, u_3) \f$ etc.
   subroutine vcross(u1, u2, u3,  v1, v2, v3, w1, w2, w3, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(in) :: v1, v2, v3
     real(kind=rp), dimension(n), intent(in) :: w1, w2, w3
     real(kind=rp), dimension(n), intent(out) :: u1, u2, u3
@@ -330,7 +441,7 @@ subroutine vcross(u1, u2, u3,  v1, v2, v3, w1, w2, w3, n)
 
   end subroutine vcross
 
-  !> Compute a dot product \f$ dot = u \cdot v \f$ (2-d version) 
+  !> Compute a dot product \f$ dot = u \cdot v \f$ (2-d version)
   !! assuming vector components \f$ u = (u_1, u_2, u_3) \f$ etc.
   subroutine vdot2(dot, u1, u2, v1, v2, n)
     integer, intent(in) :: n
@@ -338,51 +449,51 @@ subroutine vdot2(dot, u1, u2, v1, v2, n)
     real(kind=rp), dimension(n), intent(in) :: v1, v2
     real(kind=rp), dimension(n), intent(out) :: dot
     integer :: i
-    do i = 1, n 
+    do i = 1, n
        dot(i) = u1(i)*v1(i) + u2(i)*v2(i)
     end do
 
   end subroutine vdot2
 
-  !> Compute a dot product \f$ dot = u \cdot v \f$ (3-d version) 
+  !> Compute a dot product \f$ dot = u \cdot v \f$ (3-d version)
   !! assuming vector components \f$ u = (u_1, u_2, u_3) \f$ etc.
   subroutine vdot3(dot, u1, u2, u3, v1, v2, v3, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(in) :: u1, u2, u3
     real(kind=rp), dimension(n), intent(in) :: v1, v2, v3
     real(kind=rp), dimension(n), intent(out) :: dot
     integer :: i
 
-    do i = 1, n 
+    do i = 1, n
        dot(i) = u1(i)*v1(i) + u2(i)*v2(i) + u3(i)*v3(i)
     end do
 
   end subroutine vdot3
 
-  !> Compute multiplication sum \f$ dot = u \cdot v \cdot w \f$  
+  !> Compute multiplication sum \f$ dot = u \cdot v \cdot w \f$
   function vlsc3(u, v, w, n) result(s)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(in) :: u, v, w
     real(kind=rp) :: s
     integer :: i
 
     s = 0.0_rp
-    do i = 1, n 
-      s = s + u(i)*v(i)*w(i)
+    do i = 1, n
+       s = s + u(i)*v(i)*w(i)
     end do
 
   end function vlsc3
-  
-  !> Compute multiplication sum \f$ dot = u \cdot v \cdot w \f$  
+
+  !> Compute multiplication sum \f$ dot = u \cdot v \cdot w \f$
   function vlsc2(u, v, n) result(s)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(in) :: u, v
     real(kind=rp) :: s
     integer :: i
 
     s = 0.0_rp
-    do i = 1, n 
-      s = s + u(i)*v(i)
+    do i = 1, n
+       s = s + u(i)*v(i)
     end do
 
   end function vlsc2
@@ -416,7 +527,7 @@ end subroutine add3
 
   !> Vector addition \f$ a = b + c + d\f$
   subroutine add4(a, b, c, d, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: d
     real(kind=rp), dimension(n), intent(inout) :: c
     real(kind=rp), dimension(n), intent(inout) :: b
@@ -439,7 +550,7 @@ subroutine sub2(a, b, n)
     do i = 1, n
        a(i) = a(i) - b(i)
     end do
-    
+
   end subroutine sub2
 
   !> Vector subtraction \f$ a = b - c \f$
@@ -469,13 +580,13 @@ subroutine add2s1(a, b, c1, n)
     do i = 1, n
        a(i) = c1 * a(i) + b(i)
     end do
-    
+
   end subroutine add2s1
 
   !> Vector addition with scalar multiplication  \f$ a = a + c_1 b \f$
   !! (multiplication on second argument)
   subroutine add2s2(a, b, c1, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(inout) :: b
     real(kind=rp), intent(in) :: c1
@@ -484,12 +595,12 @@ subroutine add2s2(a, b, c1, n)
     do i = 1, n
        a(i) = a(i) + c1 * b(i)
     end do
-    
+
   end subroutine add2s2
 
   !> Returns \f$ a = a + c1 * (b * b )\f$
   subroutine addsqr2s2(a, b, c1, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(in) :: b
     real(kind=rp), intent(in) :: c1
@@ -500,10 +611,10 @@ subroutine addsqr2s2(a, b, c1, n)
     end do
 
   end subroutine addsqr2s2
-  
+
   !> Multiplication by constant c \f$ a = c \cdot b \f$
   subroutine cmult2(a, b, c, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(in) :: b
     real(kind=rp), intent(in) :: c
@@ -512,12 +623,12 @@ subroutine cmult2(a, b, c, n)
     do i = 1, n
        a(i) = c * b(i)
     end do
-    
+
   end subroutine cmult2
 
   !> Vector division \f$ a = a / b \f$
   subroutine invcol2(a, b, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(in) :: b
     integer :: i
@@ -525,13 +636,13 @@ subroutine invcol2(a, b, n)
     do i = 1, n
        a(i) = a(i) /b(i)
     end do
-    
+
   end subroutine invcol2
 
 
   !> Vector multiplication \f$ a = a \cdot b \f$
   subroutine col2(a, b, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(in) :: b
     integer :: i
@@ -539,12 +650,12 @@ subroutine col2(a, b, n)
     do i = 1, n
        a(i) = a(i) * b(i)
     end do
-    
+
   end subroutine col2
 
   !> Vector multiplication with 3 vectors \f$ a =  b \cdot c \f$
   subroutine col3(a, b, c, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(in) :: b
     real(kind=rp), dimension(n), intent(in) :: c
@@ -553,26 +664,26 @@ subroutine col3(a, b, c, n)
     do i = 1, n
        a(i) =  b(i) * c(i)
     end do
-    
+
   end subroutine col3
 
   !> Returns \f$ a = a - b*c \f$
   subroutine subcol3(a, b, c, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(in) :: b
     real(kind=rp), dimension(n), intent(in) :: c
     integer :: i
 
     do i = 1,n
-       a(i) = a(i) - b(i) * c(i) 
+       a(i) = a(i) - b(i) * c(i)
     end do
 
   end subroutine subcol3
 
   !> Returns \f$ a = c1 * b + c2 * c \f$
   subroutine add3s2(a, b, c, c1, c2 ,n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(in) :: b
     real(kind=rp), dimension(n), intent(in) :: c
@@ -580,7 +691,7 @@ subroutine add3s2(a, b, c, c1, c2 ,n)
     integer :: i
 
     do i = 1,n
-       a(i) = c1 * b(i) + c2 * c(i) 
+       a(i) = c1 * b(i) + c2 * c(i)
     end do
 
   end subroutine add3s2
@@ -588,7 +699,7 @@ end subroutine add3s2
 
   !> Returns \f$ a = a - b*c*d \f$
   subroutine subcol4(a, b, c, d, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(in) :: b
     real(kind=rp), dimension(n), intent(in) :: c
@@ -600,24 +711,24 @@ subroutine subcol4(a, b, c, d, n)
     end do
 
   end subroutine subcol4
-  
+
   !> Returns \f$ a = a + b*c \f$
   subroutine addcol3(a, b, c, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(in) :: b
     real(kind=rp), dimension(n), intent(in) :: c
     integer :: i
 
     do i = 1,n
-       a(i) = a(i) + b(i) * c(i) 
+       a(i) = a(i) + b(i) * c(i)
     end do
 
   end subroutine addcol3
 
   !> Returns \f$ a = a + b*c*d \f$
   subroutine addcol4(a, b, c, d, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(in) :: b
     real(kind=rp), dimension(n), intent(in) :: c
@@ -632,7 +743,7 @@ end subroutine addcol4
 
   !> Returns \f$ a = b \dot c - d \cdot e \f$
   subroutine ascol5(a, b, c, d, e, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(in) :: b
     real(kind=rp), dimension(n), intent(in) :: c
@@ -648,7 +759,7 @@ end subroutine ascol5
 
   !> Returns \f$ a = b \dot c1 ( a - c2 \cdot c )\f$
   subroutine p_update(a, b, c, c1, c2, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(in) :: b
     real(kind=rp), dimension(n), intent(in) :: c
@@ -663,7 +774,7 @@ end subroutine p_update
 
   !> Returns \f$ a = b \dot c1 ( a - c2 \cdot c )\f$
   subroutine x_update(a, b, c, c1, c2, n)
-    integer, intent(in) :: n    
+    integer, intent(in) :: n
     real(kind=rp), dimension(n), intent(inout) :: a
     real(kind=rp), dimension(n), intent(in) :: b
     real(kind=rp), dimension(n), intent(in) :: c
@@ -686,14 +797,14 @@ function glsc2(a, b, n)
 
     tmp = 0.0_rp
     do i = 1, n
-       tmp = tmp + a(i) * b(i) 
+       tmp = tmp + a(i) * b(i)
     end do
-    
+
     call MPI_Allreduce(tmp, glsc2, 1, &
          MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
 
   end function glsc2
-  
+
   !> Weighted inner product \f$ a^T b c \f$
   function glsc3(a, b, c, n)
     integer, intent(in) :: n
@@ -707,7 +818,7 @@ function glsc3(a, b, c, n)
     do i = 1, n
        tmp = tmp + a(i) * b(i) * c(i)
     end do
-    
+
     call MPI_Allreduce(tmp, glsc3, 1, &
          MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
 
@@ -725,7 +836,7 @@ function glsc4(a, b, c, d, n)
     do i = 1, n
        tmp = tmp + a(i) * b(i) * c(i) * d(i)
     end do
-    
+
     call MPI_Allreduce(tmp, glsc4, 1, &
          MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
 
@@ -742,34 +853,34 @@ subroutine sort(a,ind,n)
     end do
 
     if (n.le.1) return
-    
+
     l=n/2+1
     ir=n
-    do while (.true.) 
+    do while (.true.)
        if (l.gt.1) then
           l=l-1
           aa  = a  (l)
           ii  = ind(l)
        else
-               aa =   a(ir)
-               ii = ind(ir)
-            a(ir) =   a( 1)
+          aa =   a(ir)
+          ii = ind(ir)
+          a(ir) =   a( 1)
           ind(ir) = ind( 1)
           ir=ir-1
           if (ir.eq.1) then
-               a(1) = aa
+             a(1) = aa
              ind(1) = ii
              return
           endif
        endif
        i=l
        j=l+l
-       do while (j .le. ir) 
+       do while (j .le. ir)
           if (j.lt.ir) then
              if ( a(j).lt.a(j+1) ) j=j+1
           endif
           if (aa.lt.a(j)) then
-               a(i) = a(j)
+             a(i) = a(j)
              ind(i) = ind(j)
              i=j
              j=j+j
@@ -781,5 +892,5 @@ subroutine sort(a,ind,n)
        ind(i) = ii
     end do
   end subroutine sort
-
+  
 end module math
diff --git a/src/math/mathops.f90 b/src/math/mathops.f90
index 81348f2a932..f381c70486b 100644
--- a/src/math/mathops.f90
+++ b/src/math/mathops.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,46 +21,46 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 
-  !> Collection of vector field operations operating on \f$ a_i \f$ and \f$b_i\f$.
-  !! Note that in general the indices \f$i=1 \ldots gdim\f$ and \f$j=1 \ldots n\f$.
-  !! \f$gdim\f$ is assumed to be either 2 or 3 only.
+ !> Collection of vector field operations operating on \f$ a_i \f$ and \f$b_i\f$.
+ !! Note that in general the indices \f$i=1 \ldots gdim\f$ and \f$j=1 \ldots n\f$.
+ !! \f$gdim\f$ is assumed to be either 2 or 3 only.
 
 module mathops
   use num_types, only : rp
@@ -88,7 +88,7 @@ subroutine opchsign(a1, a2, a3, gdim, n)
     end if
 
   end subroutine opchsign
-  
+
   !> \f$ a_i(j) = a_i(j) * c(j) \f$
   subroutine opcolv(a1, a2, a3, c, gdim, n)
     integer, intent(in) :: n, gdim
@@ -111,7 +111,7 @@ subroutine opcolv(a1, a2, a3, c, gdim, n)
 
   end subroutine opcolv
 
-  !> \f$ a_i(j) = b_i(j) * c(j) * d \f$ 
+  !> \f$ a_i(j) = b_i(j) * c(j) * d \f$
   subroutine opcolv3c(a1, a2, a3, b1, b2, b3, c, d, n, gdim)
     integer, intent(in) :: n, gdim
     real(kind=rp), dimension(n), intent(inout) :: a1, a2, a3
@@ -134,7 +134,7 @@ subroutine opcolv3c(a1, a2, a3, b1, b2, b3, c, d, n, gdim)
 
   end subroutine opcolv3c
 
-  !> \f$ a_i(j) = a_i(j) + b_i(j) * c \f$ 
+  !> \f$ a_i(j) = a_i(j) + b_i(j) * c \f$
   subroutine opadd2cm(a1, a2, a3, b1, b2, b3, c, n, gdim)
     integer, intent(in) :: n, gdim
     real(kind=rp), dimension(n), intent(inout) :: a1, a2, a3
@@ -164,7 +164,7 @@ subroutine opadd2col(a1, a2, a3, b1, b2, b3, c, n, gdim)
     real(kind=rp), dimension(n), intent(in) :: b1, b2, b3
     real(kind=rp), intent(in) :: c(n)
     integer :: i
-    
+
     if (gdim .eq. 3) then
        do i = 1, n
           a1(i) = a1(i) + b1(i)*c(i)
@@ -177,7 +177,7 @@ subroutine opadd2col(a1, a2, a3, b1, b2, b3, c, n, gdim)
           a2(i) = a2(i) + b2(i)*c(i)
        end do
     endif
-    
+
   end subroutine opadd2col
-  
+
 end module mathops
diff --git a/src/math/matrix.f90 b/src/math/matrix.f90
index 5e5de61c507..fff18f79317 100644
--- a/src/math/matrix.f90
+++ b/src/math/matrix.f90
@@ -61,7 +61,7 @@ module matrix
      generic :: assignment(=) => matrix_assign_matrix, &
           matrix_assign_scalar
   end type matrix_t
-  
+
 contains
 
   !> Initialise a matrix of size `nrows*ncols`.
diff --git a/src/math/mxm_wrapper.F90 b/src/math/mxm_wrapper.F90
index 2e46c3e2ce2..ef2dbc46ee9 100644
--- a/src/math/mxm_wrapper.F90
+++ b/src/math/mxm_wrapper.F90
@@ -13,7 +13,7 @@ module mxm_wrapper
   interface mxm_blas
      module procedure mxm_blas_sp, mxm_blas_dp, mxm_blas_qp
   end interface mxm_blas
-  
+
   interface mxm_libxsmm
      module procedure mxm_libxsmm_sp, mxm_libxsmm_dp, mxm_libxsmm_qp
   end interface mxm_libxsmm
@@ -48,7 +48,7 @@ subroutine mxm_blas_sp(a,n1,b,n2,c,n3)
     call sgemm('N','N',n1,n3,n2,1.0,a,n1,b,n2,0.0,c,n1)
 
   end subroutine mxm_blas_sp
-  
+
   subroutine mxm_blas_dp(a,n1,b,n2,c,n3)
     integer, intent(in) :: n1, n2, n3
     real(kind=dp), intent(in) :: a(n1, n2)
@@ -83,9 +83,9 @@ subroutine mxm_libxsmm_sp(a,n1,b,n2,c,n3)
        call libxsmm_smmcall_abc(xmm, a, b, c)
        return
     end if
-#endif       
+#endif
   end subroutine mxm_libxsmm_sp
-  
+
   subroutine mxm_libxsmm_dp(a,n1,b,n2,c,n3)
     integer, intent(in) :: n1, n2, n3
     real(kind=dp), intent(in) :: a(n1, n2)
@@ -112,5 +112,5 @@ subroutine mxm_libxsmm_qp(a,n1,b,n2,c,n3)
     call neko_error('Not implemented yet!')
 
   end subroutine mxm_libxsmm_qp
-  
+
 end module mxm_wrapper
diff --git a/src/math/operators.f90 b/src/math/operators.f90
index d8836ba6fb5..524f1d0b754 100644
--- a/src/math/operators.f90
+++ b/src/math/operators.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2020-2023, The Neko Authors
+! Copyright (c) 2020-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -32,35 +32,48 @@
 !
 !> Operators
 module operators
-  use neko_config
+  use neko_config, only : NEKO_BCKND_SX, NEKO_BCKND_DEVICE, NEKO_BCKND_XSMM,&
+                          NEKO_DEVICE_MPI
   use num_types, only : rp
-  use opr_cpu
-  use opr_sx
-  use opr_xsmm
-  use opr_device
+  use opr_cpu, only : opr_cpu_cfl, opr_cpu_curl, opr_cpu_opgrad, opr_cpu_conv1,&
+                      opr_cpu_cdtp, opr_cpu_dudxyz, opr_cpu_lambda2
+  use opr_sx, only : opr_sx_cfl, opr_sx_curl, opr_sx_dudxyz, opr_sx_opgrad, &
+                     opr_sx_cdtp, opr_sx_conv1, opr_sx_lambda2
+  use opr_xsmm, only : opr_xsmm_cdtp, opr_xsmm_conv1, opr_xsmm_curl, &
+                       opr_xsmm_dudxyz, opr_xsmm_opgrad
+  use opr_device, only : opr_device_cdtp, opr_device_cfl, opr_device_curl, &
+                         opr_device_conv1, opr_device_dudxyz, &
+                         opr_device_lambda2, opr_device_opgrad
   use space, only : space_t
   use coefs, only : coef_t
-  use field, only: field_t
-  use math
+  use field, only : field_t
+  use math, only : glsum, cmult, add2, cadd
+  use device, only : c_ptr, device_get_ptr
+  use device_math, only : device_add2, device_cmult
   use comm
-  use device
-  use device_math    
   implicit none
   private
 
-  public :: dudxyz, opgrad, ortho, cdtp, conv1, curl, cfl, lambda2op
+  public :: dudxyz, opgrad, ortho, cdtp, conv1, curl, cfl,&
+            lambda2op, strain_rate
   
 contains
-  
-  !> Compute dU/dx or dU/dy or dU/dz 
-  subroutine dudxyz (du,u,dr,ds,dt,coef)
+
+  !> Compute derivative of a scalar field along a single direction.
+  !! @param du Holds the resulting derivative values.
+  !! @param u The values of the field.
+  !! @param dr The derivative of r with respect to the chosen direction.
+  !! @param ds The derivative of s with respect to the chosen direction.
+  !! @param dt The derivative of t with respect to the chosen direction.
+  !! @param coef The SEM coefficients.
+  subroutine dudxyz (du, u, dr, ds, dt, coef)
     type(coef_t), intent(in), target :: coef
     real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv), &
          intent(inout) ::  du
     real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv), &
          intent(in) ::  u, dr, ds, dt
 
-    if (NEKO_BCKND_SX .eq. 1) then 
+    if (NEKO_BCKND_SX .eq. 1) then
        call opr_sx_dudxyz(du, u, dr, ds, dt, coef)
     else if (NEKO_BCKND_XSMM .eq. 1) then
        call opr_xsmm_dudxyz(du, u, dr, ds, dt, coef)
@@ -69,17 +82,27 @@ subroutine dudxyz (du,u,dr,ds,dt,coef)
     else
        call opr_cpu_dudxyz(du, u, dr, ds, dt, coef)
     end if
-    
+
   end subroutine dudxyz
 
-  !> Equals wgradm1 in nek5000. Gradient of velocity vectors.
-  subroutine opgrad(ux,uy,uz,u,coef, es, ee) ! weak form of grad     
-    type(coef_t), intent(in) :: coef  
+  !> Compute the gradient of a scalar field.
+  !! @details By providing `es` and `ee`, it is possible to compute only for a
+  !! range of element indices.
+  !! @param ux Will store the x component of the gradient.
+  !! @param uy Will store the y component of the gradient.
+  !! @param uz Will store the z component of the gradient.
+  !! @param u The values of the field.
+  !! @param coef The SEM coefficients.
+  !! @param es Starting element index, optional, defaults to 1.
+  !! @param ee Ending element index, optional, defaults to `nelv`.
+  !! @note Equals wgradm1 in Nek5000, the weak form of the gradient.
+  subroutine opgrad(ux, uy, uz, u, coef, es, ee)
+    type(coef_t), intent(in) :: coef
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: ux
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uy
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uz
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: u
-    integer, optional :: es, ee        
+    integer, optional :: es, ee
     integer :: eblk_start, eblk_end
 
     if (present(es)) then
@@ -94,7 +117,7 @@ subroutine opgrad(ux,uy,uz,u,coef, es, ee) ! weak form of grad
        eblk_end = coef%msh%nelv
     end if
 
-    if (NEKO_BCKND_SX .eq. 1) then 
+    if (NEKO_BCKND_SX .eq. 1) then
        call opr_sx_opgrad(ux, uy, uz, u, coef)
     else if (NEKO_BCKND_XSMM .eq. 1) then
        call opr_xsmm_opgrad(ux, uy, uz, u, coef)
@@ -103,24 +126,34 @@ subroutine opgrad(ux,uy,uz,u,coef, es, ee) ! weak form of grad
     else
        call opr_cpu_opgrad(ux, uy, uz, u, coef, eblk_start, eblk_end)
     end if
-    
+
   end subroutine opgrad
-  
+
   !> Othogonalize with regard to vector (1,1,1,1,1,1...,1)^T.
-  subroutine ortho(x,n ,glb_n)
+  !! @param x The vector to orthogonolize.
+  !! @param n The size of `x`.
+  !! @param glb_n The global number of elements of `x` across all MPI ranks. Be careful with overflow!
+  subroutine ortho(x, n, glb_n)
     integer, intent(in) :: n
     integer, intent(in) :: glb_n
     real(kind=rp), dimension(n), intent(inout) :: x
     real(kind=rp) :: rlam
 
-    rlam = glsum(x,n)/glb_n
-    call cadd(x,-rlam,n)
+    rlam = glsum(x, n)/glb_n
+    call cadd(x, -rlam, n)
 
   end subroutine ortho
-  
-  !> Compute DT*X (entire field)
-  !> This needs to be revised... the loop over n1,n2 is probably unesccssary
-  subroutine cdtp (dtx,x,dr,ds,dt, coef)
+
+  !> Apply D^T to a scalar field, where D is the derivative matrix.
+  !! @param dtx Will store the result.
+  !! @param x The values of the field.
+  !! @param dr The derivative of r with respect to the chosen direction.
+  !! @param ds The derivative of s with respect to the chosen direction.
+  !! @param dt The derivative of t with respect to the chosen direction.
+  !! @param coef The SEM coefficients.
+  !> @note This needs to be revised... the loop over n1,n2 is probably
+  !! unesccssary
+  subroutine cdtp (dtx, x, dr, ds, dt, coef)
     type(coef_t), intent(in) :: coef
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: dtx
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: x
@@ -128,7 +161,7 @@ subroutine cdtp (dtx,x,dr,ds,dt, coef)
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: ds
     real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: dt
 
-    if (NEKO_BCKND_SX .eq. 1) then 
+    if (NEKO_BCKND_SX .eq. 1) then
        call opr_sx_cdtp(dtx, x, dr, ds, dt, coef)
     else if (NEKO_BCKND_XSMM .eq. 1) then
        call opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef)
@@ -137,9 +170,9 @@ subroutine cdtp (dtx,x,dr,ds,dt, coef)
     else
        call opr_cpu_cdtp(dtx, x, dr, ds, dt, coef)
     end if
-    
+
   end subroutine cdtp
-  
+
   !> Compute the advection term.
   !! @param du Holds the result.
   !! @param u The advected field.
@@ -147,7 +180,7 @@ end subroutine cdtp
   !! @param vy The y component of the advecting velocity.
   !! @param vz The z component of the advecting velocity.
   !! @param Xh The function space for the fields involved.
-  !! @param coeff The coeffients of the (Xh, mesh) pair.
+  !! @param coef The SEM coefficients.
   !! @param es Starting element index, defaults to 1.
   !! @param ee Last element index, defaults to mesh size.
   subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
@@ -167,14 +200,14 @@ subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
       else
          eblk_start = 1
       end if
-      
+
       if (present(ee)) then
          eblk_end = ee
       else
          eblk_end = coef%msh%nelv
       end if
-      
-      if (NEKO_BCKND_SX .eq. 1) then 
+
+      if (NEKO_BCKND_SX .eq. 1) then
          call opr_sx_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
       else if (NEKO_BCKND_XSMM .eq. 1) then
          call opr_xsmm_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim)
@@ -187,7 +220,17 @@ subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee)
 
   end subroutine conv1
 
-  subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
+  !! Compute the curl fo a vector field.
+  !! @param w1 Will store the x component of the curl.
+  !! @param w2 Will store the y component of the curl.
+  !! @param w3 Will store the z component of the curl.
+  !! @param u1 The x component of the vector field.
+  !! @param u2 The y component of the vector field.
+  !! @param u3 The z component of the vector field.
+  !! @param work1 A temporary array for computations.
+  !! @param work2 A temporary array for computations.
+  !! @param coef The SEM coefficients.
+  subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
     type(field_t), intent(inout) :: w1
     type(field_t), intent(inout) :: w2
     type(field_t), intent(inout) :: w3
@@ -196,26 +239,35 @@ subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
     type(field_t), intent(inout) :: u3
     type(field_t), intent(inout) :: work1
     type(field_t), intent(inout) :: work2
-    type(coef_t), intent(in)  :: c_Xh
+    type(coef_t), intent(in)  :: coef
 
     if (NEKO_BCKND_SX .eq. 1) then
-       call opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
+       call opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
     else if (NEKO_BCKND_XSMM .eq. 1) then
-       call opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
+       call opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
     else if (NEKO_BCKND_DEVICE .eq. 1) then
-       call opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
+       call opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
     else
-       call opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh)
+       call opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef)
     end if
 
   end subroutine curl
 
+  !! Compute the CFL number
+  !! @param dt The timestep.
+  !! @param u The x component of velocity.
+  !! @param v The y component of velocity.
+  !! @param w The z component of velocity.
+  !! @param Xh The SEM function space.
+  !! @param coef The SEM coefficients.
+  !! @param nelv The total number of elements.
+  !! @param gdim Number of geometric dimensions.
   function cfl(dt, u, v, w, Xh, coef, nelv, gdim)
-    type(space_t) :: Xh
-    type(coef_t) :: coef
-    integer :: nelv, gdim
-    real(kind=rp) :: dt
-    real(kind=rp), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) ::  u, v, w
+    type(space_t), intent(in) :: Xh
+    type(coef_t), intent(in) :: coef
+    integer, intent(in) :: nelv, gdim
+    real(kind=rp), intent(in) :: dt
+    real(kind=rp), dimension(Xh%lx,Xh%ly,Xh%lz,nelv), intent(in) ::  u, v, w
     real(kind=rp) :: cfl
     integer :: ierr
 
@@ -231,13 +283,22 @@ function cfl(dt, u, v, w, Xh, coef, nelv, gdim)
        call MPI_Allreduce(MPI_IN_PLACE, cfl, 1, &
             MPI_REAL_PRECISION, MPI_MAX, NEKO_COMM, ierr)
     end if
-    
+
   end function cfl
-  
+
   !> Compute double the strain rate tensor, i.e du_i/dx_j + du_j/dx_i
-  !! Similar to comp_sij in Nek5000.
-  subroutine strain_rate(s11, s22, s33, s12, s13, s23, &
-                         u, v, w, coef)
+  !! @param s11 Will hold the 1,1 component of the strain rate tensor.
+  !! @param s22 Will hold the 2,2 component of the strain rate tensor.
+  !! @param s33 Will hold the 3,3 component of the strain rate tensor.
+  !! @param s12 Will hold the 1,2 component of the strain rate tensor.
+  !! @param s13 Will hold the 1,3 component of the strain rate tensor.
+  !! @param s23 Will hold the 2,3 component of the strain rate tensor.
+  !! @param u The x component of velocity.
+  !! @param v The y component of velocity.
+  !! @param w The z component of velocity.
+  !! @param coef The SEM coefficients.
+  !! @note Similar to comp_sij in Nek5000.
+  subroutine strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef)
     type(field_t), intent(in) :: u, v, w !< velocity components
     type(coef_t), intent(in) :: coef
     real(kind=rp), intent(inout) :: s11(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
@@ -246,9 +307,9 @@ subroutine strain_rate(s11, s22, s33, s12, s13, s23, &
     real(kind=rp), intent(inout) :: s12(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
     real(kind=rp), intent(inout) :: s23(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
     real(kind=rp), intent(inout) :: s13(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv)
-    
+
     type(c_ptr) :: s11_d, s22_d, s33_d, s12_d, s23_d, s13_d
-    
+
     integer :: nelv, lxyz
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
@@ -259,10 +320,10 @@ subroutine strain_rate(s11, s22, s33, s12, s13, s23, &
        s23_d = device_get_ptr(s23)
        s13_d = device_get_ptr(s13)
     endif
-    
+
     nelv = u%msh%nelv
     lxyz = u%Xh%lxyz
-    
+
     ! we use s11 as a work array here
     call dudxyz (s12, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef)
     call dudxyz (s11, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef)
@@ -301,94 +362,28 @@ subroutine strain_rate(s11, s22, s33, s12, s13, s23, &
        call cmult(s22, 2.0_rp, nelv*lxyz)
        call cmult(s33, 2.0_rp, nelv*lxyz)
     endif
-  
+
   end subroutine strain_rate
 
-  !> Lambda2 calcuation. 
-  !! @param lambda2 holds the second eigen values.
-  !! @param u, the x-velocity
-  !! @param v, the y-velocity
-  !! @param w, the z-velocity
-  !! @param coef, the field coefficients
-  subroutine lambda2op(lambda2, u, v, w,coef)      
-    type(coef_t), intent(in) :: coef  
-    type(field_t), intent(inout) :: lambda2 
+  !> Compute the Lambda2 field for a given velocity field.
+  !! @param lambda2 Holds the computed Lambda2 field.
+  !! @param u The x-velocity.
+  !! @param v The y-velocity.
+  !! @param w the z-velocity.
+  !! @param coef The SEM coefficients.
+  subroutine lambda2op(lambda2, u, v, w, coef)
+    type(coef_t), intent(in) :: coef
+    type(field_t), intent(inout) :: lambda2
     type(field_t), intent(in) :: u, v, w
-    real(kind=rp) :: grad(coef%Xh%lxyz,3,3)
-    integer :: temp_indices(9), e, i, ind_sort(3)
-    real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2
-    real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23
-    real(kind=rp) :: a11, a22, a33, a12, a13, a23
 
-    if (NEKO_BCKND_DEVICE .eq. 1) then
+    if (NEKO_BCKND_SX .eq. 1) then
+       call opr_sx_lambda2(lambda2, u, v, w, coef)
+    else if (NEKO_BCKND_DEVICE .eq. 1) then
        call opr_device_lambda2(lambda2, u, v, w, coef)
     else
-       do e = 1, coef%msh%nelv
-          call opgrad(grad(1,1,1), grad(1,1,2), grad(1,1,3), &
-                              u%x(1,1,1,e),coef,e,e)
-          call opgrad(grad(1,2,1), grad(1,2,2), grad(1,2,3), &
-                              v%x(1,1,1,e),coef,e,e)
-          call opgrad(grad(1,3,1), grad(1,3,2), grad(1,3,3), &
-                              w%x(1,1,1,e),coef,e,e)
-
-          do i = 1, coef%Xh%lxyz
-             s11 = grad(i,1,1)
-             s22 = grad(i,2,2)
-             s33 = grad(i,3,3)
-
-             
-             s12 = 0.5*(grad(i,1,2) + grad(i,2,1))
-             s13 = 0.5*(grad(i,1,3) + grad(i,3,1))
-             s23 = 0.5*(grad(i,2,3) + grad(i,3,2))
-
-             o12 = 0.5*(grad(i,1,2) - grad(i,2,1))
-             o13 = 0.5*(grad(i,1,3) - grad(i,3,1))
-             o23 = 0.5*(grad(i,2,3) - grad(i,3,2))
-
-             a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13
-             a12 = s11 * s12  +  s12 * s22  +  s13 * s23 - o13 * o23
-             a13 = s11 * s13  +  s12 * s23  +  s13 * s33 + o12 * o23
-
-             a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23
-             a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13
-             a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23
-
-
-             B = -(a11 + a22 + a33)
-             C = -(a12*a12 + a13*a13 + a23*a23 &
-                  - a11 * a22 - a11 * a33 - a22 * a33)
-             D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 &
-                  - a22 * a13*a13 - a33 * a12*a12  +  a11 * a22 * a33)
-
-
-             q = (3.0 * C - B*B) / 9.0
-             r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0
-             theta = acos( r / sqrt(-q*q*q) )
-
-             eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0
-             eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0
-             eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0
-
-             if (eigen(1) .le. eigen(2) .and. eigen(2) .le. eigen(3)) then
-                l2 = eigen(2)
-             else if (eigen(3) .le. eigen(2) .and. eigen(2) .le. eigen(1)) then
-                l2 = eigen(2)
-             else if (eigen(1) .le. eigen(3) .and. eigen(3) .le. eigen(2)) then
-                l2 = eigen(3)
-             else if (eigen(2) .le. eigen(3) .and. eigen(3) .le. eigen(1)) then
-                l2 = eigen(3)                
-             else if (eigen(2) .le. eigen(1) .and. eigen(1) .le. eigen(3)) then
-                l2 = eigen(1)
-             else if (eigen(3) .le. eigen(1) .and. eigen(1) .le. eigen(2)) then
-                l2 = eigen(1)
-             else
-                l2 = 0.0
-             end if
-             lambda2%x(i,1,1,e) = l2/(coef%B(i,1,1,e)**2)
-          end do
-       end do
+       call opr_cpu_lambda2(lambda2, u, v, w, coef)
     end if
-    
+
   end subroutine lambda2op
-  
+
 end module operators
diff --git a/src/math/schwarz.f90 b/src/math/schwarz.f90
index ed5417fd3c0..a946d0ba04f 100644
--- a/src/math/schwarz.f90
+++ b/src/math/schwarz.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 !> Overlapping schwarz solves
@@ -65,7 +65,6 @@ module schwarz
   use space
   use dofmap, only : dofmap_t
   use bc
-  use dirichlet, only : dirichlet_t
   use gather_scatter
   use device_schwarz
   use device_math
@@ -75,32 +74,32 @@ module schwarz
   use, intrinsic :: iso_c_binding
   implicit none
   private
-  
+
   type, public :: schwarz_t
-    real(kind=rp), allocatable :: work1(:)
-    real(kind=rp), allocatable :: work2(:)
-    real(kind=rp), allocatable :: wt(:,:,:,:,:)
-    type(c_ptr) :: work1_d = C_NULL_PTR
-    type(c_ptr) :: work2_d = C_NULL_PTR
-    type(c_ptr) :: wt_d = C_NULL_PTR
-    type(space_t) :: Xh_schwarz !< needed to init gs
-    type(gs_t) :: gs_schwarz !< We are only interested in the gather-scatter!
-    type(dofmap_t) :: dm_schwarz !< needed to init gs
-    type(fdm_t) :: fdm
-    type(space_t), pointer :: Xh
-    type(bc_list_t), pointer :: bclst
-    type(dofmap_t), pointer :: dm
-    type(gs_t), pointer :: gs_h
-    type(mesh_t), pointer :: msh
-    type(c_ptr) :: event
-  contains 
-    procedure, pass(this) :: init => schwarz_init
-    procedure, pass(this) :: free => schwarz_free
-    procedure, pass(this) :: compute => schwarz_compute
+     real(kind=rp), allocatable :: work1(:)
+     real(kind=rp), allocatable :: work2(:)
+     real(kind=rp), allocatable :: wt(:,:,:,:,:)
+     type(c_ptr) :: work1_d = C_NULL_PTR
+     type(c_ptr) :: work2_d = C_NULL_PTR
+     type(c_ptr) :: wt_d = C_NULL_PTR
+     type(space_t) :: Xh_schwarz !< needed to init gs
+     type(gs_t) :: gs_schwarz !< We are only interested in the gather-scatter!
+     type(dofmap_t) :: dm_schwarz !< needed to init gs
+     type(fdm_t) :: fdm
+     type(space_t), pointer :: Xh
+     type(bc_list_t), pointer :: bclst
+     type(dofmap_t), pointer :: dm
+     type(gs_t), pointer :: gs_h
+     type(mesh_t), pointer :: msh
+     type(c_ptr) :: event
+   contains
+     procedure, pass(this) :: init => schwarz_init
+     procedure, pass(this) :: free => schwarz_free
+     procedure, pass(this) :: compute => schwarz_compute
   end type schwarz_t
- 
+
 contains
-  
+
   subroutine schwarz_init(this, Xh, dm, gs_h, bclst, msh)
     class(schwarz_t), target, intent(inout) :: this
     type(space_t), target, intent(inout) :: Xh
@@ -110,15 +109,15 @@ subroutine schwarz_init(this, Xh, dm, gs_h, bclst, msh)
     type(bc_list_t), target, intent(inout):: bclst
 
     call this%free()
-    
+
     call this%Xh_schwarz%init(GLL, Xh%lx+2, Xh%lx+2, Xh%lx+2)
-    this%dm_schwarz = dofmap_t(msh, this%Xh_schwarz) 
+    this%dm_schwarz = dofmap_t(msh, this%Xh_schwarz)
     call this%gs_schwarz%init(this%dm_schwarz)
 
     allocate(this%work1(this%dm_schwarz%size()))
     allocate(this%work2(this%dm_schwarz%size()))
     allocate(this%wt(Xh%lx, Xh%lx, 4, msh%gdim, msh%nelv))
-    
+
     call this%fdm%init(Xh, dm, gs_h)
 
 
@@ -128,28 +127,29 @@ subroutine schwarz_init(this, Xh, dm, gs_h, bclst, msh)
     this%dm => dm
     this%gs_h => gs_h
     if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_map(this%work1, this%work1_d,this%dm_schwarz%size()) 
-       call device_map(this%work2, this%work2_d,this%dm_schwarz%size()) 
+       call device_map(this%work1, this%work1_d,this%dm_schwarz%size())
+       call device_map(this%work2, this%work2_d,this%dm_schwarz%size())
     end if
 
 
     call schwarz_setup_wt(this)
     if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_alloc(this%wt_d,int(this%dm%size()*rp, i8)) 
+       call device_alloc(this%wt_d,int(this%dm%size()*rp, i8))
        call rone(this%work1, this%dm%size())
        call schwarz_wt3d(this%work1, this%wt, Xh%lx, msh%nelv)
-       call device_memcpy(this%work1, this%wt_d, this%dm%size(), HOST_TO_DEVICE)
+       call device_memcpy(this%work1, this%wt_d, this%dm%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
        call device_event_create(this%event, 2)
     end if
   end subroutine schwarz_init
- 
+
   subroutine schwarz_free(this)
     class(schwarz_t), intent(inout) :: this
-    
+
     if(allocated(this%work1)) deallocate(this%work1)
     if(allocated(this%work2)) deallocate(this%work2)
     if(allocated(this%wt)) deallocate(this%wt)
-    
+
     call this%Xh_schwarz%free()
     call this%gs_schwarz%free()
     !why cant I do this?
@@ -162,7 +162,7 @@ subroutine schwarz_free(this)
     nullify(this%gs_h)
     nullify(this%msh)
   end subroutine schwarz_free
-  !> setup weights 
+  !> setup weights
   subroutine schwarz_setup_wt(this)
     class(schwarz_t), intent(inout) :: this
     integer :: enx,eny,enz, n, ie, k, ns
@@ -178,19 +178,21 @@ subroutine schwarz_setup_wt(this)
       enz = Xh_schwarz%lz
       if(.not. msh%gdim .eq. 3) enz=1
       ns = enx*eny*enz*msh%nelv
-      
+
       call rone(work2, ns)
       call rzero(work1, ns)
- 
+
       !   Sum overlap region (border excluded)
       !   Cred to PFF for this, very clever
       call schwarz_extrude(work1, 0, zero, work2, 0, one , enx, eny, enz, msh%nelv)
       if (NEKO_BCKND_DEVICE .eq. 1) then
-         call device_memcpy(work2, this%work2_d, ns, HOST_TO_DEVICE)
-         call this%gs_schwarz%op(work2, ns, GS_OP_ADD) 
-         call device_memcpy(work2, this%work2_d, ns, DEVICE_TO_HOST)
+         call device_memcpy(work2, this%work2_d, ns, &
+                            HOST_TO_DEVICE, sync=.false.)
+         call this%gs_schwarz%op(work2, ns, GS_OP_ADD)
+         call device_memcpy(work2, this%work2_d, ns, &
+                            DEVICE_TO_HOST, sync=.false.)
       else
-         call this%gs_schwarz%op(work2, ns, GS_OP_ADD) 
+         call this%gs_schwarz%op(work2, ns, GS_OP_ADD)
       end if
       call schwarz_extrude(work2, 0, one, work1, 0, -one, enx, eny, enz, msh%nelv)
       call schwarz_extrude(work2, 2, one, work2, 0, one, enx, eny, enz, msh%nelv)
@@ -200,13 +202,15 @@ subroutine schwarz_setup_wt(this)
       ! else
       call schwarz_toreg3d(work1, work2, Xh%lx, msh%nelv)
       ! endif
-      
+
       if (NEKO_BCKND_DEVICE .eq. 1) then
-         call device_memcpy(work1, this%work1_d, n, HOST_TO_DEVICE)
-         call this%gs_h%op(work1, n, GS_OP_ADD) 
-         call device_memcpy(work1, this%work1_d, n, DEVICE_TO_HOST, sync=.true.)
+         call device_memcpy(work1, this%work1_d, n, &
+                            HOST_TO_DEVICE, sync=.false.)
+         call this%gs_h%op(work1, n, GS_OP_ADD)
+         call device_memcpy(work1, this%work1_d, n, &
+                            DEVICE_TO_HOST, sync=.true.)
       else
-          call this%gs_h%op(work1, n, GS_OP_ADD) 
+         call this%gs_h%op(work1, n, GS_OP_ADD)
       end if
 
       k = 1
@@ -248,9 +252,9 @@ end subroutine schwarz_setup_schwarz_wt2d_2
   subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
     integer, intent(in) ::n, nelv, ie
     real(kind=rp), intent(inout) :: wt(n,n,4,3,nelv)
-    real(kind=rp), intent(inout) :: work(n,n,n)      
+    real(kind=rp), intent(inout) :: work(n,n,n)
     integer :: i,j,k
-    
+
     do k = 1, n
        do j = 1, n
           wt(j,k,1,1,ie) = 1.0_rp / work(1,j,k)
@@ -259,7 +263,7 @@ subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
           wt(j,k,4,1,ie) = 1.0_rp / work(n,j,k)
        end do
     end do
-    
+
     do k = 1, n
        do i = 1, n
           wt(i,k,1,2,ie) = 1.0_rp / work(i,1,k)
@@ -268,7 +272,7 @@ subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
           wt(i,k,4,2,ie) = 1.0_rp / work(i,n,k)
        end do
     end do
-    
+
     do j = 1, n
        do i = 1, n
           wt(i,j,1,3,ie) = 1.0_rp / work(i,j,1)
@@ -277,7 +281,7 @@ subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv)
           wt(i,j,4,3,ie) = 1.0_rp / work(i,j,n)
        end do
     end do
-    
+
   end subroutine schwarz_setup_schwarz_wt3d_2
 
   !> convert array a from extended size to regular
@@ -315,7 +319,7 @@ subroutine schwarz_toext3d(a, b, n, nelv)
     end do
   end subroutine schwarz_toext3d
 
-  !> Sum values along rows l1, l2 with weights f1, f2 and store along row l1. 
+  !> Sum values along rows l1, l2 with weights f1, f2 and store along row l1.
   !! Helps us avoid complicated communcation to get neighbor values.
   !! Simply copy interesting values to the boundary and then do gs_op on extended array.
   subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
@@ -325,7 +329,7 @@ subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
     integer :: i, j, k, ie, i0, i1
     i0=2
     i1=nx-1
-    
+
     if(nz .eq. 1) then
        do ie = 1, nelv
           do j = i0, i1
@@ -370,7 +374,7 @@ subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv)
        end do
     endif
   end subroutine schwarz_extrude
-  
+
   subroutine schwarz_compute(this, e, r)
     class(schwarz_t), intent(inout) :: this
     real(kind=rp), dimension(this%dm%size()), intent(inout) :: e, r
@@ -381,80 +385,80 @@ subroutine schwarz_compute(this, e, r)
     associate(work1 => this%work1, work1_d => this%work1_d,&
               work2 => this%work2, work2_d => this%work2_d)
 
-    n  = this%dm%size()
-    enx=this%Xh_schwarz%lx
-    eny=this%Xh_schwarz%ly
-    enz=this%Xh_schwarz%lz
-    if(.not. this%msh%gdim .eq. 3) enz=1
-    ns = enx*eny*enz*this%msh%nelv
-    if (NEKO_BCKND_DEVICE .eq. 1) then
-       r_d = device_get_ptr(r)
-       e_d = device_get_ptr(e)
-       call device_event_record(this%event, glb_cmd_queue)
-       call device_stream_wait_event(aux_cmd_queue, this%event, 0)
-       call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, &
+      n  = this%dm%size()
+      enx=this%Xh_schwarz%lx
+      eny=this%Xh_schwarz%ly
+      enz=this%Xh_schwarz%lz
+      if(.not. this%msh%gdim .eq. 3) enz=1
+      ns = enx*eny*enz*this%msh%nelv
+      if (NEKO_BCKND_DEVICE .eq. 1) then
+         r_d = device_get_ptr(r)
+         e_d = device_get_ptr(e)
+         call device_event_record(this%event, glb_cmd_queue)
+         call device_stream_wait_event(aux_cmd_queue, this%event, 0)
+         call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, &
                                    this%msh%nelv, aux_cmd_queue)
-       call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, &
+         call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, &
                                    enx,eny,enz, this%msh%nelv,aux_cmd_queue)
 
-       this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue
-       call this%gs_schwarz%op(work1, ns, GS_OP_ADD,this%event) 
-       call device_event_sync(this%event)
-       call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, &
+         this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue
+         call this%gs_schwarz%op(work1, ns, GS_OP_ADD,this%event)
+         call device_event_sync(this%event)
+         call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, &
                                    enx, eny, enz, this%msh%nelv, aux_cmd_queue)
-       
-       call this%fdm%compute(work2, work1,aux_cmd_queue) ! do local solves
 
-       call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, &
+         call this%fdm%compute(work2, work1,aux_cmd_queue) ! do local solves
+
+         call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, &
                                    enx, eny, enz, this%msh%nelv, aux_cmd_queue)
-       call this%gs_schwarz%op(work2, ns, GS_OP_ADD,this%event)
-       call device_event_sync(this%event)
+         call this%gs_schwarz%op(work2, ns, GS_OP_ADD,this%event)
+         call device_event_sync(this%event)
 
-       call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, &
+         call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, &
                                    enx, eny, enz, this%msh%nelv, aux_cmd_queue)
-       call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, &
+         call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, &
                                    enx, eny, enz, this%msh%nelv, aux_cmd_queue)
-       call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, &
+         call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, &
                                    this%msh%nelv, aux_cmd_queue)
 
-       call device_event_record(this%event,aux_cmd_queue)
-       call device_event_sync(this%event)
+         call device_event_record(this%event,aux_cmd_queue)
+         call device_event_sync(this%event)
 
-       call this%gs_h%op(e, n, GS_OP_ADD, this%event)
-       call bc_list_apply_scalar(this%bclst, e, n)
-       call device_col2(e_d,this%wt_d, n)
-       call device_stream_wait_event(aux_cmd_queue, this%event, 0)
-    else
-       call bc_list_apply_scalar(this%bclst, r, n)
-       call schwarz_toext3d(work1, r, this%Xh%lx, this%msh%nelv)
+         call this%gs_h%op(e, n, GS_OP_ADD, this%event)
+         call bc_list_apply_scalar(this%bclst, e, n)
+         call device_col2(e_d,this%wt_d, n)
+         call device_stream_wait_event(aux_cmd_queue, this%event, 0)
+      else
+         call bc_list_apply_scalar(this%bclst, r, n)
+         call schwarz_toext3d(work1, r, this%Xh%lx, this%msh%nelv)
 
-       !  exchange interior nodes
-       call schwarz_extrude(work1, 0, zero, work1, 2, one, &
+         !  exchange interior nodes
+         call schwarz_extrude(work1, 0, zero, work1, 2, one, &
                             enx, eny, enz, this%msh%nelv)
-       call this%gs_schwarz%op(work1, ns, GS_OP_ADD) 
-       call schwarz_extrude(work1, 0, one, work1, 2, -one, &
+         call this%gs_schwarz%op(work1, ns, GS_OP_ADD)
+         call schwarz_extrude(work1, 0, one, work1, 2, -one, &
                             enx, eny, enz, this%msh%nelv)
-       
-       call this%fdm%compute(work2, work1) ! do local solves
 
-       !   Sum overlap region (border excluded)
-       call schwarz_extrude(work1, 0, zero, work2, 0, one, &
+         call this%fdm%compute(work2, work1) ! do local solves
+
+         !   Sum overlap region (border excluded)
+         call schwarz_extrude(work1, 0, zero, work2, 0, one, &
                             enx, eny, enz, this%msh%nelv)
-       call this%gs_schwarz%op(work2, ns, GS_OP_ADD) 
-       call schwarz_extrude(work2, 0, one, work1, 0, -one, &
+         call this%gs_schwarz%op(work2, ns, GS_OP_ADD)
+         call schwarz_extrude(work2, 0, one, work1, 0, -one, &
                             enx, eny, enz, this%msh%nelv)
-       call schwarz_extrude(work2, 2, one, work2, 0, one, &
+         call schwarz_extrude(work2, 2, one, work2, 0, one, &
                             enx, eny, enz, this%msh%nelv)
 
-       call schwarz_toreg3d(e, work2, this%Xh%lx, this%msh%nelv)
+         call schwarz_toreg3d(e, work2, this%Xh%lx, this%msh%nelv)
 
-       ! sum border nodes
-       call this%gs_h%op(e, n, GS_OP_ADD) 
-       call bc_list_apply_scalar(this%bclst, e, n)
+         ! sum border nodes
+         call this%gs_h%op(e, n, GS_OP_ADD)
+         call bc_list_apply_scalar(this%bclst, e, n)
 
-       call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv)
-    end if
-  end associate
+         call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv)
+      end if
+    end associate
   end subroutine schwarz_compute
 
   !Apply schwarz weights along the boundary of each element.
diff --git a/src/math/signed_distance.f90 b/src/math/signed_distance.f90
new file mode 100644
index 00000000000..f9c18f9dabf
--- /dev/null
+++ b/src/math/signed_distance.f90
@@ -0,0 +1,431 @@
+! Copyright (c) 2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> @brief Module containing Signed Distance Functions.
+module signed_distance
+  use num_types, only: dp, rp
+  use field, only: field_t
+  use tri, only: tri_t
+  use tri_mesh, only: tri_mesh_t
+  use aabb_tree, only: aabb_tree_t
+
+  implicit none
+
+contains
+
+  !> @brief Signed distance field
+  !! @details This routine computes the signed distance field for a given
+  !! object.
+  !!
+  !! Currently supported objects are:
+  !! - Triangular mesh (tri_mesh_t)
+  !!
+  !! @param[in,out] field_data Field data
+  !! @param[in] object Object
+  !! @param[in,optional] max_distance Maximum distance outside the mesh
+  subroutine signed_distance_field(field_data, object, max_distance)
+    use utils, only: neko_error
+    implicit none
+
+    type(field_t), intent(inout) :: field_data
+    class(*), intent(in) :: object
+    real(kind=dp), intent(in), optional :: max_distance
+
+    real(kind=dp) :: max_dist
+
+    if (present(max_distance)) then
+       max_dist = max_distance
+    else
+       max_dist = huge(0.0_dp)
+    end if
+
+    select type(object)
+      type is (tri_mesh_t)
+       call signed_distance_field_tri_mesh(field_data, object, max_dist)
+
+      class default
+       call neko_error("signed_distance_field: Object type not supported.")
+    end select
+
+  end subroutine signed_distance_field
+
+  !> @brief Signed distance field for a triangular mesh
+  !! @details This routine computes the signed distance field for a given
+  !! triangular mesh. The algorithm utilizes an AABB tree to accelerate the
+  !! earch for potential elements. The signed distance is computed using the
+  !! brute force approach, where we compute the signed distance to each element
+  !! found through the AABB tree, and return the minimum distance.
+  !!
+  !! @param[in,out] field_data Field data
+  !! @param[in] mesh Triangular mesh
+  !! @param[in] max_distance Maximum distance outside the mesh
+  subroutine signed_distance_field_tri_mesh(field_data, mesh, max_distance)
+    use utils, only: neko_error
+    implicit none
+
+    type(field_t), intent(inout) :: field_data
+    type(tri_mesh_t), intent(in) :: mesh
+    real(kind=dp), intent(in) :: max_distance
+
+    integer :: total_size
+    integer :: id
+    type(aabb_tree_t) :: search_tree
+    real(kind=dp), dimension(3) :: p
+    real(kind=dp) :: distance
+
+    ! Zero the field
+    field_data%x = 0.0_dp
+    total_size = field_data%dof%size()
+
+    call search_tree%init(mesh%nelv)
+    call search_tree%build(mesh%el)
+
+    if (search_tree%get_size() .ne. mesh%nelv) then
+       call neko_error("signed_distance_field_tri_mesh: &
+         & Error building the search tree.")
+    end if
+
+    do id = 1, total_size
+       p(1) = field_data%dof%x(id, 1, 1, 1)
+       p(2) = field_data%dof%y(id, 1, 1, 1)
+       p(3) = field_data%dof%z(id, 1, 1, 1)
+
+       distance = tri_mesh_aabb_tree(search_tree, mesh%el, p, max_distance)
+
+       field_data%x(id, 1, 1, 1) = real(distance, kind=rp)
+    end do
+
+  end subroutine signed_distance_field_tri_mesh
+
+  !> @brief Signed distance function
+  !! @deprecated This routine is deprecated and will be removed in the future.
+  !! @details This routine computes the signed distance function for the
+  !! boundary mesh, to a given point (x, y, z). The algorithm is a
+  !! brute force approach, where we compute the signed distance to each
+  !! element in the mesh, and return the minimum distance.
+  !!
+  !! @param p Point
+  !! @param mesh Boundary mesh
+  !! @return Signed distance value
+  function tri_mesh_brute_force(mesh, p, max_distance) result(distance)
+    use tri, only: tri_t
+    use point, only: point_t
+    use num_types, only: dp
+
+    implicit none
+
+    type(tri_mesh_t), intent(in) :: mesh
+    real(kind=dp), intent(in) :: p(3)
+    real(kind=dp), intent(in) :: max_distance
+
+    integer :: id
+    real(kind=dp) :: distance, weighted_sign
+    real(kind=dp) :: cd, cs
+    real(kind=dp) :: tol = 1e-6_dp
+
+    distance = 1e10_dp
+    weighted_sign = 0.0_dp
+
+    do id = 1, mesh%nelv
+       call element_distance(mesh%el(id), p, cd, cs)
+
+       ! Update the weighted sign, if the relative difference is small
+       if (abs(cd - distance) / distance .lt. tol) then
+          weighted_sign = weighted_sign + cs
+       else if (cd .lt. distance) then
+          weighted_sign = cs
+       end if
+
+       distance = min(cd, distance)
+    end do
+
+    distance = sign(min(distance, max_distance), weighted_sign)
+
+  end function tri_mesh_brute_force
+
+  !> @brief Signed distance function using an AABB tree
+  !! @details This routine computes the signed distance function for the
+  !! boundary mesh, to a given point (x, y, z). The algorithm utilizes an
+  !! AABB tree to accelerate the search for potential elements. The signed
+  !! distance is computed using the brute force approach, where we compute the
+  !! signed distance to each element found through the AABB tree, and return
+  !! the minimum distance.
+  !!
+  !! @param tree AABB tree
+  !! @param object_list List of objects
+  !! @param p Point
+  !! @param max_distance Maximum distance outside the mesh
+  !! @return Signed distance value
+  function tri_mesh_aabb_tree(tree, object_list, p, max_distance) result(distance)
+    use aabb, only: aabb_t
+    use aabb_tree, only: aabb_node_t, AABB_NULL_NODE
+    use stack, only: stack_i4_t
+    implicit none
+
+    class(aabb_tree_t), intent(in) :: tree
+    class(tri_t), dimension(:), intent(in) :: object_list
+    real(kind=dp), dimension(3), intent(in) :: p
+    real(kind=dp), intent(in) :: max_distance
+
+    real(kind=dp) :: distance
+    real(kind=dp) :: weighted_sign
+
+    real(kind=dp), parameter :: tol = 1.0e-6_dp
+
+    type(stack_i4_t) :: simple_stack
+    integer :: current_index
+
+    type(aabb_node_t) :: current_node
+    type(aabb_t) :: current_aabb
+    integer :: current_object_index
+    real(kind=dp) :: current_distance
+    real(kind=dp) :: current_sign
+
+    type(aabb_node_t) :: left_node
+    type(aabb_node_t) :: right_node
+
+    type(aabb_t) :: root_box
+    type(aabb_t) :: search_box
+
+    integer :: root_index, left_index, right_index
+    real(kind=dp) :: random_value
+
+    ! Initialize the stack and the search box
+    call simple_stack%init(size(object_list) * 2)
+    call search_box%init(p - max_distance, p + max_distance)
+
+    ! Check if the root node overlaps the search box, if it does, push it to
+    ! the stack and update the search box to a randomly selected object.
+    root_index = tree%get_root_index()
+    root_box = tree%get_aabb(root_index)
+
+    if (.not. root_box%overlaps(search_box)) then
+       distance = max_distance
+       weighted_sign = 1.0_dp
+       return
+    end if
+
+    ! Grab a random object and compute the distance to it
+    call random_number(random_value)
+    current_object_index = floor(random_value * size(object_list) + 1)
+    call element_distance(object_list(current_object_index), p, distance, weighted_sign)
+    distance = distance + object_list(current_object_index)%diameter()
+
+    ! Update the search box to the new distance and push the root node
+    call search_box%init(p - distance, p + distance)
+    call simple_stack%push(root_index)
+
+    ! Traverse the tree and compute the signed distance to the elements
+    do while (.not. simple_stack%is_empty())
+       current_index = simple_stack%pop()
+       if (current_index .eq. AABB_NULL_NODE) cycle
+
+       current_node = tree%get_node(current_index)
+       current_aabb = current_node%get_aabb()
+
+       if (current_node%is_leaf()) then
+          if (distance .lt. current_node%min_distance(p)) then
+             cycle
+          end if
+
+          current_object_index = current_node%get_object_index()
+          call element_distance(object_list(current_object_index), p, current_distance, current_sign)
+
+          ! Update the weighted sign, if the relative difference is small
+          if (abs(current_distance - distance) / distance .lt. tol) then
+             weighted_sign = weighted_sign + current_sign
+          else if (current_distance .lt. distance) then
+             weighted_sign = current_sign
+          end if
+
+          distance = min(distance, current_distance)
+
+          ! Update the search box to the new distance
+          if (distance .gt. current_aabb%get_diameter()) then
+             call search_box%init(p - distance, p + distance)
+          end if
+       else
+
+          left_index = tree%get_left_index(current_index)
+          if (left_index .ne. AABB_NULL_NODE) then
+             left_node = tree%get_left_node(current_index)
+             if (left_node%aabb%overlaps(search_box)) then
+                call simple_stack%push(left_index)
+             end if
+          end if
+
+          right_index = tree%get_right_index(current_index)
+          if (right_index .ne. AABB_NULL_NODE) then
+             right_node = tree%get_right_node(current_index)
+             if (right_node%aabb%overlaps(search_box)) then
+                call simple_stack%push(right_index)
+             end if
+          end if
+       end if
+    end do
+
+    if (distance .gt. max_distance) then
+       distance = max_distance
+    end if
+    distance = sign(distance, weighted_sign)
+
+  end function tri_mesh_aabb_tree
+
+  ! ========================================================================== !
+  ! Element distance functions
+  ! ========================================================================== !
+
+  !> @brief Main interface for the signed distance function for an element.
+  subroutine element_distance(element, p, distance, weighted_sign)
+    class(*), intent(in) :: element
+    real(kind=dp), dimension(3), intent(in) :: p
+    real(kind=dp), intent(out) :: distance
+    real(kind=dp), intent(out), optional :: weighted_sign
+
+    select type(element)
+      type is (tri_t)
+       call element_distance_triangle(element, p, distance, weighted_sign)
+
+      class default
+       print *, "Error: Element type not supported."
+       stop
+    end select
+  end subroutine element_distance
+
+  ! -------------------------------------------------------------------------- !
+  ! Specific element distance functions
+
+  !> @brief Signed distance function for a triangle
+  !! @details This routine computes the signed distance function for the current
+  !! triangle. We compute the barycentric coordinate of the point projected onto
+  !! the triangle. If the projection is inside the triangle, the distance is the
+  !! distance to the plane. If the projection is outside the triangle, the
+  !! distance is the distance to the nearest edge or vertex.
+  !!
+  !! In order to improve precision of the sign estimation, we also compute the
+  !! weighted sign, which is the perpendicular distance to the plane divided by
+  !! the distance to the nearest point.
+  !!
+  !! @Note The returned distance is signed if the weighted_sign is not present.
+  !!
+  !! @param this Triangle
+  !! @param p Point
+  !! @return Distance value
+  !! @return[optional] Weighted sign
+  subroutine element_distance_triangle(triangle, p, distance, weighted_sign)
+    type(tri_t), intent(in) :: triangle
+    real(kind=dp), dimension(3), intent(in) :: p
+
+    real(kind=dp), intent(out) :: distance
+    real(kind=dp), intent(out), optional :: weighted_sign
+
+    real(kind=dp), dimension(3) :: v1, v2, v3
+    real(kind=dp), dimension(3) :: normal
+    real(kind=dp) :: normal_length
+    real(kind=dp) :: b1, b2, b3
+
+    real(kind=dp), dimension(3) :: projection
+    real(kind=dp), dimension(3) :: edge
+    real(kind=dp) :: tol = 1e-10_dp
+
+    real(kind=dp) :: face_distance
+    real(kind=dp) :: t
+
+    ! Get vertices and the normal vector
+    v1 = triangle%pts(1)%p%x
+    v2 = triangle%pts(2)%p%x
+    v3 = triangle%pts(3)%p%x
+
+    normal = cross(v2 - v1, v3 - v1)
+    normal_length = norm2(normal)
+
+    if (normal_length .lt. tol) then
+       distance = huge(1.0_dp)
+       weighted_sign = 0.0_dp
+       return
+    end if
+    normal = normal / normal_length
+
+    ! Compute Barycentric coordinates to determine if the point is inside the
+    ! triangular prism, of along an edge or by a face.
+    face_distance = dot_product(p - v1, normal)
+
+    projection = p - normal * face_distance
+    b1 = dot_product(normal, cross(v2 - v1, projection - v1)) / normal_length
+    b2 = dot_product(normal, cross(v3 - v2, projection - v2)) / normal_length
+    b3 = dot_product(normal, cross(v1 - v3, projection - v3)) / normal_length
+
+    if (b1 .le. tol) then
+       edge = v2 - v1
+       t = dot_product(p - v1, edge) / norm2(edge)
+       t = max(0.0_dp, min(1.0_dp, t))
+
+       projection = v1 + t * edge
+    else if (b2 .le. tol) then
+       edge = v3 - v2
+       t = dot_product(p - v2, edge) / norm2(edge)
+       t = max(0.0_dp, min(1.0_dp, t))
+
+       projection = v2 + t * edge
+    else if (b3 .le. tol) then
+       edge = v1 - v3
+       t = dot_product(p - v3, edge) / norm2(edge)
+       t = max(0.0_dp, min(1.0_dp, t))
+
+       projection = v3 + t * edge
+    end if
+
+    distance = norm2(projection - p)
+    if (present(weighted_sign)) then
+       weighted_sign = face_distance / distance
+    else
+       distance = sign(distance, face_distance)
+    end if
+
+  end subroutine element_distance_triangle
+
+  !> Compute cross product of two vectors
+  !> @param[in] a First vector
+  !> @param[in] b Second vector
+  !> @return Cross product \f$ a \times b \f$
+  pure function cross(a, b) result(c)
+    real(kind=dp), dimension(3), intent(in) :: a
+    real(kind=dp), dimension(3), intent(in) :: b
+    real(kind=dp), dimension(3) :: c
+
+    c(1) = a(2) * b(3) - a(3) * b(2)
+    c(2) = a(3) * b(1) - a(1) * b(3)
+    c(3) = a(1) * b(2) - a(2) * b(1)
+
+  end function cross
+
+end module signed_distance
diff --git a/src/math/tensor.f90 b/src/math/tensor.f90
index 5d8ad6292ce..79b0e01417d 100644
--- a/src/math/tensor.f90
+++ b/src/math/tensor.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 !> Tensor operations.
@@ -79,7 +79,7 @@ module tensor
      module procedure triple_tensor_product_scalar, triple_tensor_product_vector
   end interface triple_tensor_product
 
-public :: tensr3, transpose, trsp, trsp1, &
+  public :: tensr3, transpose, trsp, trsp1, &
      tnsr2d_el, tnsr3d_el, tnsr3d, tnsr1_3d, addtnsr, &
      triple_tensor_product, tnsr3d_el_list
 
@@ -97,7 +97,7 @@ subroutine tensr3(v, nv, u, nu, A, Bt, Ct, w)
     real(kind=rp), intent(inout) :: Bt(nu, nv)
     real(kind=rp), intent(inout) :: Ct(nu, nv)
     integer :: j, k, l, nunu, nvnv, nunv
-    
+
     nunu = nu**2
     nvnv = nv**2
     nunv = nu*nv
@@ -113,7 +113,7 @@ subroutine tensr3(v, nv, u, nu, A, Bt, Ct, w)
        l = l + nvnv
     end do
     call mxm(w, nvnv, Ct, nu, v, nv)
-    
+
   end subroutine tensr3
 
   !> Transpose of a rectangular tensor \f$ A = B^T \f$.
@@ -129,7 +129,7 @@ subroutine trsp(a, lda, b, ldb)
           a(i, j) = b(j, i)
        end do
     end do
-    
+
   end subroutine trsp
 
   !> In-place transpose of a square tensor.
@@ -146,7 +146,7 @@ subroutine trsp1(a, n)
           a(j, i) = tmp
        end do
     end do
-    
+
   end subroutine trsp1
 
   !> Computes \f$ v = A u B^T \f$.
@@ -162,7 +162,7 @@ subroutine tnsr2d_el(v, nv, u, nu, A, Bt)
     else
        call tnsr2d_el_cpu(v, nv, u, nu, A, Bt)
     end if
-    
+
   end subroutine tnsr2d_el
 
   !> Tensor product \f$ v =(C \otimes B \otimes A) u \f$
@@ -179,7 +179,7 @@ subroutine tnsr3d_el(v, nv, u, nu, A, Bt, Ct)
     else
        call tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct)
     end if
-    
+
   end subroutine tnsr3d_el
 
   !> Tensor product \f$ v =(C \otimes B \otimes A) u \f$
@@ -213,7 +213,7 @@ subroutine tnsr3d_el_list(v, nv, u, nu, A, Bt, Ct, el_list, n_pt)
           call tnsr3d_el_cpu(v(1,i), nv, u(1,el_list(i)+1), nu, A(1,1,i), Bt(1,1,i), Ct(1,1,i))
        end do
     end if
-    
+
   end subroutine tnsr3d_el_list
 
 
@@ -224,7 +224,7 @@ subroutine tnsr3d(v, nv, u, nu, A, Bt, Ct, nelv)
     real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv)
     real(kind=rp), intent(inout) :: A(nv,nu), Bt(nu, nv), Ct(nu,nv)
     type(c_ptr) :: v_d, u_d, A_d, Bt_d, Ct_d
-   
+
 
     if (NEKO_BCKND_SX .eq. 1) then
        call tnsr3d_sx(v, nv, u, nu, A, Bt, Ct, nelv)
@@ -240,7 +240,7 @@ subroutine tnsr3d(v, nv, u, nu, A, Bt, Ct, nelv)
     else
        call tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv)
     end if
-    
+
   end subroutine tnsr3d
 
   !> In place tensor product \f$ v =(C \otimes B \otimes A) v \f$.
@@ -264,11 +264,11 @@ end subroutine tnsr1_3d
   subroutine addtnsr(s, h1, h2, h3, nx, ny, nz)
 
     integer, intent(in) :: nx, ny, nz
-    real(kind=rp), intent(in) :: h1(nx), h2(ny), h3(nz) 
+    real(kind=rp), intent(in) :: h1(nx), h2(ny), h3(nz)
     real(kind=rp), intent(inout) ::  s(nx, ny, nz)
     real(kind=rp) :: hh
     integer :: ix, iy, iz
-  
+
     do iz = 1,nz
        do iy = 1,ny
           hh = h2(iy)*h3(iz)
@@ -277,7 +277,7 @@ subroutine addtnsr(s, h1, h2, h3, nx, ny, nz)
           end do
        end do
     end do
-    
+
   end subroutine addtnsr
 
   !> Computes the tensor product \f$ v =(H_t \otimes H_s \otimes H_r) u \f$.
diff --git a/src/math/vector.f90 b/src/math/vector.f90
index a927c2f637d..c02acaed82a 100644
--- a/src/math/vector.f90
+++ b/src/math/vector.f90
@@ -40,7 +40,7 @@ module vector
   use, intrinsic :: iso_c_binding
   implicit none
   private
-  
+
   type, public ::  vector_t
      real(kind=rp), allocatable :: x(:) !< Vector entries.
      type(c_ptr) :: x_d = C_NULL_PTR    !< Device pointer.
@@ -57,7 +57,7 @@ module vector
 
   type, public :: vector_ptr_t
      type(vector_t), pointer :: v
-  end type
+  end type vector_ptr_t
 
 contains
 
@@ -70,14 +70,14 @@ subroutine vector_init(v, n)
 
     allocate(v%x(n))
     v%x = 0.0_rp
-   
+
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_map(v%x, v%x_d, n)
        call device_cfill(v%x_d, 0.0_rp, n)
     end if
 
     v%n = n
-    
+
   end subroutine vector_init
 
   !> Deallocate a vector.
@@ -93,14 +93,14 @@ subroutine vector_free(v)
     end if
 
     v%n = 0
-        
+
   end subroutine vector_free
 
   !> Return the number of entries in the vector.
   function vector_size(v) result(s)
     class(vector_t), intent(inout) :: v
     integer :: s
-    s = v%n    
+    s = v%n
   end function vector_size
 
   !> Assignment \f$ v = w \f$.
@@ -116,11 +116,11 @@ subroutine vector_assign_vector(v, w)
 
        v%n = w%n
        allocate(v%x(v%n))
-       
+
        if (NEKO_BCKND_DEVICE .eq. 1) then
           call device_map(v%x, v%x_d, v%n)
        end if
-       
+
     end if
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
@@ -148,5 +148,5 @@ subroutine vector_assign_scalar(v, s)
 
   end subroutine vector_assign_scalar
 
-  
+
 end module vector
diff --git a/src/mesh/aabb_tree.f90 b/src/mesh/aabb_tree.f90
new file mode 100644
index 00000000000..f7a12c0e6c4
--- /dev/null
+++ b/src/mesh/aabb_tree.f90
@@ -0,0 +1,928 @@
+! Copyright (c) 2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! ============================================================================ !
+! Original C++ Implementation from:
+! https://github.com/JamesRandall/SimpleVoxelEngine/blob/master/voxelEngine/src/AABBTree.h
+!
+! Translated to Fortran by:
+! @author Tim Felle Olsen
+! @date 9 Feb 2024
+!
+! C++ Code License:
+! The MIT License (MIT)
+!
+! Copyright (c) 2017 James Randall
+!
+! Permission is hereby granted, free of charge, to any person obtaining a copy of
+! this software and associated documentation files (the "Software"), to deal in
+! the Software without restriction, including without limitation the rights to
+! use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+! the Software, and to permit persons to whom the Software is furnished to do so,
+! subject to the following conditions:
+!
+! The above copyright notice and this permission notice shall be included in all
+! copies or substantial portions of the Software.
+!
+! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+! FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+! COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+! IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+! CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+! ============================================================================ !
+
+!> @brief Axis Aligned Bounding Box (aabb) Tree data structure.
+!! @details
+!! This is a Fortran implementation of an Axis Aligned Bounding Box Tree
+!! data structure.
+!! The purpose of this is to accelerate a Signed Distance Function and other
+!! spatial computations.
+module aabb_tree
+  use aabb
+  use tri, only: tri_t
+  use num_types, only: rp, dp
+
+  implicit none
+  private
+
+  integer, parameter, public :: AABB_NULL_NODE = -1
+
+  ! ========================================================================== !
+  ! Type definitions
+  ! ========================================================================== !
+
+  !> @brief Node type for the Axis Aligned Bounding Box (aabb) Tree
+  type, public :: aabb_node_t
+     private
+     type(aabb_t), public :: aabb
+     integer :: object_index = -1
+
+     ! tree links
+     integer :: parent_node_index = AABB_NULL_NODE
+     integer :: left_node_index = AABB_NULL_NODE
+     integer :: right_node_index = AABB_NULL_NODE
+
+     ! node linked list link
+     integer :: next_node_index = AABB_NULL_NODE
+
+   contains
+     procedure, pass(this), public :: init => aabb_node_init
+
+     ! Getters
+     procedure, pass(this), public :: get_aabb => aabb_node_get_aabb
+     procedure, pass(this), public :: get_object_index => aabb_node_get_object_index
+     procedure, pass(this), public :: get_parent_index => aabb_node_get_parent_index
+     procedure, pass(this), public :: get_left_index => aabb_node_get_left_index
+     procedure, pass(this), public :: get_right_index => aabb_node_get_right_index
+
+     ! Unary operations
+     procedure, pass(this), public :: min_distance => aabb_node_min_distance
+
+     ! Boolean operators
+     procedure, pass(this), public :: is_leaf => aabb_node_is_leaf
+     procedure, pass(this), public :: is_valid => aabb_node_is_valid
+
+     ! Comparison operators
+     generic :: operator(.lt.) => less
+     generic :: operator(.gt.) => greater
+
+     procedure, pass(this) :: less => aabb_node_less
+     procedure, pass(this) :: greater => aabb_node_greater
+
+  end type aabb_node_t
+
+  !> @brief Axis Aligned Bounding Box (aabb) Tree
+  type, public :: aabb_tree_t
+     private
+     type(aabb_node_t), allocatable :: nodes(:)
+     integer :: root_node_index = AABB_NULL_NODE
+     integer :: allocated_node_count = 0
+     integer :: next_free_node_index = AABB_NULL_NODE
+     integer :: node_capacity = 0
+     integer :: growth_size = 1
+
+   contains
+
+     ! Initializers
+     procedure, pass(this), public :: init => aabb_tree_init
+     procedure, pass(this), public :: build => aabb_tree_build_tree
+     procedure, pass(this), public :: insert_object => aabb_tree_insert_object
+
+     ! Getters
+     procedure, pass(this), public :: get_size => aabb_tree_get_size
+
+     procedure, pass(this), public :: get_root_index => aabb_tree_get_root_index
+     procedure, pass(this), public :: get_parent_index => aabb_tree_get_parent_index
+     procedure, pass(this), public :: get_left_index => aabb_tree_get_left_index
+     procedure, pass(this), public :: get_right_index => aabb_tree_get_right_index
+
+     procedure, pass(this), public :: get_node => aabb_tree_get_node
+     procedure, pass(this), public :: get_root_node => aabb_tree_get_root_node
+     procedure, pass(this), public :: get_parent_node => aabb_tree_get_parent_node
+     procedure, pass(this), public :: get_left_node => aabb_tree_get_left_node
+     procedure, pass(this), public :: get_right_node => aabb_tree_get_right_node
+
+     procedure, pass(this), public :: get_aabb => aabb_tree_get_aabb
+
+     procedure, pass(this), public :: query_overlaps => aabb_tree_query_overlaps
+
+     procedure, pass(this), public :: print => aabb_tree_print
+
+     ! ----------------------------------------------------------------------- !
+     ! Internal methods
+
+     procedure, pass(this) :: allocate_node => aabb_tree_allocate_node
+     procedure, pass(this) :: deallocate_node => aabb_tree_deallocate_node
+     procedure, pass(this) :: resize_node_pool => aabb_tree_resize_node_pool
+     procedure, pass(this) :: insert_leaf => aabb_tree_insert_leaf
+
+     procedure, pass(this) :: fix_upwards_tree => aabb_tree_fix_upwards_tree
+
+     procedure, pass(this) :: valid_tree => aabb_tree_valid_tree
+
+  end type aabb_tree_t
+
+contains
+
+  ! ========================================================================== !
+  ! Definitions of node methods
+  ! ========================================================================== !
+
+  !> @brief Initializes the AABB node.
+  subroutine aabb_node_init(this)
+    class(aabb_node_t), intent(inout) :: this
+
+    this%object_index = -1
+    this%parent_node_index = AABB_NULL_NODE
+    this%left_node_index = AABB_NULL_NODE
+    this%right_node_index = AABB_NULL_NODE
+    this%next_node_index = AABB_NULL_NODE
+  end subroutine aabb_node_init
+
+  ! -------------------------------------------------------------------------- !
+  ! Getters
+
+  !> @brief Returns the Axis Aligned Bounding Box (aabb) of the node.
+  pure function aabb_node_get_aabb(this) result(res)
+    class(aabb_node_t), intent(in) :: this
+    type(aabb_t) :: res
+
+    res = this%aabb
+  end function aabb_node_get_aabb
+
+  !> @brief Returns the object index of the node.
+  pure function aabb_node_get_object_index(this) result(object_index)
+    class(aabb_node_t), intent(in) :: this
+    integer :: object_index
+
+    object_index = this%object_index
+  end function aabb_node_get_object_index
+
+  !> @brief Returns the parent index of the node.
+  pure function aabb_node_get_parent_index(this) result(parent_index)
+    class(aabb_node_t), intent(in) :: this
+    integer :: parent_index
+
+    parent_index = this%parent_node_index
+  end function aabb_node_get_parent_index
+
+  !> @brief Returns the left index of the node.
+  pure function aabb_node_get_left_index(this) result(left_index)
+    class(aabb_node_t), intent(in) :: this
+    integer :: left_index
+
+    left_index = this%left_node_index
+  end function aabb_node_get_left_index
+
+  !> @brief Returns the right index of the node.
+  pure function aabb_node_get_right_index(this) result(right_index)
+    class(aabb_node_t), intent(in) :: this
+    integer :: right_index
+
+    right_index = this%right_node_index
+  end function aabb_node_get_right_index
+
+  !> @brief Get the minimum possible distance from the aabb to a point.
+  function aabb_node_min_distance(this, p) result(distance)
+    class(aabb_node_t), intent(in) :: this
+    real(kind=dp), dimension(3), intent(in) :: p
+    real(kind=dp) :: distance
+
+    distance = 0.5_rp * this%aabb%get_diameter() &
+      - norm2(this%aabb%get_center() - p)
+  end function aabb_node_min_distance
+
+  ! -------------------------------------------------------------------------- !
+  ! Boolean operators
+
+  !> @brief Returns true if the node is a leaf node.
+  pure function aabb_node_is_leaf(this) result(res)
+    class(aabb_node_t), intent(in) :: this
+    logical :: res
+
+    res = this%left_node_index == AABB_NULL_NODE .and. &
+      this%right_node_index == AABB_NULL_NODE
+  end function aabb_node_is_leaf
+
+  !> @brief Returns true if the node is a valid node.
+  pure function aabb_node_is_valid(this) result(valid)
+    class(aabb_node_t), intent(in) :: this
+    logical :: valid
+
+    if (this%is_leaf()) then
+       valid = &
+         & this%left_node_index .eq. AABB_NULL_NODE .and. &
+         & this%right_node_index .eq. AABB_NULL_NODE .and. &
+         & this%object_index .gt. 0
+    else
+       valid = &
+         & this%left_node_index .ne. AABB_NULL_NODE .and. &
+         & this%right_node_index .ne. AABB_NULL_NODE .and. &
+         & this%object_index .eq. -1
+    end if
+
+  end function aabb_node_is_valid
+
+  ! -------------------------------------------------------------------------- !
+  ! Comparison operators
+
+  !> @brief Returns true if the node is less than the other node.
+  pure function aabb_node_less(this, other) result(res)
+    class(aabb_node_t), intent(in) :: this
+    class(aabb_node_t), intent(in) :: other
+    logical :: res
+
+    res = this%aabb .lt. other%aabb
+
+  end function aabb_node_less
+
+  !> @brief Returns true if the node is greater than the other node.
+  pure function aabb_node_greater(this, other) result(res)
+    class(aabb_node_t), intent(in) :: this
+    class(aabb_node_t), intent(in) :: other
+    logical :: res
+
+    res = this%aabb .gt. other%aabb
+
+  end function aabb_node_greater
+
+  ! ========================================================================== !
+  ! Definitions of tree methods
+  ! ========================================================================== !
+
+  !> @brief Initializes the AABB tree.
+  subroutine aabb_tree_init(this, initial_capacity)
+    class(aabb_tree_t), intent(inout) :: this
+    integer, intent(in) :: initial_capacity
+
+    integer :: i
+
+    this%root_node_index = AABB_NULL_NODE
+    this%allocated_node_count = 0
+    this%next_free_node_index = 1
+    this%node_capacity = initial_capacity
+    this%growth_size = initial_capacity
+
+    if (allocated(this%nodes)) deallocate(this%nodes)
+    allocate(this%nodes(initial_capacity))
+
+    do i = 1, initial_capacity
+       this%nodes(i)%next_node_index = i + 1
+    end do
+    this%nodes(initial_capacity)%next_node_index = AABB_NULL_NODE
+  end subroutine aabb_tree_init
+
+  !> @brief Builds the tree.
+  subroutine aabb_tree_build_tree(this, objects)
+    use utils, only: neko_error
+    implicit none
+
+    class(aabb_tree_t), intent(inout) :: this
+    class(*), dimension(:), intent(in) :: objects
+
+    integer :: i_obj, i_node, i
+    logical :: done
+
+    integer :: start_layer, end_layer
+
+    type(aabb_t), dimension(:), allocatable :: box_list
+    integer, dimension(:), allocatable :: sorted_indices
+
+    call this%init(size(objects) * 2)
+
+    ! ------------------------------------------------------------------------ !
+    ! Start by sorting the list of objects, then build a balanced binary tree
+    ! from the sorted list
+
+    allocate(box_list(size(objects)))
+
+    do i_obj = 1, size(objects)
+       box_list(i_obj) = get_aabb(objects(i_obj))
+    end do
+    sorted_indices = sort(box_list)
+
+    do i = 1, size(sorted_indices)
+       i_obj = sorted_indices(i)
+       i_node = this%allocate_node()
+       this%nodes(i_node)%aabb = get_aabb(objects(i_obj))
+       this%nodes(i_node)%object_index = i_obj
+    end do
+
+
+    start_layer = 1
+    end_layer = size(objects)
+    done = .false.
+    do while (.not. done)
+
+       ! build the next layer
+       do i = start_layer, end_layer - 1, 2
+          i_node = this%allocate_node()
+
+          this%nodes(i_node)%aabb = merge(this%nodes(i)%aabb, this%nodes(i + 1)%aabb)
+
+          this%nodes(i_node)%left_node_index = i
+          this%nodes(i_node)%right_node_index = i + 1
+
+          this%nodes(i)%parent_node_index = i_node
+          this%nodes(i + 1)%parent_node_index = i_node
+       end do
+
+       ! if the number of nodes is odd, we need to create a new node to hold the
+       ! last node
+       if (mod(end_layer - start_layer, 2) .eq. 0) then
+          i_node = this%allocate_node()
+          this%nodes(i_node)%aabb = this%nodes(end_layer)%aabb
+          this%nodes(i_node)%left_node_index = end_layer
+          this%nodes(i_node)%right_node_index = AABB_NULL_NODE
+
+          this%nodes(end_layer)%parent_node_index = i_node
+       end if
+
+       ! move to the next layer
+       start_layer = end_layer + 1
+       end_layer = this%allocated_node_count
+
+       ! If there is only one node left, we are done
+       done = start_layer .eq. end_layer
+    end do
+
+    ! The last node allocated is the root node
+    this%root_node_index = this%allocated_node_count
+
+    if (this%get_size() .ne. size(objects)) then
+       print *, "this%get_size() = ", this%get_size()
+       print *, "size(objects) = ", size(objects)
+       call neko_error("Invalid tree size")
+    end if
+
+  end subroutine aabb_tree_build_tree
+
+  function sort(array) result(indices)
+    type(aabb_t), dimension(:), intent(in) :: array
+    integer, dimension(:), allocatable :: indices
+    logical, dimension(:), allocatable :: visited
+
+    integer :: i, imin
+    integer :: minidx
+
+    allocate(indices(size(array)))
+    allocate(visited(size(array)))
+
+    visited = .false.
+    indices = 0
+    do i = 1, size(array)
+       minidx = -1
+       do imin = 1, size(array)
+          if (.not. visited(imin) .and. minidx .eq. -1) minidx = imin
+
+          if (visited(imin) .and. array(imin) .lt. array(minidx)) minidx = imin
+       end do
+
+       indices(i) = minidx
+       visited(minidx) = .true.
+    end do
+
+  end function sort
+
+  ! -------------------------------------------------------------------------- !
+  ! Getters
+
+  !> @brief Returns the size of the tree, in number of leaves.
+  function aabb_tree_get_size(this) result(size)
+    use stack, only: stack_i4_t
+    use utils, only: neko_error
+    class(aabb_tree_t), intent(in) :: this
+    integer :: size
+
+    type(stack_i4_t) :: simple_stack
+    integer :: idx, tmp
+
+    call simple_stack%init(this%allocated_node_count)
+    size = 0
+    tmp = this%get_root_index()
+    if (tmp .ne. AABB_NULL_NODE) then
+       call simple_stack%push(tmp)
+    end if
+
+    do while (.not. simple_stack%is_empty())
+       idx = simple_stack%pop()
+       if (idx .eq. AABB_NULL_NODE) cycle
+
+       if (this%nodes(idx)%is_leaf()) then
+          size = size + 1
+       else
+          tmp = this%get_left_index(idx)
+          call simple_stack%push(tmp)
+          tmp = this%get_right_index(idx)
+          call simple_stack%push(tmp)
+       end if
+    end do
+
+  end function aabb_tree_get_size
+
+  ! -------------------------------------------------------------------------- !
+  ! Get index of nodes
+
+  !> @brief Returns the index of the root node.
+  pure function aabb_tree_get_root_index(this) result(root_index)
+    class(aabb_tree_t), intent(in) :: this
+    integer :: root_index
+
+    root_index = this%root_node_index
+  end function aabb_tree_get_root_index
+
+  !> @brief Returns the index of the parent node of the node at the given index.
+  pure function aabb_tree_get_parent_index(this, node_index) result(parent_index)
+    class(aabb_tree_t), intent(in) :: this
+    integer, intent(in) :: node_index
+    integer :: parent_index
+
+    parent_index = this%nodes(node_index)%parent_node_index
+  end function aabb_tree_get_parent_index
+
+  !> @brief Returns the index of the left node of the node at the given index.
+  pure function aabb_tree_get_left_index(this, node_index) result(left_index)
+    class(aabb_tree_t), intent(in) :: this
+    integer, intent(in) :: node_index
+    integer :: left_index
+
+    left_index = this%nodes(node_index)%left_node_index
+  end function aabb_tree_get_left_index
+
+  !> @brief Returns the index of the right node of the node at the given index.
+  pure function aabb_tree_get_right_index(this, node_index) result(right_index)
+    class(aabb_tree_t), intent(in) :: this
+    integer, intent(in) :: node_index
+    integer :: right_index
+
+    right_index = this%nodes(node_index)%right_node_index
+  end function aabb_tree_get_right_index
+
+  ! -------------------------------------------------------------------------- !
+  ! Get nodes
+
+  !> @brief Returns the node at the given index.
+  pure function aabb_tree_get_node(this, node_index) result(node)
+    class(aabb_tree_t), intent(in) :: this
+    integer, intent(in) :: node_index
+    type(aabb_node_t) :: node
+
+    node = this%nodes(node_index)
+  end function aabb_tree_get_node
+
+  !> @brief Returns the root node of the tree.
+  pure function aabb_tree_get_root_node(this) result(root_node)
+    class(aabb_tree_t), intent(in) :: this
+    type(aabb_node_t) :: root_node
+
+    root_node = this%nodes(this%root_node_index)
+  end function aabb_tree_get_root_node
+
+  !> @brief Returns the parent node of the node at the given index.
+  pure function aabb_tree_get_parent_node(this, node_index) result(parent_node)
+    class(aabb_tree_t), intent(in) :: this
+    integer, intent(in) :: node_index
+    type(aabb_node_t) :: parent_node
+
+    parent_node = this%nodes(this%nodes(node_index)%parent_node_index)
+  end function aabb_tree_get_parent_node
+
+  !> @brief Returns the left node of the node at the given index.
+  pure function aabb_tree_get_left_node(this, node_index) result(left_node)
+    class(aabb_tree_t), intent(in) :: this
+    integer, intent(in) :: node_index
+    type(aabb_node_t) :: left_node
+
+    left_node = this%nodes(this%nodes(node_index)%left_node_index)
+  end function aabb_tree_get_left_node
+
+  !> @brief Returns the right node of the node at the given index.
+  pure function aabb_tree_get_right_node(this, node_index) result(right_node)
+    class(aabb_tree_t), intent(in) :: this
+    integer, intent(in) :: node_index
+    type(aabb_node_t) :: right_node
+
+    right_node = this%nodes(this%nodes(node_index)%right_node_index)
+  end function aabb_tree_get_right_node
+
+  pure function aabb_tree_get_aabb(this, node_index) result(out_box)
+    class(aabb_tree_t), intent(in) :: this
+    integer, intent(in) :: node_index
+    type(aabb_t) :: out_box
+
+    out_box = this%nodes(node_index)%aabb
+  end function aabb_tree_get_aabb
+
+  ! -------------------------------------------------------------------------- !
+
+  !> @brief Inserts an object into the tree.
+  subroutine aabb_tree_insert_object(this, object, object_index)
+    class(aabb_tree_t), intent(inout) :: this
+    class(*), intent(in) :: object
+    integer, intent(in) :: object_index
+
+    integer :: node_index
+
+    node_index = this%allocate_node()
+    this%nodes(node_index)%aabb = get_aabb(object)
+    this%nodes(node_index)%object_index = object_index
+
+    call this%insert_leaf(node_index)
+  end subroutine aabb_tree_insert_object
+
+  !> @brief Queries the tree for overlapping objects.
+  subroutine aabb_tree_query_overlaps(this, object, object_index, overlaps)
+    use stack, only: stack_i4_t
+    implicit none
+
+    class(aabb_tree_t), intent(in) :: this
+    class(*), intent(in) :: object
+    integer, intent(in) :: object_index
+    integer, intent(out) :: overlaps(:)
+
+    type(stack_i4_t) :: simple_stack
+    type(aabb_t) :: object_box
+
+    integer :: root_index, left_index, right_index
+
+    integer :: node_index
+
+    object_box = get_aabb(object)
+    root_index = this%get_root_index()
+
+    call simple_stack%push(root_index)
+
+    do while (.not. simple_stack%is_empty())
+       node_index = simple_stack%pop()
+
+       if (node_index == AABB_NULL_NODE) cycle
+
+       if (this%nodes(node_index)%aabb%overlaps(object_box)) then
+          if (this%nodes(node_index)%is_leaf()) then
+             if (.not. this%nodes(node_index)%object_index == object_index) then
+                overlaps = [this%nodes(node_index)%object_index, overlaps]
+             end if
+          else
+             left_index = this%get_left_index(node_index)
+             call simple_stack%push(left_index)
+             right_index = this%get_right_index(node_index)
+             call simple_stack%push(right_index)
+          end if
+       end if
+    end do
+  end subroutine aabb_tree_query_overlaps
+
+  ! -------------------------------------------------------------------------- !
+  ! Internal methods
+
+  !> @brief Allocates a new node in the tree.
+  function aabb_tree_allocate_node(this) result(node_index)
+    class(aabb_tree_t), intent(inout) :: this
+    integer :: node_index
+
+    if (this%next_free_node_index == AABB_NULL_NODE) then
+       call this%resize_node_pool(this%node_capacity + this%growth_size)
+    end if
+
+    node_index = this%next_free_node_index
+
+    associate(new_node => this%nodes(node_index))
+      this%next_free_node_index = new_node%next_node_index
+
+      new_node%parent_node_index = AABB_NULL_NODE
+      new_node%left_node_index = AABB_NULL_NODE
+      new_node%right_node_index = AABB_NULL_NODE
+
+      this%next_free_node_index = new_node%next_node_index
+      this%allocated_node_count = this%allocated_node_count + 1
+
+    end associate
+  end function aabb_tree_allocate_node
+
+  !> @brief Deallocates a node in the tree.
+  subroutine aabb_tree_deallocate_node(this, node_index)
+    class(aabb_tree_t), intent(inout) :: this
+    integer, intent(in) :: node_index
+
+    this%nodes(node_index)%next_node_index = this%next_free_node_index
+    this%next_free_node_index = node_index
+    this%allocated_node_count = this%allocated_node_count - 1
+  end subroutine aabb_tree_deallocate_node
+
+  !> @brief Inserts a leaf into the tree.
+  subroutine aabb_tree_insert_leaf(this, leaf_node_index)
+    class(aabb_tree_t), intent(inout) :: this
+    integer, intent(in) :: leaf_node_index
+
+    integer :: tree_node_index
+
+    real(kind=rp) :: cost_left
+    real(kind=rp) :: cost_right
+
+    type(aabb_node_t) :: leaf_node
+    type(aabb_node_t) :: tree_node
+    type(aabb_node_t) :: left_node
+    type(aabb_node_t) :: right_node
+
+    type(aabb_t) :: combined_aabb
+    real(kind=rp) :: new_parent_node_cost
+    real(kind=rp) :: minimum_push_down_cost
+    type(aabb_t) :: new_left_aabb
+    type(aabb_t) :: new_right_aabb
+    integer :: leaf_sibling_index
+    type(aabb_node_t) :: leaf_sibling
+    integer :: old_parent_index
+    integer :: new_parent_index
+    type(aabb_node_t) :: new_parent
+    type(aabb_node_t) :: old_parent
+
+    ! make sure were inserting a new leaf
+    leaf_node = this%nodes(leaf_node_index)
+
+    ! if the tree is empty then we make the root the leaf
+    if (this%root_node_index .eq. AABB_NULL_NODE) then
+       this%root_node_index = leaf_node_index
+       leaf_node%parent_node_index = AABB_NULL_NODE
+       leaf_node%left_node_index = AABB_NULL_NODE
+       leaf_node%right_node_index = AABB_NULL_NODE
+
+       return
+    end if
+
+    ! search for the best place to put the new leaf in the tree
+    ! we use surface area and depth as search heuristics
+    tree_node_index = this%root_node_index
+    tree_node = this%get_node(tree_node_index)
+    do while (.not. tree_node%is_leaf())
+
+       ! because of the test in the while loop above we know we are never a
+       ! leaf inside it
+       left_node = this%get_left_node(tree_node_index)
+       right_node = this%get_right_node(tree_node_index)
+
+       ! ------------------------------------------------------------------- !
+
+       combined_aabb = merge(tree_node%aabb, leaf_node%get_aabb())
+
+       new_parent_node_cost = 2.0_rp * combined_aabb%get_surface_area()
+       minimum_push_down_cost = 2.0_rp * ( &
+         & combined_aabb%get_surface_area() &
+         & - tree_node%aabb%get_surface_area()&
+         & )
+
+       ! use the costs to figure out whether to create a new parent here or
+       ! descend
+       if (left_node%is_leaf()) then
+          new_left_aabb = merge(leaf_node%aabb, left_node%get_aabb())
+          cost_left = new_left_aabb%get_surface_area() + minimum_push_down_cost
+       else
+          new_left_aabb = merge(leaf_node%aabb, left_node%get_aabb())
+          cost_left = ( &
+            & new_left_aabb%get_surface_area() &
+            & - left_node%aabb%get_surface_area()&
+            & ) + minimum_push_down_cost
+       end if
+
+       if (right_node%is_leaf()) then
+          new_right_aabb = merge(leaf_node%aabb, right_node%aabb)
+          cost_right = new_right_aabb%get_surface_area() + minimum_push_down_cost
+       else
+          new_right_aabb = merge(leaf_node%aabb, right_node%aabb)
+          cost_right = ( &
+            & new_right_aabb%get_surface_area() &
+            & - right_node%aabb%get_surface_area() &
+            & ) + minimum_push_down_cost
+       end if
+
+       ! if the cost of creating a new parent node here is less than descending
+       ! in either direction then we know we need to create a new parent node,
+       ! errrr, here and attach the leaf to that
+       if (new_parent_node_cost < cost_left .and. new_parent_node_cost < cost_right) then
+          exit
+       end if
+
+       ! otherwise descend in the cheapest direction
+       if (cost_left .lt. cost_right) then
+          tree_node_index = tree_node%get_left_index()
+       else
+          tree_node_index = tree_node%get_right_index()
+       end if
+
+       ! ------------------------------------------------------------------- !
+       ! Update the node and continue the loop
+       tree_node = this%get_node(tree_node_index)
+    end do
+
+    ! the leafs sibling is going to be the node we found above and we are
+    ! going to create a new parent node and attach the leaf and this item
+    leaf_sibling_index = tree_node_index
+    leaf_sibling = this%nodes(leaf_sibling_index)
+    old_parent_index = this%get_parent_index(leaf_sibling_index)
+    new_parent_index = this%allocate_node()
+    new_parent = this%nodes(new_parent_index)
+    new_parent%parent_node_index = old_parent_index
+    new_parent%aabb = merge(leaf_node%aabb, leaf_sibling%aabb)
+
+    if (leaf_node .lt. leaf_sibling) then
+       new_parent%left_node_index = leaf_node_index
+       new_parent%right_node_index = leaf_sibling_index
+    else
+       new_parent%left_node_index = leaf_sibling_index
+       new_parent%right_node_index = leaf_node_index
+    end if
+
+    leaf_node%parent_node_index = new_parent_index
+    leaf_sibling%parent_node_index = new_parent_index
+
+    if (old_parent_index .eq. AABB_NULL_NODE) then
+       ! the old parent was the root and so this is now the root
+       this%root_node_index = new_parent_index
+    else
+       ! the old parent was not the root and so we need to patch the left or
+       ! right index to point to the new node
+       old_parent = this%nodes(old_parent_index)
+       if (old_parent%left_node_index .eq. leaf_sibling_index) then
+          old_parent%left_node_index = new_parent_index
+       else
+          old_parent%right_node_index = new_parent_index
+       end if
+       this%nodes(old_parent_index) = old_parent
+    end if
+
+    this%nodes(leaf_node_index) = leaf_node
+    this%nodes(leaf_sibling_index) = leaf_sibling
+    this%nodes(new_parent_index) = new_parent
+
+    ! finally we need to walk back up the tree fixing heights and areas
+    tree_node_index = leaf_node%parent_node_index
+
+    call this%fix_upwards_tree(tree_node_index)
+
+  end subroutine aabb_tree_insert_leaf
+
+  !> @brief Validates the tree.
+  function aabb_tree_valid_tree(this) result(valid)
+    use stack, only: stack_i4_t
+    implicit none
+
+    class(aabb_tree_t), intent(in) :: this
+    logical :: valid
+
+    type(stack_i4_t) :: simple_stack
+    integer :: current_index
+    integer :: root_index, left_index, right_index
+
+    valid = .true.
+    if (this%root_node_index .eq. AABB_NULL_NODE) then
+       valid = .false.
+    end if
+
+    root_index = this%get_root_index()
+
+    call simple_stack%init(this%node_capacity)
+    call simple_stack%push(root_index)
+
+    do while (.not. simple_stack%is_empty())
+       current_index = simple_stack%pop()
+       if (current_index == AABB_NULL_NODE) cycle
+
+       valid = valid .and. this%nodes(current_index)%is_valid()
+
+       if (.not. this%nodes(current_index)%is_leaf()) then
+          left_index = this%get_left_index(current_index)
+          right_index = this%get_right_index(current_index)
+
+          call simple_stack%push(left_index)
+          call simple_stack%push(right_index)
+       end if
+    end do
+  end function aabb_tree_valid_tree
+
+  !> @brief Fixes the tree upwards.
+  !! @details This method is used to fix the tree upwards after an insertion.
+  !! It is used to expand the nodes of the tree to fit the new leaf node.
+  subroutine aabb_tree_fix_upwards_tree(this, tree_start_index)
+    class(aabb_tree_t), intent(inout) :: this
+    integer, intent(in) :: tree_start_index
+
+    type(aabb_node_t) :: left_node
+    type(aabb_node_t) :: right_node
+    integer :: tree_node_index
+
+    tree_node_index = tree_start_index
+    do while (tree_node_index .ne. AABB_NULL_NODE)
+       left_node = this%get_left_node(tree_node_index)
+       right_node = this%get_right_node(tree_node_index)
+
+       this%nodes(tree_node_index)%aabb = merge(left_node%aabb, right_node%aabb)
+
+       tree_node_index = this%get_parent_index(tree_node_index)
+    end do
+  end subroutine aabb_tree_fix_upwards_tree
+
+  !> @brief Prints the tree.
+  subroutine aabb_tree_print(this)
+    use stack, only: stack_i4_t
+    class(aabb_tree_t), intent(inout) :: this
+    type(stack_i4_t) :: simple_stack
+
+    integer :: current_index
+    integer :: root_index, left_index, right_index
+
+    root_index = this%get_root_index()
+    call simple_stack%init(this%node_capacity)
+    call simple_stack%push(root_index)
+
+    do while (.not. simple_stack%is_empty())
+       current_index = simple_stack%pop()
+       if (current_index .eq. AABB_NULL_NODE) cycle
+
+       left_index = this%get_left_index(current_index)
+       right_index = this%get_right_index(current_index)
+
+       call simple_stack%push(this%nodes(current_index)%left_node_index)
+       call simple_stack%push(this%nodes(current_index)%right_node_index)
+
+       write(*, *) "i = ", current_index
+       write(*, *) "  Parent  : ", this%get_parent_index(current_index)
+       write(*, *) "  Children: ", this%get_left_index(current_index), this%get_right_index(current_index)
+
+       write(*, *) "  object_index = ", this%nodes(current_index)%object_index
+    end do
+
+  end subroutine aabb_tree_print
+
+  !> @brief Resizes the node pool.
+  subroutine aabb_tree_resize_node_pool(this, new_capacity)
+    class(aabb_tree_t), intent(inout) :: this
+    integer, intent(in) :: new_capacity
+
+    type(aabb_node_t), dimension(:), allocatable :: temp
+    integer :: i
+
+    allocate(temp(new_capacity))
+    temp(:this%node_capacity) = this%nodes(:this%node_capacity)
+
+    do i = this%allocated_node_count, new_capacity
+       temp(i)%next_node_index = i + 1
+    end do
+    temp(new_capacity)%next_node_index = AABB_NULL_NODE
+
+    call move_alloc(temp, this%nodes)
+
+    this%node_capacity = new_capacity
+    this%next_free_node_index = this%allocated_node_count + 1
+
+  end subroutine aabb_tree_resize_node_pool
+
+end module aabb_tree
diff --git a/src/mesh/curve.f90 b/src/mesh/curve.f90
index 6d013350468..b76f034ec23 100644
--- a/src/mesh/curve.f90
+++ b/src/mesh/curve.f90
@@ -21,7 +21,7 @@ module curve
      procedure, pass(z) :: add_element => curve_element_add
 !     procedure, pass(z) :: apply_xyz => curve_apply_xyz
   end type curve_t
-  
+
 contains
 
   !> Initialize a curved domain
@@ -36,7 +36,7 @@ subroutine curve_element_init(z, size)
     else
        call z%scratch%init()
     end if
-    
+
   end subroutine curve_element_init
 
   !> Deallocate a domain
@@ -50,24 +50,23 @@ subroutine curve_element_free(z)
     z%size = 0
 
     call z%scratch%free()
-    
+
   end subroutine curve_element_free
 
   !> Finalize a domain list
   !! @details Create a static list of (facet,el) tuples
   subroutine curve_element_finalize(z)
     class(curve_t), target, intent(inout) :: z
-    class(struct_curve_t), pointer :: tp(:)
     integer :: i
-    
+
     if (.not. z%finalized) then
 
        allocate(z%curve_el(z%scratch%size()))
        select type (tp=>z%scratch%data)
-       type is (struct_curve_t)           
-       do i = 1, z%scratch%size()
-          z%curve_el(i) = tp(i)
-       end do
+       type is (struct_curve_t)
+          do i = 1, z%scratch%size()
+             z%curve_el(i) = tp(i)
+          end do
        end select
 
        z%size = z%scratch%size()
@@ -75,9 +74,9 @@ subroutine curve_element_finalize(z)
        call z%scratch%clear()
 
        z%finalized = .true.
-       
+
     end if
-    
+
   end subroutine curve_element_finalize
 
   !> Add a (facet, el) tuple to an unfinalized domain
@@ -108,14 +107,14 @@ end subroutine curve_element_add
 !       el_idx = this%curve_el(i)%el_idx
 !       do j = 1, 12
 !          if (this%curve_el(i)%curve_type(j) .eq. 1) then
-!             !call sphere_surface(j, this%curve_el(i)%curve_data(:,j),x(1,el_idx), y(1,el_idx), z(1, el_idx)) 
+!             !call sphere_surface(j, this%curve_el(i)%curve_data(:,j),x(1,el_idx), y(1,el_idx), z(1, el_idx))
 !          else if (this%curve_el(i)%curve_type(j) .eq. 2) then
-!             !call generate_surface(j, this%curve_el(i)%curve_data(:,j), x(1,1,1,el_idx), y(1,1,1,el_idx), z(1,1,1, el_idx)) 
+!             !call generate_surface(j, this%curve_el(i)%curve_data(:,j), x(1,1,1,el_idx), y(1,1,1,el_idx), z(1,1,1, el_idx))
 !          end if
 !       end do
 !       do j = 1, 12
 !          if (this%curve_el(i)%curve_type(j) .eq. 3) then
-!             call arc_surface(j, this%curve_el(i)%curve_data(:,j),x(1,1,1,el_idx), y(1,1,1,el_idx), z(1,1,1, el_idx)) 
+!             call arc_surface(j, this%curve_el(i)%curve_data(:,j),x(1,1,1,el_idx), y(1,1,1,el_idx), z(1,1,1, el_idx))
 !          end if
 !       end do
 !
@@ -123,15 +122,15 @@ end subroutine curve_element_add
 !  end subroutine curve_apply_xyz
 
 
-!  subroutine sphere_surface(face, curve_data, x, y, z, Xh) 
+!  subroutine sphere_surface(face, curve_data, x, y, z, Xh)
 !!  Major overhaul.
 !!  Martin Karp 1 March 2021 15:30:15
 !!
-!!  5 Aug 1988 19:29:52 
+!!  5 Aug 1988 19:29:52
 !!
 !!  Program to generate spherical shell elements for NEKTON
 !!  input.  Paul F. Fischer
-!  
+!
 !    NXM1 = NX-1
 !    NYM1 = NY-1
 !    NXY  = NX*NZ
@@ -146,8 +145,8 @@ end subroutine curve_element_add
 !
 !    CALL CRN3D(XCV,XC(1,IE),YC(1,IE),ZC(1,IE),CURVE(1,IFCE,IE),IFACE)
 !
-!    Generate edge vectors on the sphere RR=1.0, 
-!    for (r,s) = (-1,*),(1,*),(*,-1),(*,1)      
+!    Generate edge vectors on the sphere RR=1.0,
+!    for (r,s) = (-1,*),(1,*),(*,-1),(*,1)
 !
 !    CALL EDG3D(XYSRF,XCV(1,1,1),XCV(1,1,2), 1, 1, 1,NY,NX,NY)
 !    CALL EDG3D(XYSRF,XCV(1,2,1),XCV(1,2,2),NX,NX, 1,NY,NX,NY)
diff --git a/src/mesh/element.f90 b/src/mesh/element.f90
index a353b9ea3c4..165ff7413d1 100644
--- a/src/mesh/element.f90
+++ b/src/mesh/element.f90
@@ -36,7 +36,7 @@ module element
   use tuple, only : tuple_t
   use point, only : point_ptr, point_t
   implicit none
-  private 
+  private
 
   !> Base type for an element
   !! @details An element is a collection of @a npts_ points forming an
@@ -44,13 +44,13 @@ module element
   type, public, extends(entity_t), abstract :: element_t
      integer, private :: gdim_              !< Geometric dimension
      integer, private :: npts_              !< number of points
-     type(point_ptr), allocatable :: pts(:) !< Points of an element 
+     type(point_ptr), allocatable :: pts(:) !< Points of an element
    contains
      procedure, pass(this) :: element => element_init
      procedure, pass(this) :: free => element_free
      procedure, pass(this) :: gdim => element_gdim
      procedure, pass(this) :: npts => element_npts
-     procedure, pass(this) :: p => element_point 
+     procedure, pass(this) :: p => element_point
      procedure, pass(this) :: n_points => element_npts
      procedure, pass(this), non_overridable :: element_point
      procedure(element_equal), pass(this), deferred :: equal
@@ -88,7 +88,7 @@ end function element_equal
   end interface
 
   abstract interface
-     subroutine element_facet_id(this, t, side) 
+     subroutine element_facet_id(this, t, side)
        import :: element_t
        import :: tuple_t
        class(element_t), intent(in) :: this
@@ -98,7 +98,7 @@ end subroutine element_facet_id
   end interface
 
   abstract interface
-     subroutine element_facet_order(this, t, side) 
+     subroutine element_facet_order(this, t, side)
        import :: element_t
        import :: tuple_t
        class(element_t), intent(in) :: this
@@ -118,7 +118,7 @@ subroutine element_init(this, id, gdim, npts)
     call this%free()
 
     call this%set_id(id)
-    
+
     this%gdim_ = gdim
     this%npts_ = npts
 
@@ -133,9 +133,9 @@ subroutine element_free(this)
     if (allocated(this%pts)) then
        deallocate(this%pts)
     end if
-    
+
   end subroutine element_free
-  
+
   !> Get the geometric dimension of an element
   pure function element_gdim(this) result(gdim)
     class(element_t), intent(in) :: this
diff --git a/src/mesh/entity.f90 b/src/mesh/entity.f90
index 8d003497b59..c9340a77161 100644
--- a/src/mesh/entity.f90
+++ b/src/mesh/entity.f90
@@ -33,13 +33,13 @@
 module entity
   implicit none
   private
-  
+
   !> Base type for an entity
-  type, public, abstract :: entity_t 
+  type, public, abstract :: entity_t
      integer, private :: id_ = -1 !< Entity index
    contains
      procedure, pass(this) :: id => entity_id
-     procedure, pass(this) :: set_id => entity_set_id     
+     procedure, pass(this) :: set_id => entity_set_id
   end type entity_t
 
 contains
diff --git a/src/mesh/facet_zone.f90 b/src/mesh/facet_zone.f90
index f7812f53294..9c6c44b5777 100644
--- a/src/mesh/facet_zone.f90
+++ b/src/mesh/facet_zone.f90
@@ -37,7 +37,7 @@ module facet_zone
   use utils, only : neko_error
   implicit none
   private
-  
+
   type, public :: facet_zone_t
      type(tuple_i4_t), allocatable :: facet_el(:)
      integer :: size
@@ -63,7 +63,7 @@ module facet_zone
      procedure, pass(z) :: finalize => facet_zone_periodic_finalize
      procedure, pass(z) :: add_periodic_facet => facet_zone_periodic_add_facet
   end type facet_zone_periodic_t
- 
+
 contains
 
   !> Initialize a facet zone
@@ -78,7 +78,7 @@ subroutine facet_zone_init(z, size)
     else
        call z%scratch%init()
     end if
-    
+
   end subroutine facet_zone_init
 
   !> Deallocate a facet zone
@@ -92,7 +92,7 @@ subroutine facet_zone_free(z)
     z%size = 0
 
     call z%scratch%free()
-    
+
   end subroutine facet_zone_free
 
   !> Finalize a zone list
@@ -101,11 +101,11 @@ subroutine facet_zone_finalize(z)
     class(facet_zone_t), intent(inout) :: z
     type(tuple_i4_t), pointer :: tp(:)
     integer :: i
-    
+
     if (.not. z%finalized) then
 
        allocate(z%facet_el(z%scratch%size()))
-       
+
        tp => z%scratch%array()
        do i = 1, z%scratch%size()
           z%facet_el(i) = tp(i)
@@ -116,9 +116,9 @@ subroutine facet_zone_finalize(z)
        call z%scratch%clear()
 
        z%finalized = .true.
-       
+
     end if
-    
+
   end subroutine facet_zone_finalize
 
   !> Add a (facet, el) tuple to an unfinalized zone
@@ -134,10 +134,10 @@ subroutine facet_zone_add_facet(z, facet, el)
 
     t%x = (/ facet, el /)
     call z%scratch%push(t)
-    
+
   end subroutine facet_zone_add_facet
 
-    !> Initialize a periodic zone
+  !> Initialize a periodic zone
   subroutine facet_zone_periodic_init(z, size)
     class(facet_zone_periodic_t), intent(inout) :: z
     integer, optional :: size
@@ -155,7 +155,7 @@ subroutine facet_zone_periodic_init(z, size)
        call z%p_id_scratch%init()
        call z%org_id_scratch%init()
     end if
-    
+
   end subroutine facet_zone_periodic_init
 
   !> Deallocate a zone
@@ -178,7 +178,7 @@ subroutine facet_zone_periodic_free(z)
     call z%p_scratch%free()
     call z%p_id_scratch%free()
     call z%org_id_scratch%free()
-    
+
   end subroutine facet_zone_periodic_free
 
   !> Finalize a periodic zone list
@@ -189,7 +189,7 @@ subroutine facet_zone_periodic_finalize(z)
     type(tuple4_i4_t), pointer :: tp2(:)
     type(tuple4_i4_t), pointer :: tp3(:)
     integer :: i
-    
+
     if (.not. z%finalized) then
 
        call facet_zone_finalize(z)
@@ -201,7 +201,7 @@ subroutine facet_zone_periodic_finalize(z)
        allocate(z%p_facet_el(z%size))
        allocate(z%p_ids(z%size))
        allocate(z%org_ids(z%size))
-       
+
        tp => z%p_scratch%array()
        do i = 1, z%size
           z%p_facet_el(i) = tp(i)
@@ -220,7 +220,7 @@ subroutine facet_zone_periodic_finalize(z)
        call z%org_id_scratch%clear()
 
     end if
-    
+
   end subroutine facet_zone_periodic_finalize
 
   !> Add a (facet, el) tuple to an unfinalized zone
@@ -248,7 +248,7 @@ subroutine facet_zone_periodic_add_facet(z, facet, el, p_facet, p_el, pids, org_
     call z%p_id_scratch%push(t2)
     t3%x = org_ids
     call z%org_id_scratch%push(t3)
-    
+
   end subroutine facet_zone_periodic_add_facet
 
 end module facet_zone
diff --git a/src/mesh/hex.f90 b/src/mesh/hex.f90
index b849c0863ac..a71d6ddd519 100644
--- a/src/mesh/hex.f90
+++ b/src/mesh/hex.f90
@@ -51,12 +51,12 @@ module hex
   !! @verbatim
   !! Node numbering (NEKTON symmetric notation)
   !!
-  !!          3+-----+4    ^ s                 
-  !!          /     /|     |                   
-  !!         /     / |     |                   
-  !!       7+-----+8 +2    +----> r            
-  !!        |     | /     /                    
-  !!        |     |/     /                     
+  !!          3+-----+4    ^ s
+  !!          /     /|     |
+  !!         /     / |     |
+  !!       7+-----+8 +2    +----> r
+  !!        |     | /     /
+  !!        |     |/     /
   !!       5+-----+6    t
   !!
   !! @endverbatim
@@ -67,7 +67,7 @@ module hex
      procedure, pass(this) :: facet_order => hex_facet_order
      procedure, pass(this) :: diameter => hex_diameter
      procedure, pass(this) :: centroid => hex_centroid
-     procedure, pass(this) :: edge_id => hex_edge_id          
+     procedure, pass(this) :: edge_id => hex_edge_id
      procedure, pass(this) :: equal => hex_equal
      generic :: operator(.eq.) => equal
   end type hex_t
@@ -76,7 +76,7 @@ module hex
   !! @details
   !! @verbatim
   !! Face numbering (NEKTON symmetric notation)
-  !!                     
+  !!
   !!          +--------+     ^ S
   !!         /        /|     |
   !!        /    4   / |     |
@@ -97,13 +97,13 @@ module hex
                                                                 1,2,4,3,&
                                                                 5,6,8,7/),&
                                                                 (/4,6/))
-  
+
   !> Edge node ids
   !! @details
   !! @verbatim
   !! Edge numbering (similar to NEKTON symmetric notation)
   !!
-  !!              2      
+  !!              2
   !!          +--------+        ^ S
   !!         /        /|        |
   !!  11--> /   12-->/ | <--6   |
@@ -129,9 +129,9 @@ module hex
                                                                 3,7,&
                                                                 4,8/),&
                                                                 (/2,12/))
-  
+
 contains
-  
+
   !> Create a hexahedron element based upon eight points
   subroutine hex_init(this, id, p1, p2, p3, p4, p5, p6, p7, p8)
     class(hex_t), intent(inout) :: this
@@ -139,7 +139,7 @@ subroutine hex_init(this, id, p1, p2, p3, p4, p5, p6, p7, p8)
     type(point_t), target, intent(in) :: p1, p2, p3, p4, p5, p6, p7, p8
 
     call this%element(id, NEKO_HEX_GDIM, NEKO_HEX_NPTS)
-    
+
     this%pts(1)%p => p1
     this%pts(2)%p => p2
     this%pts(3)%p => p3
@@ -152,7 +152,7 @@ subroutine hex_init(this, id, p1, p2, p3, p4, p5, p6, p7, p8)
   end subroutine hex_init
 
   !> Return the facet id for face @a i as a 4-tuple @a t
-  subroutine hex_facet_id(this, t, side) 
+  subroutine hex_facet_id(this, t, side)
     class(hex_t), intent(in) :: this
     class(tuple_t), intent(inout) :: t
     integer, intent(in) :: side
@@ -167,7 +167,7 @@ subroutine hex_facet_id(this, t, side)
     select type(t)
     type is(tuple4_i4_t)
        t%x = (/ p1%id(), p2%id(), p3%id(), p4%id() /)
-       do i = 1, 3 
+       do i = 1, 3
           do j = i+1,4
              if(t%x(j) .lt. t%x(i)) then
                 temp = t%x(i)
@@ -181,7 +181,7 @@ subroutine hex_facet_id(this, t, side)
   end subroutine hex_facet_id
 
   !> Return the ordered points for face @a i as a 4-tuple @a t
-  subroutine hex_facet_order(this, t, side) 
+  subroutine hex_facet_order(this, t, side)
     class(hex_t), intent(in) :: this
     class(tuple_t), intent(inout) :: t
     integer, intent(in) :: side
@@ -201,7 +201,7 @@ end subroutine hex_facet_order
 
 
   !> Return the edge id for an edge @a i as a 2-tuple @a t
-  subroutine hex_edge_id(this, t, side) 
+  subroutine hex_edge_id(this, t, side)
     class(hex_t), intent(in) :: this
     class(tuple_t), intent(inout) :: t
     integer, intent(in) :: side
@@ -214,14 +214,14 @@ subroutine hex_edge_id(this, t, side)
     type is(tuple_i4_t)
        if (p1%id() .lt. p2%id()) then
           t%x = (/ p1%id(), p2%id() /)
-      else
+       else
           t%x = (/ p2%id(), p1%id() /)
-      endif
+       endif
 
     end select
 
   end subroutine hex_edge_id
-  
+
   !> Compute the diameter of a hexahedron element
   function hex_diameter(this) result(res)
     class(hex_t), intent(in) :: this
@@ -275,7 +275,7 @@ function hex_centroid(this) result(res)
        res%x(i) = 0.125 * (p1%x(i) + p2%x(i) + p3%x(i) + p4%x(i) + &
             p5%x(i) + p6%x(i) + p7%x(i) + p8%x(i))
     end do
-    
+
   end function hex_centroid
 
   !> Check if two hex elements are equal
@@ -301,5 +301,5 @@ pure function hex_equal(this, other) result(res)
     end select
 
   end function hex_equal
-  
+
 end module hex
diff --git a/src/mesh/mesh.f90 b/src/mesh/mesh.f90
index 15ade4b52fc..4f60b823fb2 100644
--- a/src/mesh/mesh.f90
+++ b/src/mesh/mesh.f90
@@ -43,15 +43,18 @@ module mesh
   use htable
   use datadist
   use distdata
-  use comm    
+  use comm
   use facet_zone, only : facet_zone_t, facet_zone_periodic_t
   use math
   use uset, only : uset_i8_t
   use curve, only : curve_t
   implicit none
   private
-  
-  integer, public, parameter :: NEKO_MSH_MAX_ZLBLS = 20 !< Max num. zone labels
+
+  !> Max num. zone labels
+  integer, public, parameter :: NEKO_MSH_MAX_ZLBLS = 20
+  !> Max length of a zone label
+  integer, public, parameter :: NEKO_MSH_MAX_ZLBL_LEN = 40
 
   type, private :: mesh_element_t
      class(element_t), allocatable :: e
@@ -69,14 +72,14 @@ module mesh
      integer :: glb_mpts        !< Global number of unique points
      integer :: glb_mfcs        !< Global number of unique faces
      integer :: glb_meds        !< Global number of unique edges
-     
+
      integer :: offset_el       !< Element offset
      integer :: max_pts_id      !< Max local point id
-     
+
      type(point_t), allocatable :: points(:) !< list of points
      type(mesh_element_t), allocatable :: elements(:) !< List of elements
      logical, allocatable :: dfrmd_el(:) !< List of elements
-     
+
      type(htable_i4_t) :: htp   !< Table of unique points (global->local)
      type(htable_i4t4_t) :: htf !< Table of unique faces (facet->local id)
      type(htable_i4t2_t) :: hte !< Table of unique edges (edge->local id)
@@ -86,15 +89,15 @@ module mesh
      !> Facet to element's id tuple and the mapping of the
      !! points between lower id element and higher
      !! \f$ t=(low_id element, element with higher global id) \f$
-     class(htable_t), allocatable :: facet_map 
+     class(htable_t), allocatable :: facet_map
      type(stack_i4_t), allocatable :: point_neigh(:) !< Point to neigh. table
 
      type(distdata_t) :: ddata            !< Mesh distributed data
      logical, allocatable :: neigh(:)     !< Neighbouring ranks
      integer, allocatable :: neigh_order(:) !< Neighbour order
 
-     integer(2), allocatable :: facet_type(:,:) !< Facet type     
-     
+     integer(2), allocatable :: facet_type(:,:) !< Facet type
+
      type(facet_zone_t) :: wall                 !< Zone of wall facets
      type(facet_zone_t) :: inlet                !< Zone of inlet facets
      type(facet_zone_t) :: outlet               !< Zone of outlet facets
@@ -162,7 +165,7 @@ module mesh
 
   abstract interface
      subroutine mesh_deform(msh, x, y, z, lx, ly, lz)
-       import mesh_t       
+       import mesh_t
        import rp
        class(mesh_t) :: msh
        integer, intent(in) :: lx, ly, lz
@@ -173,8 +176,8 @@ end subroutine mesh_deform
   end interface
 
   public :: mesh_deform
-  
-contains 
+
+contains
 
   !> Initialise a mesh @a this with @a nelv elements
   subroutine mesh_init_nelv(this, gdim, nelv)
@@ -182,7 +185,7 @@ subroutine mesh_init_nelv(this, gdim, nelv)
     integer, intent(in) :: gdim          !< Geometric dimension
     integer, intent(in) :: nelv          !< Local number of elements
     integer :: ierr
-    
+
     call this%free()
 
     this%nelv = nelv
@@ -196,7 +199,7 @@ subroutine mesh_init_nelv(this, gdim, nelv)
          MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr)
 
     call mesh_init_common(this)
-    
+
   end subroutine mesh_init_nelv
 
   !> Initialise a mesh @a this based on a distribution @a dist
@@ -206,14 +209,14 @@ subroutine mesh_init_dist(this, gdim, dist)
     type(linear_dist_t), intent(in) :: dist !< Data distribution
 
     call this%free()
-    
+
     this%nelv = dist%num_local()
     this%glb_nelv = dist%num_global()
     this%offset_el = dist%start_idx()
     this%gdim = gdim
 
     call mesh_init_common(this)
-    
+
   end subroutine mesh_init_dist
 
   subroutine mesh_init_common(this)
@@ -222,7 +225,7 @@ subroutine mesh_init_common(this)
     type(tuple_i4_t) :: facet_data
 
     this%max_pts_id = 0
-    
+
     allocate(this%elements(this%nelv))
     allocate(this%dfrmd_el(this%nelv))
     if (this%gdim .eq. 3) then
@@ -249,7 +252,7 @@ subroutine mesh_init_common(this)
        end do
        this%npts = NEKO_QUAD_NPTS
        if (this%lgenc) then
-          allocate(htable_i4t2_t::this%facet_map)       
+          allocate(htable_i4t2_t::this%facet_map)
           select type (fmp => this%facet_map)
           type is(htable_i4t2_t)
              call fmp%init(this%nelv, facet_data)
@@ -277,7 +280,7 @@ subroutine mesh_init_common(this)
 
     allocate(this%facet_type(2 * this%gdim, this%nelv))
     this%facet_type = 0
-    
+
     call this%htp%init(this%npts*this%nelv, i)
 
     call this%wall%init(this%nelv)
@@ -293,31 +296,29 @@ subroutine mesh_init_common(this)
     end do
 
     call this%curve%init(this%nelv)
-   
+
     call distdata_init(this%ddata)
 
     allocate(this%neigh(0:pe_size-1))
     this%neigh = .false.
-    
+
     this%mpts = 0
     this%mfcs = 0
     this%meds = 0
 
   end subroutine mesh_init_common
-  
+
   !> Deallocate a mesh @a this
   subroutine mesh_free(this)
     class(mesh_t), intent(inout) :: this
     integer :: i
-    
+
     call this%htp%free()
     call this%htf%free()
     call this%hte%free()
     call distdata_free(this%ddata)
-    
-    if (allocated(this%points)) then
-       deallocate(this%points)
-    end if
+
+
     if (allocated(this%dfrmd_el)) then
        deallocate(this%dfrmd_el)
     end if
@@ -369,20 +370,27 @@ subroutine mesh_free(this)
        deallocate(this%neigh_order)
     end if
 
+    if (allocated(this%points)) then
+       deallocate(this%points)
+    end if
+
     call this%wall%free()
     call this%inlet%free()
     call this%outlet%free()
     call this%outlet_normal%free()
     call this%sympln%free()
     call this%periodic%free()
-    
+    this%lconn = .false.
+    this%lnumr = .false.
+    this%ldist = .false.
+    this%lgenc = .true.
+
   end subroutine mesh_free
 
   subroutine mesh_finalize(this)
     class(mesh_t), target, intent(inout) :: this
     integer :: i
-
-
+    
     call mesh_generate_flags(this)
     call mesh_generate_conn(this)
 
@@ -436,7 +444,7 @@ subroutine mesh_all_deformed(this)
     class(mesh_t), intent(inout) :: this
     this%dfrmd_el = .true.
   end subroutine mesh_all_deformed
-  
+
   !> Generate element-to-element connectivity
   subroutine mesh_generate_conn(this)
     class(mesh_t), target, intent(inout) :: this
@@ -444,12 +452,56 @@ subroutine mesh_generate_conn(this)
     type(tuple4_i4_t) :: face, face_comp
     type(tuple_i4_t) :: facet_data
     type(stack_i4_t) :: neigh_order
-
+    class(element_t), pointer :: ep
+    type(tuple_i4_t) :: e
+    type(tuple4_i4_t) :: f
+    integer :: p_local_idx, res
+    integer :: el, id
     integer :: i, j, k, ierr, el_glb_idx, n_sides, n_nodes, src, dst
 
     if (this%lconn) return
 
     if (.not. this%lgenc) return
+ 
+    !If we generate connectivity, we do that here.
+    do el = 1, this%nelv
+       ep => this%elements(el)%e
+       select type(ep)
+       type is (hex_t)
+          do i = 1, NEKO_HEX_NPTS
+             !Only for getting the id
+             call this%add_point(ep%pts(i)%p, id)
+             p_local_idx = this%get_local(this%points(id))
+             !should stack have inout on what we push? would be neat with in
+             id = ep%id()
+             call this%point_neigh(p_local_idx)%push(id)
+         end do
+         do i = 1, NEKO_HEX_NFCS
+             call ep%facet_id(f, i)
+             call this%add_face(f)
+          end do
+
+          do i = 1, NEKO_HEX_NEDS
+             call ep%edge_id(e, i)
+             call this%add_edge(e)
+          end do
+       type is (quad_t)
+          do i = 1, NEKO_QUAD_NPTS
+             !Only for getting the id
+             call this%add_point(ep%pts(i)%p, id)
+             p_local_idx = this%get_local(this%points(id))
+             !should stack have inout on what we push? would be neat with in
+             id = ep%id()
+             call this%point_neigh(p_local_idx)%push(id)
+          end do
+
+          do i = 1, NEKO_QUAD_NEDS
+             call ep%facet_id(e, i)
+             call this%add_edge(e)
+          end do
+       end select
+    end do
+
 
     if (this%gdim .eq. 2) then
        n_sides = 4
@@ -466,90 +518,90 @@ subroutine mesh_generate_conn(this)
     !
     ! Find all (local) boundaries
     !
-    
+
     !> @note We have to sweep through the facet map twice to make sure
     !! that both odd and even sides are marked
     !! @todo These loop nests needs a lot of love...
     select type (fmp => this%facet_map)
     type is(htable_i4t2_t)
-       do k = 1, 2              
+       do k = 1, 2
           do i = 1, this%nelv
              el_glb_idx = i + this%offset_el
              do j = 1, n_sides
                 call this%elements(i)%e%facet_id(edge, j)
-                
+
                 ! Assume that all facets are on the exterior
                 facet_data%x = (/  0, 0/)
-                
+
                 !check it this face has shown up earlier
                 if (fmp%get(edge, facet_data) .eq. 0) then
-                  !if element is already recognized on face
-                  if (facet_data%x(1) .eq. el_glb_idx ) then
-                     this%facet_neigh(j, i) = facet_data%x(2)
-                  else if( facet_data%x(2) .eq. el_glb_idx) then
-                     this%facet_neigh(j, i) = facet_data%x(1)
-                  !if this is the second element, arrange so low id is first
-                  else if(facet_data%x(1) .gt. el_glb_idx) then 
-                    facet_data%x(2) = facet_data%x(1)
-                    facet_data%x(1) = el_glb_idx
-                    this%facet_neigh(j, i) = facet_data%x(2)
-                    call fmp%set(edge, facet_data)                             
-                  else if(facet_data%x(1) .lt. el_glb_idx) then 
-                    facet_data%x(2) = el_glb_idx
-                    this%facet_neigh(j, i) = facet_data%x(1)
-                    call fmp%set(edge, facet_data)
-                 end if
+                   !if element is already recognized on face
+                   if (facet_data%x(1) .eq. el_glb_idx ) then
+                      this%facet_neigh(j, i) = facet_data%x(2)
+                   else if( facet_data%x(2) .eq. el_glb_idx) then
+                      this%facet_neigh(j, i) = facet_data%x(1)
+                      !if this is the second element, arrange so low id is first
+                   else if(facet_data%x(1) .gt. el_glb_idx) then
+                      facet_data%x(2) = facet_data%x(1)
+                      facet_data%x(1) = el_glb_idx
+                      this%facet_neigh(j, i) = facet_data%x(2)
+                      call fmp%set(edge, facet_data)
+                   else if(facet_data%x(1) .lt. el_glb_idx) then
+                      facet_data%x(2) = el_glb_idx
+                      this%facet_neigh(j, i) = facet_data%x(1)
+                      call fmp%set(edge, facet_data)
+                   end if
                 else
                    facet_data%x(1) = el_glb_idx
                    this%facet_neigh(j, i) = facet_data%x(2)
-                   call fmp%set(edge, facet_data)               
+                   call fmp%set(edge, facet_data)
                 end if
              end do
           end do
        end do
     type is(htable_i4t4_t)
-        
-        do k = 1, 2
+
+       do k = 1, 2
           do i = 1, this%nelv
              el_glb_idx = i + this%offset_el
              do j = 1, n_sides
                 call this%elements(i)%e%facet_id(face, j)
-               
+
                 facet_data%x = (/ 0, 0/)
 
                 !check it this face has shown up earlier
                 if (fmp%get(face, facet_data) .eq. 0) then
-                  !if element is already recognized on face
-                  if (facet_data%x(1) .eq. el_glb_idx ) then
-                     this%facet_neigh(j, i) = facet_data%x(2)
-                     call this%elements(i)%e%facet_id(face_comp, &
-                                                      j+(2*mod(j,2)-1)) 
-                     if (face_comp .eq. face) then
-                       facet_data%x(2) = el_glb_idx
-                       this%facet_neigh(j, i) = facet_data%x(1)
-                       call fmp%set(face, facet_data)
-                    end if
-                  else if( facet_data%x(2) .eq. el_glb_idx) then
-                    this%facet_neigh(j, i) = facet_data%x(1)
-                  !if this is the second element, arrange so low id is first
-                  else if(facet_data%x(1) .gt. el_glb_idx) then 
-                    facet_data%x(2) = facet_data%x(1)
-                    facet_data%x(1) = el_glb_idx
-                    this%facet_neigh(j, i) = facet_data%x(2)
-                    call fmp%set(face, facet_data)                             
-                  else if(facet_data%x(1) .lt. el_glb_idx) then 
-                    facet_data%x(2) = el_glb_idx
-                    this%facet_neigh(j, i) = facet_data%x(1)
-                    call fmp%set(face, facet_data)
-                 end if
+                   !if element is already recognized on face
+                   if (facet_data%x(1) .eq. el_glb_idx ) then
+                      this%facet_neigh(j, i) = facet_data%x(2)
+                      call this%elements(i)%e%facet_id(face_comp, &
+                                                      j+(2*mod(j,2)-1))
+                      if (face_comp .eq. face) then
+                         facet_data%x(2) = el_glb_idx
+                         this%facet_neigh(j, i) = facet_data%x(1)
+                         call fmp%set(face, facet_data)
+                      end if
+                   else if( facet_data%x(2) .eq. el_glb_idx) then
+                      this%facet_neigh(j, i) = facet_data%x(1)
+                      !if this is the second element, arrange so low id is first
+                   else if(facet_data%x(1) .gt. el_glb_idx) then
+                      facet_data%x(2) = facet_data%x(1)
+                      facet_data%x(1) = el_glb_idx
+                      this%facet_neigh(j, i) = facet_data%x(2)
+                      call fmp%set(face, facet_data)
+                   else if(facet_data%x(1) .lt. el_glb_idx) then
+                      facet_data%x(2) = el_glb_idx
+                      this%facet_neigh(j, i) = facet_data%x(1)
+                      call fmp%set(face, facet_data)
+                   end if
                 else
                    facet_data%x(1) = el_glb_idx
                    this%facet_neigh(j, i) = 0
-                   call fmp%set(face, facet_data)               
+                   call fmp%set(face, facet_data)
                 end if
+             end do
           end do
        end do
-    end do
     class default
        call neko_error('Invalid facet map')
     end select
@@ -559,20 +611,20 @@ subroutine mesh_generate_conn(this)
     ! Find all external (between PEs) boundaries
     !
     if (pe_size .gt. 1) then
-       
+
        call mesh_generate_external_point_conn(this)
 
        !
        ! Generate neighbour exchange order
        !
        call neigh_order%init(pe_size)
-              
+
        do i = 1, pe_size - 1
           src = modulo(pe_rank - i + pe_size, pe_size)
           dst = modulo(pe_rank + i, pe_size)
           if (this%neigh(src) .or. this%neigh(dst)) then
              j = i ! adhere to standards...
-             call neigh_order%push(j)      
+             call neigh_order%push(j)
           end if
        end do
 
@@ -584,7 +636,7 @@ subroutine mesh_generate_conn(this)
           end do
        end select
        call neigh_order%free()
-       
+
        call mesh_generate_external_facet_conn(this)
     else
        allocate(this%neigh_order(1))
@@ -599,14 +651,14 @@ subroutine mesh_generate_conn(this)
     if (this%gdim .eq. 3) then
        call mesh_generate_edge_conn(this)
     end if
-    
+
 
     call mesh_generate_facet_numbering(this)
-    
+
     this%lconn = .true.
-    
+
   end subroutine mesh_generate_conn
- 
+
   !> Generate element-element connectivity via facets between PEs
   subroutine mesh_generate_external_facet_conn(this)
     type(mesh_t), intent(inout) :: this
@@ -630,7 +682,7 @@ subroutine mesh_generate_external_facet_conn(this)
     end if
 
     call buffer%init()
-        
+
     ! Build send buffers containing
     ! [el_glb_idx, side number, facet_id (global ids of points)]
     do i = 1, this%nelv
@@ -639,7 +691,7 @@ subroutine mesh_generate_external_facet_conn(this)
           facet = j             ! Adhere to standards...
           if (this%facet_neigh(j, i) .eq. 0) then
              if (n_nodes .eq. 2) then
-                call this%elements(i)%e%facet_id(edge, j)                
+                call this%elements(i)%e%facet_id(edge, j)
                 call buffer%push(el_glb_idx)
                 call buffer%push(facet)
                 do k = 1, n_nodes
@@ -662,7 +714,7 @@ subroutine mesh_generate_external_facet_conn(this)
          MPI_INTEGER, MPI_MAX, NEKO_COMM, ierr)
 
     allocate(recv_buffer(max_recv))
-    
+
     do i = 1, size(this%neigh_order)
        src = modulo(pe_rank - this%neigh_order(i) + pe_size, pe_size)
        dst = modulo(pe_rank + this%neigh_order(i), pe_size)
@@ -741,7 +793,7 @@ subroutine mesh_generate_external_facet_conn(this)
                       end if
                    end do
                    this%facet_neigh(facet, element) = -neigh_el
-                   facet_data%x(2) = -neigh_el                   
+                   facet_data%x(2) = -neigh_el
 
                    ! Update facet map
                    call fmp%set(face, facet_data)
@@ -764,10 +816,10 @@ subroutine mesh_generate_external_facet_conn(this)
        if (this%neigh(dst)) then
           call MPI_Wait(send_req, MPI_STATUS_IGNORE, ierr)
        end if
-       
+
     end do
 
-       
+
     deallocate(recv_buffer)
 
     call buffer%free()
@@ -783,13 +835,13 @@ subroutine mesh_generate_external_point_conn(this)
     integer :: i, j, k
     integer :: max_recv, ierr, src, dst, n_recv, neigh_el
     integer :: pt_glb_idx, pt_loc_idx, num_neigh
-    integer, pointer :: neighs(:)
+    integer, contiguous, pointer :: neighs(:)
+
 
-    
     call send_buffer%init(this%mpts * 2)
-    
+
     ! Build send buffers containing
-    ! [pt_glb_idx, #neigh, neigh id_1 ....neigh_id_n] 
+    ! [pt_glb_idx, #neigh, neigh id_1 ....neigh_id_n]
     do i = 1, this%mpts
        pt_glb_idx = this%points(i)%id() ! Adhere to standards...
        num_neigh = this%point_neigh(i)%size()
@@ -805,7 +857,7 @@ subroutine mesh_generate_external_point_conn(this)
     call MPI_Allreduce(send_buffer%size(), max_recv, 1, &
          MPI_INTEGER, MPI_MAX, NEKO_COMM, ierr)
     allocate(recv_buffer(max_recv))
-       
+
     do i = 1, pe_size - 1
        src = modulo(pe_rank - i + pe_size, pe_size)
        dst = modulo(pe_rank + i, pe_size)
@@ -830,14 +882,14 @@ subroutine mesh_generate_external_point_conn(this)
                 call distdata_set_shared_point(this%ddata, pt_loc_idx)
              end do
           end if
-          j = j + (2 + num_neigh)          
+          j = j + (2 + num_neigh)
        end do
-       
+
     end do
 
     deallocate(recv_buffer)
     call send_buffer%free()
-    
+
   end subroutine mesh_generate_external_point_conn
 
   !> Generate element-element connectivity via edges
@@ -852,7 +904,7 @@ subroutine mesh_generate_edge_conn(this)
     type(htable_i8_t) :: glb_to_loc
     type(MPI_Status) :: status
     type(MPI_Request) :: send_req, recv_req
-    integer, pointer :: p1(:), p2(:), ns_id(:)
+    integer, contiguous, pointer :: p1(:), p2(:), ns_id(:)
     integer :: i, j, id, ierr, num_edge_glb, edge_offset, num_edge_loc
     integer :: k, l , shared_offset, glb_nshared, n_glb_id
     integer(kind=i8) :: C, glb_max, glb_id
@@ -874,7 +926,7 @@ subroutine mesh_generate_edge_conn(this)
 
     !
     ! Determine/ constants used to generate unique global edge numbers
-    ! for shared edges 
+    ! for shared edges
     !
     C = int(this%glb_nelv, i8) * int(NEKO_HEX_NEDS, i8)
 
@@ -887,7 +939,7 @@ subroutine mesh_generate_edge_conn(this)
     call non_shared_edges%init(this%hte%num_entries())
 
     call it%init(this%hte)
-    do while(it%next())       
+    do while(it%next())
        edge => it%key()
        call it%data(id)
 
@@ -897,8 +949,8 @@ subroutine mesh_generate_edge_conn(this)
        p2 => this%point_neigh(l)%array()
 
        shared_edge = .false.
-       
-       ! Find edge neighbor from point neighbors 
+
+       ! Find edge neighbor from point neighbors
        do i = 1, this%point_neigh(k)%size()
           do j = 1, this%point_neigh(l)%size()
              if ((p1(i) .eq. p2(j)) .and. &
@@ -911,7 +963,7 @@ subroutine mesh_generate_edge_conn(this)
 
        ! Generate a unique id for the shared edge as,
        ! ((e1 * C) + e2 )) + glb_max if e1 > e2
-       ! ((e2 * C) + e1 )) + glb_max if e2 > e1     
+       ! ((e2 * C) + e1 )) + glb_max if e2 > e1
        if (shared_edge) then
           glb_id = ((int(edge%x(1), i8)) + int(edge%x(2), i8)*C) + glb_max
           call glb_to_loc%set(glb_id, id)
@@ -934,14 +986,14 @@ subroutine mesh_generate_edge_conn(this)
     ns_id => non_shared_edges%array()
     do i = 1, non_shared_edges%size()
        call distdata_set_local_to_global_edge(this%ddata, ns_id(i), edge_offset)
-       edge_offset = edge_offset + 1          
+       edge_offset = edge_offset + 1
     end do
     nullify(ns_id)
-    
+
     !
     ! Renumber shared edges into integer range
     !
-    
+
     call MPI_Allreduce(send_buff%size(), max_recv, 1, &
          MPI_INTEGER, MPI_MAX, NEKO_COMM, ierr)
 
@@ -972,7 +1024,7 @@ subroutine mesh_generate_edge_conn(this)
        if (this%neigh(src)) then
           call MPI_Wait(recv_req, status, ierr)
           call MPI_Get_count(status, MPI_INTEGER8, n_recv, ierr)
-          
+
           do j = 1, n_recv
              if ((edge_idx%element(recv_buff(j))) .and. (src .lt. pe_rank)) then
                 call ghost%add(recv_buff(j))
@@ -986,7 +1038,7 @@ subroutine mesh_generate_edge_conn(this)
        end if
     end do
 
-   
+
     ! Determine start offset for global numbering of shared edges
     glb_nshared = num_edge_loc
     call MPI_Allreduce(MPI_IN_PLACE, glb_nshared, 1, &
@@ -996,7 +1048,7 @@ subroutine mesh_generate_edge_conn(this)
     call MPI_Exscan(owner%size(), shared_offset, 1, &
          MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr)
     shared_offset = shared_offset + glb_nshared + 1
-    
+
     ! Renumber locally owned set of shared edges
     call send_buff%clear()
     call owner%iter_init()
@@ -1016,12 +1068,12 @@ subroutine mesh_generate_edge_conn(this)
        end if
     end do
     nullify(glb_ptr)
-       
+
     ! Determine total number of unique edges in the mesh
     ! (This can probably be done in a clever way...)
-    this%glb_meds = shared_offset -1 
+    this%glb_meds = shared_offset -1
     call MPI_Allreduce(MPI_IN_PLACE, this%glb_meds, 1, &
-         MPI_INTEGER, MPI_MAX, NEKO_COMM, IERR)    
+         MPI_INTEGER, MPI_MAX, NEKO_COMM, IERR)
 
     !
     ! Update ghosted edges with new global id
@@ -1053,11 +1105,11 @@ subroutine mesh_generate_edge_conn(this)
                   dst, 0, NEKO_COMM, send_req, ierr)
           end select
        end if
-       
+
        if (this%neigh(src)) then
           call MPI_Wait(recv_req, status, ierr)
           call MPI_Get_count(status, MPI_INTEGER8, n_recv, ierr)
-          
+
           do j = 1, n_recv, 2
              if (ghost%element(recv_buff(j))) then
                 if (glb_to_loc%get(recv_buff(j), id) .eq. 0) then
@@ -1104,7 +1156,7 @@ subroutine mesh_generate_facet_numbering(this)
     type(MPI_Status) :: status
     type(MPI_Request) :: send_req, recv_req
     integer, allocatable :: recv_buff(:)
-    integer :: non_shared_facets, shared_facets, facet_offset    
+    integer :: non_shared_facets, shared_facets, facet_offset
     integer :: id, glb_nshared, shared_offset, owned_facets
     integer :: i, j, ierr, max_recv, src, dst, n_recv
 
@@ -1114,29 +1166,29 @@ subroutine mesh_generate_facet_numbering(this)
     if (this%gdim .eq. 2) then
        allocate(this%ddata%local_to_global_facet(this%meds))
        call edge_owner%init(this%meds)
-       call edge_ghost%init(64, i)       
+       call edge_ghost%init(64, i)
        non_shared_facets = this%hte%num_entries() - shared_facets
     else
        allocate(this%ddata%local_to_global_facet(this%mfcs))
        call face_owner%init(this%mfcs)
-       call face_ghost%init(64, i)       
+       call face_ghost%init(64, i)
        non_shared_facets = this%htf%num_entries() - shared_facets
     end if
-    
+
     !> @todo Move this into distdata as a method...
-    
+
     facet_offset = 0
     call MPI_Exscan(non_shared_facets, facet_offset, 1, &
          MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr)
     facet_offset = facet_offset + 1
-    
+
     ! Determine ownership of shared facets
     if (this%gdim .eq. 2) then
        call edge_it%init(this%hte)
        do while (edge_it%next())
           call edge_it%data(id)
           edge => edge_it%key()
-          if (.not. this%ddata%shared_facet%element(id)) then       
+          if (.not. this%ddata%shared_facet%element(id)) then
              call distdata_set_local_to_global_facet(this%ddata, &
                   id, facet_offset)
              facet_offset = facet_offset + 1
@@ -1163,7 +1215,7 @@ subroutine mesh_generate_facet_numbering(this)
        do while (face_it%next())
           call face_it%data(id)
           face => face_it%key()
-          if (.not. this%ddata%shared_facet%element(id)) then       
+          if (.not. this%ddata%shared_facet%element(id)) then
              call distdata_set_local_to_global_facet(this%ddata, &
                   id, facet_offset)
              facet_offset = facet_offset + 1
@@ -1186,50 +1238,50 @@ subroutine mesh_generate_facet_numbering(this)
        end do
        owned_facets = face_owner%size()
     end if
-    
+
     ! Determine start offset for global numbering of shared facets
     glb_nshared = non_shared_facets
     call MPI_Allreduce(MPI_IN_PLACE, glb_nshared, 1, &
          MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr)
-    
+
     shared_offset = 0
     call MPI_Exscan(owned_facets, shared_offset, 1, &
          MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr)
     shared_offset = shared_offset + glb_nshared + 1
 
     if (this%gdim .eq. 2) then
-       
+
        if (owned_facets .gt. 32)  then
           call send_buff%init(owned_facets)
        else
           call send_buff%init()
        end if
-       
+
        ed => edge_owner%array()
        do i = 1, edge_owner%size()
           if (this%hte%get(ed(i), id) .eq. 0) then
              call distdata_set_local_to_global_facet(this%ddata, id, &
                                                      shared_offset)
-             
+
              ! Add new number to send buffer
              ! [edge id1 ... edge idn new_glb_id]
              do j = 1, 2
                 call send_buff%push(ed(i)%x(j))
              end do
              call send_buff%push(shared_offset)
-             
+
              shared_offset = shared_offset + 1
           end if
        end do
-       
+
     else
-       
+
        if (owned_facets .gt. 32)  then
           call send_buff%init(owned_facets)
        else
           call send_buff%init()
        end if
-           
+
        fd => face_owner%array()
        do i = 1, face_owner%size()
           if (this%htf%get(fd(i), id) .eq. 0) then
@@ -1242,28 +1294,28 @@ subroutine mesh_generate_facet_numbering(this)
                 call send_buff%push(fd(i)%x(j))
              end do
              call send_buff%push(shared_offset)
-             
+
              shared_offset = shared_offset + 1
           end if
        end do
        nullify(fd)
-       
+
     end if
 
     ! Determine total number of unique facets in the mesh
     ! (This can probably be done in a clever way...)
     this%glb_mfcs = shared_offset - 1
     call MPI_Allreduce(MPI_IN_PLACE, this%glb_mfcs, 1, &
-         MPI_INTEGER, MPI_MAX, NEKO_COMM, IERR)    
+         MPI_INTEGER, MPI_MAX, NEKO_COMM, IERR)
 
     !
     ! Update ghosted facets with new global id
     !
-    
+
     call MPI_Allreduce(send_buff%size(), max_recv, 1, &
          MPI_INTEGER, MPI_MAX, NEKO_COMM, ierr)
 
-    allocate(recv_buff(max_recv))    
+    allocate(recv_buff(max_recv))
 
     !> @todo Since we now the neigh. we can actually do p2p here...
     do i = 1, size(this%neigh_order)
@@ -1279,28 +1331,28 @@ subroutine mesh_generate_facet_numbering(this)
           call MPI_Isend(send_buff%array(), send_buff%size(), MPI_INTEGER, &
                dst, 0, NEKO_COMM, send_req, ierr)
        end if
-       
+
        if (this%neigh(src)) then
           call MPI_Wait(recv_req, status, ierr)
           call MPI_Get_count(status, MPI_INTEGER, n_recv, ierr)
-          
+
           if (this%gdim .eq. 2) then
              do j = 1, n_recv, 3
-             
+
                 recv_edge = (/recv_buff(j), recv_buff(j+1)/)
-             
+
                 ! Check if the PE has the shared edge
                 if (edge_ghost%get(recv_edge, id) .eq. 0) then
                    call distdata_set_local_to_global_facet(this%ddata, &
                         id, recv_buff(j+2))
                 end if
              end do
-          else             
+          else
              do j = 1, n_recv, 5
-                
+
                 recv_face = (/recv_buff(j), recv_buff(j+1), &
                      recv_buff(j+2), recv_buff(j+3) /)
-                
+
                 ! Check if the PE has the shared face
                 if (face_ghost%get(recv_face, id) .eq. 0) then
                    call distdata_set_local_to_global_facet(this%ddata, &
@@ -1313,7 +1365,7 @@ subroutine mesh_generate_facet_numbering(this)
        if (this%neigh(dst)) then
           call MPI_Wait(send_req, MPI_STATUS_IGNORE, ierr)
        end if
-       
+
     end do
 
     if (this%gdim .eq. 2) then
@@ -1323,28 +1375,28 @@ subroutine mesh_generate_facet_numbering(this)
        call face_owner%free()
        call face_ghost%free()
     end if
-    
+
     call send_buff%free()
     deallocate(recv_buff)
-       
+
   end subroutine mesh_generate_facet_numbering
-  
-  
+
+
   !> Add a quadrilateral element to the mesh @a this
   subroutine mesh_add_quad(this, el, p1, p2, p3, p4)
     class(mesh_t), target, intent(inout) :: this
     integer, value :: el
-    type(point_t), intent(inout) :: p1, p2, p3, p4
+    type(point_t), target, intent(inout) :: p1, p2, p3, p4
     class(element_t), pointer :: ep
     integer :: p(4), el_glb_idx, i, p_local_idx
     type(tuple_i4_t) :: e
 
-    ! Connectivity invalidated if a new element is added        
+    ! Connectivity invalidated if a new element is added
     this%lconn = .false.
 
     ! Numbering invalidated if a new element is added
     this%lnumr = .false.
-    
+
     call this%add_point(p1, p(1))
     call this%add_point(p2, p(2))
     call this%add_point(p3, p(3))
@@ -1353,44 +1405,35 @@ subroutine mesh_add_quad(this, el, p1, p2, p3, p4)
     ep => this%elements(el)%e
     el_glb_idx = el + this%offset_el
 
-    do i = 1, NEKO_QUAD_NPTS
-       p_local_idx = this%get_local(this%points(p(i)))
-       call this%point_neigh(p_local_idx)%push(el_glb_idx)
-    end do
-    
     select type(ep)
     type is (quad_t)
        call ep%init(el_glb_idx, &
             this%points(p(1)), this%points(p(2)), &
             this%points(p(3)), this%points(p(4)))
 
-       do i = 1, NEKO_QUAD_NEDS
-          call ep%facet_id(e, i)
-          call this%add_edge(e)
-       end do
 
     class default
        call neko_error('Invalid element type')
     end select
-        
+
   end subroutine mesh_add_quad
 
   !> Add a hexahedral element to the mesh @a this
   subroutine mesh_add_hex(this, el, p1, p2, p3, p4, p5, p6, p7, p8)
     class(mesh_t), target, intent(inout) :: this
     integer, value :: el
-    type(point_t), intent(inout) :: p1, p2, p3, p4, p5, p6, p7, p8
+    type(point_t), target, intent(inout) :: p1, p2, p3, p4, p5, p6, p7, p8
     class(element_t), pointer :: ep
     integer :: p(8), el_glb_idx, i, p_local_idx
     type(tuple4_i4_t) :: f
     type(tuple_i4_t) :: e
 
-    ! Connectivity invalidated if a new element is added        
+    ! Connectivity invalidated if a new element is added
     this%lconn = .false.
 
     ! Numbering invalidated if a new element is added
     this%lnumr = .false.
-    
+
     call this%add_point(p1, p(1))
     call this%add_point(p2, p(2))
     call this%add_point(p3, p(3))
@@ -1402,12 +1445,6 @@ subroutine mesh_add_hex(this, el, p1, p2, p3, p4, p5, p6, p7, p8)
 
     ep => this%elements(el)%e
     el_glb_idx = el + this%offset_el
-    if (this%lgenc) then
-       do i = 1, NEKO_HEX_NPTS
-          p_local_idx = this%get_local(this%points(p(i)))
-          call this%point_neigh(p_local_idx)%push(el_glb_idx)
-       end do
-    end if
     select type(ep)
     type is (hex_t)
        call ep%init(el_glb_idx, &
@@ -1415,19 +1452,6 @@ subroutine mesh_add_hex(this, el, p1, p2, p3, p4, p5, p6, p7, p8)
             this%points(p(3)), this%points(p(4)), &
             this%points(p(5)), this%points(p(6)), &
             this%points(p(7)), this%points(p(8)))
-       
-       if (this%lgenc) then
-          do i = 1, NEKO_HEX_NFCS
-             call ep%facet_id(f, i)
-             call this%add_face(f)
-          end do
-
-          do i = 1, NEKO_HEX_NEDS
-             call ep%edge_id(e, i)
-             call this%add_edge(e)
-          end do
-       end if
-       
     class default
        call neko_error('Invalid element type')
     end select
@@ -1440,11 +1464,11 @@ subroutine mesh_add_point(this, p, idx)
     type(point_t), intent(inout) :: p
     integer, intent(inout) :: idx
     integer :: tmp
-   
+
     tmp = p%id()
 
     this%max_pts_id = max(this%max_pts_id, tmp)
-    
+
     if (tmp .le. 0) then
        call neko_error("Invalid point id")
     end if
@@ -1455,7 +1479,7 @@ subroutine mesh_add_point(this, p, idx)
        this%points(this%mpts) = p
        idx = this%mpts
     end if
-    
+
   end subroutine mesh_add_point
 
   !> Add a unique face represented as a 4-tuple to the mesh
@@ -1468,9 +1492,9 @@ subroutine mesh_add_face(this, f)
        this%mfcs = this%mfcs + 1
        call this%htf%set(f, this%mfcs)
     end if
-    
+
   end subroutine mesh_add_face
-  
+
   !> Add a unique edge represented as a 2-tuple to the mesh
   subroutine mesh_add_edge(this, e)
     class(mesh_t), intent(inout) :: this
@@ -1481,7 +1505,7 @@ subroutine mesh_add_edge(this, e)
        this%meds = this%meds + 1
        call this%hte%set(e, this%meds)
     end if
-    
+
   end subroutine mesh_add_edge
 
   !> Mark facet @a f in element @a e as a wall
@@ -1500,15 +1524,15 @@ subroutine mesh_mark_wall_facet(this, f, e)
     end if
     this%facet_type(f, e) = 2
     call this%wall%add_facet(f, e)
-    
+
   end subroutine mesh_mark_wall_facet
 
   !> Mark element @a e as a curve element
   subroutine mesh_mark_curve_element(this, e, curve_data, curve_type)
     class(mesh_t), intent(inout) :: this
     integer, intent(in) :: e
-    real(kind=dp), dimension(5,12), intent(in) :: curve_data 
-    integer, dimension(12), intent(in) :: curve_type 
+    real(kind=dp), dimension(5,12), intent(in) :: curve_data
+    integer, dimension(12), intent(in) :: curve_type
 
     if (e .gt. this%nelv) then
        call neko_error('Invalid element index')
@@ -1517,7 +1541,7 @@ subroutine mesh_mark_curve_element(this, e, curve_data, curve_type)
        call neko_error('Invalid curve element')
     end if
     call this%curve%add_element(e, curve_data, curve_type)
-    
+
   end subroutine mesh_mark_curve_element
 
 
@@ -1537,10 +1561,10 @@ subroutine mesh_mark_inlet_facet(this, f, e)
     end if
     this%facet_type(f, e) = 2
     call this%inlet%add_facet(f, e)
-    
+
   end subroutine mesh_mark_inlet_facet
- 
-  !> Mark facet @a f in element @a e with label 
+
+  !> Mark facet @a f in element @a e with label
   subroutine mesh_mark_labeled_facet(this, f, e, label)
     class(mesh_t), intent(inout) :: this
     integer, intent(in) :: f
@@ -1557,10 +1581,10 @@ subroutine mesh_mark_labeled_facet(this, f, e, label)
     end if
     call this%labeled_zones(label)%add_facet(f, e)
     this%facet_type(f,e) = -label
-    
+
   end subroutine mesh_mark_labeled_facet
 
- 
+
   !> Mark facet @a f in element @a e as an outlet normal
   subroutine mesh_mark_outlet_normal_facet(this, f, e)
     class(mesh_t), intent(inout) :: this
@@ -1577,7 +1601,7 @@ subroutine mesh_mark_outlet_normal_facet(this, f, e)
     end if
     this%facet_type(f, e) = 1
     call this%outlet_normal%add_facet(f, e)
-    
+
   end subroutine mesh_mark_outlet_normal_facet
 
 
@@ -1597,7 +1621,7 @@ subroutine mesh_mark_outlet_facet(this, f, e)
     end if
     this%facet_type(f, e) = 1
     call this%outlet%add_facet(f, e)
-    
+
   end subroutine mesh_mark_outlet_facet
 
   !> Mark facet @a f in element @a e as a symmetry plane
@@ -1616,7 +1640,7 @@ subroutine mesh_mark_sympln_facet(this, f, e)
     end if
     this%facet_type(f, e) = 2
     call this%sympln%add_facet(f, e)
-    
+
   end subroutine mesh_mark_sympln_facet
 
   !> Mark facet @a f in element @a e as periodic with (@a pf, @a pe)
@@ -1628,7 +1652,7 @@ subroutine mesh_mark_periodic_facet(this, f, e, pf, pe, pids)
     integer, intent(in) :: pe
     integer, intent(inout) :: pids(4)
     integer, dimension(4) :: org_ids
-    
+
     call this%get_facet_ids(f, e, org_ids)
     call this%periodic%add_periodic_facet(f, e, pf, pe, pids, org_ids)
   end subroutine mesh_mark_periodic_facet
@@ -1656,7 +1680,7 @@ subroutine mesh_get_facet_ids(this, f, e, pids)
        pids(4) = 0
     end select
   end subroutine mesh_get_facet_ids
-  
+
   !> Reset ids of periodic points to their original ids
   subroutine mesh_reset_periodic_ids(this)
     class(mesh_t), intent(inout) :: this
@@ -1679,9 +1703,9 @@ subroutine mesh_reset_periodic_ids(this)
                                                        1,2,&
                                                        3,4 /),&
                                                        (/2,4/))
- 
+
     do i = 1, this%periodic%size
-       e = this%periodic%facet_el(i)%x(2) 
+       e = this%periodic%facet_el(i)%x(2)
        f = this%periodic%facet_el(i)%x(1)
        pe = this%periodic%p_facet_el(i)%x(2)
        pf = this%periodic%p_facet_el(i)%x(1)
@@ -1690,24 +1714,24 @@ subroutine mesh_reset_periodic_ids(this)
        this%periodic%p_ids(i)%x = pids
     end do
     do i = 1, this%periodic%size
-       e = this%periodic%facet_el(i)%x(2) 
+       e = this%periodic%facet_el(i)%x(2)
        f = this%periodic%facet_el(i)%x(1)
        org_ids = this%periodic%org_ids(i)%x
        select type(ele => this%elements(e)%e)
        type is(hex_t)
-       do j = 1, 4
-          pi => ele%pts(face_nodes(j,f))%p
-          call pi%set_id(org_ids(j))
-       end do
+          do j = 1, 4
+             pi => ele%pts(face_nodes(j,f))%p
+             call pi%set_id(org_ids(j))
+          end do
        type is(quad_t)
-       do j = 1, 2
-          pi => ele%pts(edge_nodes(j,f))%p
-          call pi%set_id(org_ids(j))
-       end do
+          do j = 1, 2
+             pi => ele%pts(edge_nodes(j,f))%p
+             call pi%set_id(org_ids(j))
+          end do
        end select
     end do
   end subroutine mesh_reset_periodic_ids
-  
+
   !> Creates common ids for matching periodic points.
   subroutine mesh_create_periodic_ids(this, f, e, pf, pe)
     class(mesh_t), intent(inout) :: this
@@ -1717,7 +1741,7 @@ subroutine mesh_create_periodic_ids(this, f, e, pf, pe)
     integer, intent(in) :: pe
     type(point_t), pointer :: pi, pj
     real(kind=dp) :: L(3)
-    integer :: i, j, id, p_local_idx
+    integer :: i, j, id, p_local_idx, match
     type(tuple4_i4_t) :: ft
     type(tuple_i4_t) :: et
     integer, dimension(4, 6) :: face_nodes = reshape((/1,5,7,3,&
@@ -1732,89 +1756,60 @@ subroutine mesh_create_periodic_ids(this, f, e, pf, pe)
                                                        1,2,&
                                                        3,4 /),&
                                                       (/2,4/))
-  
+
     select type(ele => this%elements(e)%e)
     type is(hex_t)
-    select type(elp => this%elements(pe)%e)
-    type is(hex_t)
-       L = 0d0
-       do i = 1, 4
-          L = L + ele%pts(face_nodes(i,f))%p%x(1:3) - &
+       select type(elp => this%elements(pe)%e)
+       type is(hex_t)
+          L = 0d0
+          do i = 1, 4
+             L = L + ele%pts(face_nodes(i,f))%p%x(1:3) - &
                elp%pts(face_nodes(i,pf))%p%x(1:3)
-       end do
-       L = L/4
-       do i = 1, 4
-          pi => ele%pts(face_nodes(i,f))%p
-          do j = 1, 4
-             pj => elp%pts(face_nodes(j,pf))%p
-             if (norm2(pi%x(1:3) - pj%x(1:3) - L) .lt. 1d-7) then
-                id = min(pi%id(), pj%id())
-                call pi%set_id(id)
-                call pj%set_id(id)
-                p_local_idx = this%get_local(this%points(id))
-                if (this%lgenc) then
-                   id = ele%id()
-                   call this%point_neigh(p_local_idx)%push(id)
-                   id = elp%id()
-                   call this%point_neigh(p_local_idx)%push(id)
+          end do
+          L = L/4
+          do i = 1, 4
+             pi => ele%pts(face_nodes(i,f))%p
+             match = 0
+             do j = 1, 4
+                pj => elp%pts(face_nodes(j,pf))%p
+                if (norm2(pi%x(1:3) - pj%x(1:3) - L) .lt. 1d-7) then
+                   id = min(pi%id(), pj%id())
+                   call pi%set_id(id)
+                   call pj%set_id(id)
+                   p_local_idx = this%get_local(this%points(id))
+                   match = match + 1
                 end if
+             end do
+             if ( match .gt. 1) then
+                call neko_error('Multiple matches when creating periodic ids')
+             else if (match .eq. 0) then
+                call neko_error('Cannot find matching periodic point')
              end if
           end do
-       end do
-
-       if (this%lgenc) then
-          do i = 1, NEKO_HEX_NFCS
-             call ele%facet_id(ft, i)
-             call this%add_face(ft)
-             call elp%facet_id(ft, i)
-             call this%add_face(ft)
-          end do
-
-          do i = 1, NEKO_HEX_NEDS
-             call ele%edge_id(et, i)
-             call this%add_edge(et)
-             call elp%edge_id(et, i)
-             call this%add_edge(et)
-          end do
-       end if
-    end select
-    type is(quad_t)
-    select type(elp => this%elements(pe)%e)
+       end select
     type is(quad_t)
-       L = 0d0
-       do i = 1, 2
-          L = L + ele%pts(edge_nodes(i,f))%p%x(1:3) - &
+       select type(elp => this%elements(pe)%e)
+       type is(quad_t)
+          L = 0d0
+          do i = 1, 2
+             L = L + ele%pts(edge_nodes(i,f))%p%x(1:3) - &
                elp%pts(edge_nodes(i,pf))%p%x(1:3)
-       end do
-       L = L/2
-       do i = 1, 2
-          pi => ele%pts(edge_nodes(i,f))%p
-          do j = 1, 2
-             pj => elp%pts(edge_nodes(j,pf))%p
-             !whatabout thie tolerance?
-             if (norm2(pi%x(1:3) - pj%x(1:3) - L) .lt. 1d-7) then
-                id = min(pi%id(), pj%id())
-                call pi%set_id(id)
-                call pj%set_id(id)
-                p_local_idx = this%get_local(this%points(id))
-                if (this%lgenc) then
-                   id = ele%id()
-                   call this%point_neigh(p_local_idx)%push(id)
-                   id = elp%id()
-                   call this%point_neigh(p_local_idx)%push(id)
-                end if
-             end if
           end do
-       end do
-       if (this%lgenc) then
-          do i = 1, NEKO_QUAD_NEDS
-             call ele%facet_id(et, i)
-             call this%add_edge(et)
-             call elp%facet_id(et, i)
-             call this%add_edge(et)
+          L = L/2
+          do i = 1, 2
+             pi => ele%pts(edge_nodes(i,f))%p
+             do j = 1, 2
+                pj => elp%pts(edge_nodes(j,pf))%p
+                !whatabout thie tolerance?
+                if (norm2(pi%x(1:3) - pj%x(1:3) - L) .lt. 1d-7) then
+                   id = min(pi%id(), pj%id())
+                   call pi%set_id(id)
+                   call pj%set_id(id)
+                   p_local_idx = this%get_local(this%points(id))
+                end if
+             end do
           end do
-       end if
-    end select
+       end select
     end select
   end subroutine mesh_create_periodic_ids
 
@@ -1838,7 +1833,6 @@ subroutine mesh_apply_periodic_facet(this, f, e, pf, pe, pids)
                                                        1,2,4,3,&
                                                        5,6,8,7/),&
                                                        (/4,6/))
-  
     select type(ele => this%elements(e)%e)
     type is(hex_t)
        do i = 1, 4
@@ -1846,22 +1840,7 @@ subroutine mesh_apply_periodic_facet(this, f, e, pf, pe, pids)
           call pi%set_id(pids(i))
           call this%add_point(pi, id)
           p_local_idx = this%get_local(this%points(id))
-          id = ele%id()
-          if (this%lgenc) then
-             call this%point_neigh(p_local_idx)%push(id)
-          end if
        end do
-       if (this%lgenc) then
-          do i = 1, NEKO_HEX_NFCS
-             call ele%facet_id(ft, i)
-             call this%add_face(ft)
-          end do
-
-          do i = 1, NEKO_HEX_NEDS
-             call ele%edge_id(et, i)
-             call this%add_edge(et)
-          end do
-       end if
     end select
 
   end subroutine mesh_apply_periodic_facet
@@ -1879,7 +1858,7 @@ function mesh_get_local_point(this, p) result(local_id)
     if (this%htp%get(tmp, local_id) .gt. 0) then
        call neko_error('Invalid global id (local point)')
     end if
-    
+
   end function mesh_get_local_point
 
   !> Return the local id of an edge @a e
@@ -1892,7 +1871,7 @@ function mesh_get_local_edge(this, e) result(local_id)
     if (this%hte%get(e, local_id) .gt. 0) then
        call neko_error('Invalid global id (local edge)')
     end if
-    
+
   end function mesh_get_local_edge
 
   !> Return the local id of a face @a f
@@ -1904,7 +1883,7 @@ function mesh_get_local_facet(this, f) result(local_id)
     if (this%htf%get(f, local_id) .gt. 0) then
        call neko_error('Invalid global id (local facet)')
     end if
-    
+
   end function mesh_get_local_facet
 
   !> Return the global id of an edge @a e
@@ -1919,7 +1898,7 @@ function mesh_get_global_edge(this, e) result(global_id)
        if (pe_size .gt. 1) then
           global_id = this%ddata%local_to_global_facet(global_id)
        end if
-    else 
+    else
        if (pe_size .gt. 1) then
           global_id = this%ddata%local_to_global_edge(global_id)
        end if
@@ -1934,26 +1913,26 @@ function mesh_get_global_facet(this, f) result(global_id)
     integer :: global_id
 
     global_id = this%get_local_facet(f)
-    
+
     if (pe_size .gt. 1) then
        global_id = this%ddata%local_to_global_facet(global_id)
     end if
-    
+
   end function mesh_get_global_facet
 
-  
+
   !> Check if the mesh has a point given its global index
   !! @return The local id of the point (if present) otherwise -1
   !! @todo Consider moving this to distdata
   function mesh_have_point_glb_idx(this, index) result(local_id)
-    class(mesh_t), intent(inout) :: this 
+    class(mesh_t), intent(inout) :: this
     integer, intent(inout) :: index  !< Global index
     integer :: local_id
 
     if (this%htp%get(index, local_id) .eq. 1) then
        local_id = -1
     end if
-        
+
   end function mesh_have_point_glb_idx
 
 
@@ -1966,9 +1945,9 @@ function mesh_is_shared_point(this, p) result(shared)
 
     local_index = this%get_local(p)
     shared = this%ddata%shared_point%element(local_index)
-    
+
   end function mesh_is_shared_point
-  
+
 
   !> Check if an edge is shared
   !! @attention only defined for gdim .ne. 2
@@ -1980,7 +1959,7 @@ function mesh_is_shared_edge(this, e) result(shared)
     local_index = this%get_local(e)
     if (this%gdim .eq. 2) then
        shared = this%ddata%shared_facet%element(local_index)
-    else 
+    else
        shared = this%ddata%shared_edge%element(local_index)
     end if
   end function mesh_is_shared_edge
@@ -1994,7 +1973,7 @@ function mesh_is_shared_facet(this, f) result(shared)
 
     local_index = this%get_local(f)
     shared = this%ddata%shared_facet%element(local_index)
-    
+
   end function mesh_is_shared_facet
 
 end module mesh
diff --git a/src/mesh/octree.f90 b/src/mesh/octree.f90
index f026333d88e..a2cf677f262 100644
--- a/src/mesh/octree.f90
+++ b/src/mesh/octree.f90
@@ -71,12 +71,12 @@ subroutine octree_init(t, width)
     real(kind=dp), intent(in) :: width
     type(point_t) :: origin
     integer, parameter :: top_level = 0
-    
+
     call octree_free(t)
 
     origin = (/ 0d0, 0d0, 0d0 /)
     call octree_oct_init(t%root, origin, width, top_level)
-    
+
   end subroutine octree_init
 
   !> Destroy an octree
@@ -84,7 +84,7 @@ subroutine octree_free(t)
     class(octree_t), intent(inout) :: t
 
     call octree_free_oct(t%root)
-    
+
   end subroutine octree_free
 
   !> Insert a point @a p into the octree
@@ -93,7 +93,7 @@ subroutine octree_insert(t, p)
     type(point_t), intent(in) :: p
 
     call octree_oct_insert(t%root, p)
-    
+
   end subroutine octree_insert
 
   !> Find a point @a p in an octree
@@ -103,9 +103,9 @@ function octree_find(t, p) result(rcode)
     logical rcode
 
     rcode = (octree_oct_find(t%root, p) .eq. 0)
-    
+
   end function octree_find
-  
+
 
   !> Insert a point @a p into the octree rooted at @a o
   recursive subroutine octree_oct_insert(o, p)
@@ -113,7 +113,7 @@ recursive subroutine octree_oct_insert(o, p)
     type(point_t), intent(in) :: p
     type(point_t) :: tmp_pt, offset, new_origin
     integer :: i
-    
+
     if (.not. associated(o%oct(1)%ptr)) then
        if (.not. o%valid) then
           o%point = p
@@ -124,14 +124,14 @@ recursive subroutine octree_oct_insert(o, p)
              return
           else
              tmp_pt = o%point
-             o%valid = .false.             
+             o%valid = .false.
 
              do i = 1, 8
                 offset = (/ -0.5d0, -0.5d0, -0.5d0 /)
                 if (iand((i - 1), 4) .gt. 0) offset%x(1) = 0.5d0
                 if (iand((i - 1), 2) .gt. 0) offset%x(2) = 0.5d0
                 if (iand((i - 1), 1) .gt. 0) offset%x(3) = 0.5d0
-                
+
                 new_origin = o%origin%x + (o%width * offset%x)
                 call octree_oct_init(o%oct(i)%ptr, new_origin, &
                      o%width * 0.5d0, o%level + 1)
@@ -139,7 +139,7 @@ recursive subroutine octree_oct_insert(o, p)
 
              call octree_oct_insert(o%oct(octree_oct(o, tmp_pt))%ptr, tmp_pt)
              call octree_oct_insert(o%oct(octree_oct(o, p))%ptr, p)
-             
+
           end if
        end if
     else
@@ -165,7 +165,7 @@ recursive function octree_oct_find(o, p) result(rcode)
        oct_idx = octree_oct(o, p)
        rcode = octree_oct_find(o%oct(oct_idx)%ptr, p)
     end if
-    
+
   end function octree_oct_find
 
   !> Initialize an octant width a given width, origin and level
@@ -175,7 +175,7 @@ subroutine octree_oct_init(o, origin, width, level)
     real(kind=dp), intent(in) :: width
     integer, intent(in) :: level
     integer :: i
-    
+
     if (associated(o)) then
        call neko_error('Octree octant already initialized')
     else
@@ -189,14 +189,14 @@ subroutine octree_oct_init(o, origin, width, level)
           nullify(o%oct(i)%ptr)
        end do
     end if
-    
+
   end subroutine octree_oct_init
-    
+
   !> Deallocate an oct in an octree
   recursive subroutine octree_free_oct(o)
     type(oct_t), pointer, intent(inout) :: o
     integer :: i
-    
+
     if (.not. associated(o)) then
        return
     else if (.not. associated(o%oct(1)%ptr)) then
@@ -211,7 +211,7 @@ recursive subroutine octree_free_oct(o)
        nullify(o)
     end if
 
-           
+
   end subroutine octree_free_oct
 
   !> Return the octant for a given point
@@ -225,7 +225,7 @@ pure function octree_oct(oct, point) result(oct_idx)
     if (point%x(2) .ge. oct%origin%x(2)) oct_idx = ior(oct_idx, 2)
     if (point%x(3) .ge. oct%origin%x(3)) oct_idx = ior(oct_idx, 1)
     oct_idx = oct_idx + 1
-    
+
   end function octree_oct
 
   !> Return if a point is inside an octant
@@ -244,5 +244,5 @@ pure function octree_oct_inside(oct, point) result(inside)
        inside = .true.
     end if
   end function octree_oct_inside
-  
+
 end module octree
diff --git a/src/mesh/point.f90 b/src/mesh/point.f90
index 7fca3d63def..5c69f12fd32 100644
--- a/src/mesh/point.f90
+++ b/src/mesh/point.f90
@@ -208,7 +208,7 @@ function point_subtract(p1, p2) result(res)
     res%x(2) = p1%x(2) - p2%x(2)
     res%x(3) = p1%x(3) - p2%x(3)
 
- end function point_subtract
+  end function point_subtract
 
   !> Returns the multiplication of a point by a scalar \f$ a*p_{1} \f$.
   function point_scalar_mult(p, a) result(res)
@@ -229,10 +229,10 @@ pure function point_euclid_dist(p1, p2) result(res)
     real(kind=rp) :: res
 
     res = sqrt(  (p1%x(1) - p2%x(1))**2 &
-               + (p1%x(2) - p2%x(2))**2 &         
+               + (p1%x(2) - p2%x(2))**2 &
                + (p1%x(3) - p2%x(3))**2 )
   end function point_euclid_dist
-    
+
   !> Computes matrix-vector product in \f$ \mathbb{R}^3 \f$: \f$ b = Ax \f$.
   function point_mat_mult(A,x) result(b)
     class(point_t), intent(in) :: x
diff --git a/src/mesh/point_zone.f90 b/src/mesh/point_zone.f90
index fac110b64f3..f0316d675bf 100644
--- a/src/mesh/point_zone.f90
+++ b/src/mesh/point_zone.f90
@@ -45,7 +45,7 @@ module point_zone
 
   !> Base abstract type for point zones.
   type, public, abstract :: point_zone_t
-     !> List of linear indices of the GLL points in the zone. 
+     !> List of linear indices of the GLL points in the zone.
      integer, allocatable :: mask(:)
      !> List of linear indices of the GLL points in the zone on the device.
      type(c_ptr) :: mask_d = c_null_ptr
@@ -166,7 +166,7 @@ subroutine point_zone_free_base(this)
     if (c_associated(this%mask_d)) then
        call device_free(this%mask_d)
     end if
-    
+
   end subroutine point_zone_free_base
 
   !> Builds the mask from the scratch stack.
@@ -174,45 +174,46 @@ subroutine point_zone_finalize(this)
     class(point_zone_t), intent(inout) :: this
     integer, pointer :: tp(:)
     integer :: i
-    
+
     if (.not. this%finalized) then
 
        allocate(this%mask(this%scratch%size()))
-       
+
        tp => this%scratch%array()
        do i = 1, this%scratch%size()
           this%mask(i) = tp(i)
        end do
 
        this%size = this%scratch%size()
-       
+
        call this%scratch%clear()
 
        if (NEKO_BCKND_DEVICE .eq. 1) then
           call device_map(this%mask, this%mask_d, this%size)
-          call device_memcpy(this%mask, this%mask_d, this%size, HOST_TO_DEVICE)
+          call device_memcpy(this%mask, this%mask_d, this%size, &
+                             HOST_TO_DEVICE, sync=.false.)
        end if
 
        this%finalized = .true.
-       
+
     end if
-    
+
   end subroutine point_zone_finalize
 
   !> Adds a point's linear index to the scratch stack.
   !! @param idx Linear index of the point to add.
-  !! @note The linear index of a point `(j,k,l,e)` can be retrieved using the 
+  !! @note The linear index of a point `(j,k,l,e)` can be retrieved using the
   !! subroutine `linear_index(j,k,l,e,lx)` in the `utils` module.
   subroutine point_zone_add(this, idx)
     class(point_zone_t), intent(inout) :: this
     integer, intent(inout) :: idx
-    
+
     if (this%finalized) then
        call neko_error('Point zone already finalized')
     end if
-    
+
     call this%scratch%push(idx)
-    
+
   end subroutine point_zone_add
 
   !> Maps the GLL points that verify a point_zone's `criterion` by adding
diff --git a/src/mesh/point_zone_factory.f90 b/src/mesh/point_zone_fctry.f90
similarity index 63%
rename from src/mesh/point_zone_factory.f90
rename to src/mesh/point_zone_fctry.f90
index 90ee9da41e3..1ab15696397 100644
--- a/src/mesh/point_zone_factory.f90
+++ b/src/mesh/point_zone_fctry.f90
@@ -36,6 +36,7 @@ module point_zone_fctry
   use point_zone, only: point_zone_t
   use box_point_zone, only: box_point_zone_t
   use sphere_point_zone, only: sphere_point_zone_t
+  use cylinder_point_zone, only: cylinder_point_zone_t
   use json_module, only: json_file
   use json_utils, only: json_get
   use dofmap, only: dofmap_t
@@ -45,34 +46,36 @@ module point_zone_fctry
 
   public :: point_zone_factory
 
-  contains
-    
-    !> Point zone factory. Constructs, initializes, and maps the
-    !! point zone object.
-    !! @param json JSON object initializing the point zone.
-    !! @param dof Dofmap from which to map the point zone.
-    subroutine point_zone_factory(point_zone, json, dof)
-      class(point_zone_t), allocatable, intent(inout) :: point_zone
-      type(json_file), intent(inout) :: json
-      type(dofmap_t), intent(inout) :: dof
-      character(len=:), allocatable :: zone_type
+contains
 
-      call json_get(json, "geometry", zone_type)
+  !> Point zone factory. Constructs, initializes, and maps the
+  !! point zone object.
+  !! @param json JSON object initializing the point zone.
+  !! @param dof Dofmap from which to map the point zone.
+  subroutine point_zone_factory(point_zone, json, dof)
+    class(point_zone_t), allocatable, intent(inout) :: point_zone
+    type(json_file), intent(inout) :: json
+    type(dofmap_t), intent(inout) :: dof
+    character(len=:), allocatable :: zone_type
 
-      if (trim(zone_type) .eq. "box") then
-         allocate(box_point_zone_t::point_zone)
-      else if (trim(zone_type) .eq. "sphere") then
-         allocate(sphere_point_zone_t::point_zone)
-      else
-         call neko_error("Unknown source term "//trim(zone_type)//"! Valid &
-              &source terms are 'box', 'sphere'.")
-      end if
+    call json_get(json, "geometry", zone_type)
 
-      call point_zone%init(json, dof%size())
+    if (trim(zone_type) .eq. "box") then
+       allocate(box_point_zone_t::point_zone)
+    else if (trim(zone_type) .eq. "sphere") then
+       allocate(sphere_point_zone_t::point_zone)
+    else if (trim(zone_type) .eq. "cylinder") then
+       allocate(cylinder_point_zone_t::point_zone)
+    else
+       call neko_error("Unknown source term "//trim(zone_type)//"! Valid &
+         &source terms are 'box', 'sphere', 'cylinder'.")
+    end if
 
-      call point_zone%map(dof)
-      call point_zone%finalize()
+    call point_zone%init(json, dof%size())
 
-    end subroutine point_zone_factory
+    call point_zone%map(dof)
+    call point_zone%finalize()
+
+  end subroutine point_zone_factory
 
 end module point_zone_fctry
diff --git a/src/mesh/point_zone_registry.f90 b/src/mesh/point_zone_registry.f90
index 763466e9719..adc0e965400 100644
--- a/src/mesh/point_zone_registry.f90
+++ b/src/mesh/point_zone_registry.f90
@@ -32,10 +32,11 @@
 !
 ! Implements a point zone registry for storing point zones.
 module point_zone_registry
-  use point_zone, only : point_zone_t, point_zone_wrapper_t
+  use point_zone, only: point_zone_t, point_zone_wrapper_t
   use point_zone_fctry, only: point_zone_factory
-  use dofmap, only : dofmap_t
-  use utils, only : neko_error
+  use dofmap, only: dofmap_t
+  use mesh, only: mesh_t
+  use space, only: space_t, GLL
   use utils, only: neko_error
   use json_utils, only: json_get
   use json_module, only: json_file, json_core, json_value
@@ -60,7 +61,7 @@ module point_zone_registry
      procedure, pass(this) :: add_point_zone_from_json
      !> Returns the number of point zones in the registry.
      procedure, pass(this) :: n_point_zones
-     !> Retrieves a point zone in the registry by its index in the 
+     !> Retrieves a point zone in the registry by its index in the
      !! `point_zones` array.
      procedure, pass(this) :: get_point_zone_by_index
      !> Retrieves a point zone in the registry by its name.
@@ -68,7 +69,7 @@ module point_zone_registry
      !> Returns the expansion size with which the `point_zone_registry_t`
      !! was initialized.
      procedure, pass(this) :: get_expansion_size
-     !> Returns the total size of the `point_zones` array (not the number of 
+     !> Returns the total size of the `point_zones` array (not the number of
      !! point zones in the registry!).
      procedure, pass(this) :: get_size
      !> Checks if a point zone exists in the registry.
@@ -83,17 +84,17 @@ module point_zone_registry
 contains
   !> Constructor, reading from json point zones.
   !! @param json Json file object.
-  !! @param dof Dofmap to map the point zone from GLL points.
+  !! @param msh Mesh associated with the point zone.
   !! @param size Size of the point zone registry.
   !! @param expansion_size Expansion size for the point zone registry.
   !! @note At this stage, the point_zone registry is only allocated
   !! if we find anything in the `case.point_zones` json path. Any
   !! point_zones that are not defined in that way will need to be added
   !! using the `add_point_zone` subroutine.
-  subroutine point_zone_registry_init(this, json, dof, expansion_size)
+  subroutine point_zone_registry_init(this, json, msh, expansion_size)
     class(point_zone_registry_t), intent(inout):: this
     type(json_file), intent(inout) :: json
-    type(dofmap_t), intent(inout) :: dof
+    type(mesh_t), target, intent(inout) :: msh
     integer, optional, intent(in) :: expansion_size
 
     ! Json low-level manipulator.
@@ -104,13 +105,28 @@ subroutine point_zone_registry_init(this, json, dof, expansion_size)
     character(len=:), allocatable :: buffer
     ! A single source term as its own json_file.
     type(json_file) :: source_subdict
-    character(len=:), allocatable :: type
     logical :: found
     integer :: n_zones, i
 
+    ! Parameters used to setup the GLL space.
+    integer :: order
+    type(space_t), target :: Xh
+    type(dofmap_t) :: dof
+
+
+    call json_get(json, 'case.numerics.polynomial_order', order)
+    order = order + 1 ! add 1 to get poly order
+
+    if (msh%gdim .eq. 2) then
+       call Xh%init(GLL, order, order)
+    else
+       call Xh%init(GLL, order, order, order)
+    end if
+    dof = dofmap_t(msh, Xh)
+
     call this%free()
 
-     if (present(expansion_size)) then
+    if (present(expansion_size)) then
        this%expansion_size = expansion_size
     else
        this%expansion_size = 10
@@ -199,7 +215,7 @@ subroutine add_point_zone_from_json(this, json, dof)
     ! Check if point zone exists with the input name
     if (this%point_zone_exists(trim(str_read))) then
        call neko_error("Field with name " // trim(str_read) // &
-            " is already registered")
+                       " is already registered")
     end if
 
     !
@@ -207,7 +223,7 @@ subroutine add_point_zone_from_json(this, json, dof)
     ! init.
     !
     if (this%n_point_zones() .eq. this%get_size()) then
-      call this%expand()
+       call this%expand()
     end if
 
     this%n = this%n + 1
@@ -230,7 +246,7 @@ pure function n_point_zones(this) result(n)
     n = this%n
   end function n_point_zones
 
-  !> Returns the total size of the `point_zones` array (not the number of 
+  !> Returns the total size of the `point_zones` array (not the number of
   !! point zones in the registry!).
   !! @note Use `n_point_zones()` to retrieve the actual number of point
   !! zones in the registry.
@@ -250,7 +266,7 @@ pure function get_expansion_size(this) result(n)
     n = this%expansion_size
   end function get_expansion_size
 
-  !> Retrieves a point zone in the registry by its index in the 
+  !> Retrieves a point zone in the registry by its index in the
   !! `point_zones` array.
   !! @param i Index in the `point_zones` array.
   function get_point_zone_by_index(this, i) result(pz)
@@ -287,7 +303,7 @@ function get_point_zone_by_name(this, name) result(pz)
 
     if (.not. found) then
        call neko_error("Point zone " // trim(name) // &
-            " could not be found in the registry")
+                       " could not be found in the registry")
     end if
   end function get_point_zone_by_name
 
diff --git a/src/mesh/point_zones/box_point_zone.f90 b/src/mesh/point_zones/box_point_zone.f90
index 43494b46543..b70b890f101 100644
--- a/src/mesh/point_zones/box_point_zone.f90
+++ b/src/mesh/point_zones/box_point_zone.f90
@@ -42,7 +42,7 @@ module box_point_zone
 
   !> A box-shaped point zone.
   !! @details As defined here, a box is described by its `x,y,z` bounds,
-  !! specified in the json file as e.g. `"x_bounds": [<xmin>, <xmax>]"`, 
+  !! specified in the json file as e.g. `"x_bounds": [<xmin>, <xmax>]"`,
   !! etc for `y` and `z` coordinates.
   type, public, extends(point_zone_t) :: box_point_zone_t
      real(kind=rp) :: xmin
@@ -138,7 +138,7 @@ subroutine box_point_zone_free(this)
   end subroutine box_point_zone_free
 
   !> Defines the criterion of selection of a GLL point in the box point zone.
-  !! In the case of a box point zone, an `x,y,z` GLL point is considered as 
+  !! In the case of a box point zone, an `x,y,z` GLL point is considered as
   !! being inside the zone if:
   !! \f{eqnarray*}{
   !!    x_{min} \le x \le x_{max} \\
diff --git a/src/mesh/point_zones/cylinder_point_zone.f90 b/src/mesh/point_zones/cylinder_point_zone.f90
new file mode 100644
index 00000000000..b8385d64580
--- /dev/null
+++ b/src/mesh/point_zones/cylinder_point_zone.f90
@@ -0,0 +1,186 @@
+! Copyright (c) 2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Implements a cylinder geometry subset.
+module cylinder_point_zone
+  use point_zone, only: point_zone_t
+  use num_types, only: rp
+  use json_utils, only: json_get
+  use json_module, only: json_file
+  use utils, only: neko_error
+  implicit none
+  private
+
+  !> A cylindrical point zone.
+  !! @details As defined here, a cylinder is described by its two end points and
+  !! its radius, specified in the json file
+  !! as e.g. `"start": [<x0>, <y0>, <z0>]",
+  !! "start": [<x1>, <y1>, <z1>]", "radius": <r>`.
+  type, public, extends(point_zone_t) :: cylinder_point_zone_t
+     real(kind=rp), dimension(3) :: p0
+     real(kind=rp), dimension(3) :: p1
+     real(kind=rp) :: radius
+   contains
+     !> Constructor from json object file.
+     procedure, pass(this) :: init => cylinder_point_zone_init_from_json
+     !> Destructor.
+     procedure, pass(this) :: free => cylinder_point_zone_free
+     !> Defines the criterion of selection of a GLL point in the sphere point zone.
+     procedure, pass(this) :: criterion => cylinder_point_zone_criterion
+  end type cylinder_point_zone_t
+
+contains
+
+  !> Constructor from json object file.
+  !! @param json Json object file.
+  !! @param size Size with which to initialize the stack
+  subroutine cylinder_point_zone_init_from_json(this, json, size)
+    class(cylinder_point_zone_t), intent(inout) :: this
+    type(json_file), intent(inout) :: json
+    integer, intent(in) :: size
+
+    character(len=:), allocatable :: name
+    real(kind=rp), dimension(:), allocatable :: p0, p1
+    real(kind=rp) :: radius
+
+    call json_get(json, "name", name)
+    call json_get(json, "start", p0)
+    call json_get(json, "end", p1)
+    call json_get(json, "radius", radius)
+
+    ! Needed to use `shape` because of input name.
+    if (all(shape(p0) .ne. (/3/))) then
+       call neko_error("Cylinder point zone: invalid start point")
+    end if
+
+    if (all(shape(p1) .ne. (/3/))) then
+       call neko_error("Cylinder point zone: invalid end point")
+    end if
+
+    if (radius .lt. 0.0_rp) then
+       call neko_error("Cylinder point zone: invalid radius")
+    end if
+
+    call cylinder_point_zone_init_common(this, size, trim(name), p0, p1, &
+                                         radius)
+
+  end subroutine cylinder_point_zone_init_from_json
+
+  !> Initializes a cylinder point zone from its endpoint coordinates and radius.
+  !! @param size Size of the scratch stack.
+  !! @param name Name of the cylinder point zone.
+  !! @param p0 Coordinates of the first endpoint.
+  !! @param p1 Coordinates of the second endpoint.
+  !! @param radius Sphere radius.
+  subroutine cylinder_point_zone_init_common(this, size, name, p0, p1, radius)
+    class(cylinder_point_zone_t), intent(inout) :: this
+    integer, intent(in), optional :: size
+    character(len=*), intent(in) :: name
+    real(kind=rp), intent(in), dimension(3) :: p0
+    real(kind=rp), intent(in), dimension(3) :: p1
+    real(kind=rp), intent(in) :: radius
+
+    call this%init_base(size, name)
+
+    this%p0 = p0
+    this%p1 = p1
+    this%radius = radius
+
+  end subroutine cylinder_point_zone_init_common
+
+  !> Destructor.
+  subroutine cylinder_point_zone_free(this)
+    class(cylinder_point_zone_t), intent(inout) :: this
+
+    call this%free_base()
+
+    this%p0 = 0.0_rp
+    this%p1 = 0.0_rp
+    this%radius = 0.0_rp
+
+  end subroutine cylinder_point_zone_free
+
+  !> @brief Defines the criterion of selection of a GLL point in the cylinder
+  !! point zone.
+  !! @details A GLL point of coordinates \f$ \vec{X} = (x, y, z) \f$ is
+  !! considered as being inside the cylinder defined by endpoints
+  !! \f$ \vec{p_0} \f$ and \f$ \vec{p_1} \f$ and radius \f$ r \f$ if it
+  !! satisfies the following conditions:
+  !! \f{eqnarray*}{
+  !!   ||\vec{X} - \vec{X_0}|| &\le& r\\
+  !!   0 &\le& t \le 1
+  !! \f}
+  !! where,
+  !! \f{eqnarray*}{
+  !!   t &=& (\vec{X} - \vec{p_0}) \cdot (\vec{p_1} - \vec{p_0})
+  !!         / ||\vec{p_1} - \vec{p_0}|| \\
+  !! \vec{X_0} &=& \vec{p_0} + t \cdot (\vec{p_1} - \vec{p_0})
+  !! \f}
+  !! @param x x-coordinate of the GLL point.
+  !! @param y y-coordinate of the GLL point.
+  !! @param z z-coordinate of the GLL point.
+  !! @param j 1st nonlinear index of the GLL point.
+  !! @param k 2nd nonlinear index of the GLL point.
+  !! @param l 3rd nonlinear index of the GLL point.
+  !! @param e element index of the GLL point.
+  pure function cylinder_point_zone_criterion(this, x, y, z, j, k, l, e) result(is_inside)
+    class(cylinder_point_zone_t), intent(in) :: this
+    real(kind=rp), intent(in) :: x
+    real(kind=rp), intent(in) :: y
+    real(kind=rp), intent(in) :: z
+    integer, intent(in) :: j
+    integer, intent(in) :: k
+    integer, intent(in) :: l
+    integer, intent(in) :: e
+    logical :: is_inside
+
+    real(kind=rp), dimension(3) :: p
+    real(kind=rp), dimension(3) :: centerline
+    real(kind=rp), dimension(3) :: vec_p
+    real(kind=rp) :: t
+    real(kind=rp), dimension(3) :: projection
+    real(kind=rp) :: distance
+
+    p = [x, y, z]
+
+    centerline = this%p1 - this%p0
+    vec_p = p - this%p0
+    t = dot_product(vec_p, centerline) / dot_product(centerline, centerline)
+
+    projection = this%p0 + t * centerline
+    distance = norm2(projection - p)
+
+    is_inside = t >= 0.0_rp .and. t <= 1.0_rp .and. distance <= this%radius
+
+  end function cylinder_point_zone_criterion
+
+end module cylinder_point_zone
diff --git a/src/mesh/point_zones/sphere_point_zone.f90 b/src/mesh/point_zones/sphere_point_zone.f90
index d29672694a9..2da12c73810 100644
--- a/src/mesh/point_zones/sphere_point_zone.f90
+++ b/src/mesh/point_zones/sphere_point_zone.f90
@@ -39,9 +39,9 @@ module sphere_point_zone
   use math, only: abscmp
   implicit none
   private
-  
+
   !> A sphere-shaped point zone.
-  !! @details As defined here, a sphere is described by its center of 
+  !! @details As defined here, a sphere is described by its center of
   !! coordinates `x0,y0,z0` and its radius, specified in the json file
   !! as e.g. `"center": [<x0>, <y0>, <z0>]", "radius": <r>`.
   type, public, extends(point_zone_t) :: sphere_point_zone_t
@@ -85,7 +85,7 @@ subroutine sphere_point_zone_init_from_json(this, json, size)
          y0, z0, radius)
 
   end subroutine sphere_point_zone_init_from_json
-  
+
   !> Initializes a sphere point zone from its center coordinates and radius.
   !! @param size Size of the scratch stack.
   !! @param name Name of the sphere point zone.
@@ -125,7 +125,7 @@ subroutine sphere_point_zone_free(this)
   end subroutine sphere_point_zone_free
 
   !> Defines the criterion of selection of a GLL point in the sphere point zone.
-  !! A GLL point of coordinates \f$ \vec{X} = (x, y, z) \f$ is considered as being 
+  !! A GLL point of coordinates \f$ \vec{X} = (x, y, z) \f$ is considered as being
   !! inside the zone if:
   !! \f{eqnarray*}{
   !!    |\vec{X} - \vec{X_0}|^2 \le r
diff --git a/src/mesh/quad.f90 b/src/mesh/quad.f90
index 56a05639a37..3b97f2c88bf 100644
--- a/src/mesh/quad.f90
+++ b/src/mesh/quad.f90
@@ -49,10 +49,10 @@ module quad
   !! @verbatim
   !! Node numbering (NEKTON symmetric notation)
   !!
-  !!      3+-----+4    ^ s                 
-  !!       |     |     |                   
-  !!       |     |     |                   
-  !!      1+-----+2    +----> r            
+  !!      3+-----+4    ^ s
+  !!       |     |     |
+  !!       |     |     |
+  !!      1+-----+2    +----> r
   !!
   !! @endverbatim
   type, public, extends(element_t) :: quad_t
@@ -71,11 +71,11 @@ module quad
   !! @verbatim
   !! Edge numbering (similar to NEKTON symmetric notation)
   !!          4
-  !!       +------+      ^ s                 
+  !!       +------+      ^ s
   !!       |      |      |
   !!     1 |      | 2    |
-  !!       |      |      |                   
-  !!       +------+      +-----> r            
+  !!       |      |      |
+  !!       +------+      +-----> r
   !!          3
   !! @endverbatim
   integer, parameter, dimension(2, 4) :: edge_nodes = reshape((/1,3,&
@@ -83,7 +83,7 @@ module quad
                                                                 1,2,&
                                                                 3,4 /),&
                                                                 (/2,4/))
-  
+
 contains
 
   !> Create a quadrilateral element based upon four points
@@ -116,13 +116,13 @@ subroutine quad_facet_id(this, t, side)
     type is(tuple_i4_t)
        if (p1%id() .lt. p2%id()) then
           t%x = (/ p1%id(), p2%id() /)
-      else
+       else
           t%x = (/ p2%id(), p1%id() /)
-      endif
-    end select   
+       endif
+    end select
   end subroutine quad_facet_id
 
-  !> Return the ordered edge for face @a i as a 2-tuple @a t 
+  !> Return the ordered edge for face @a i as a 2-tuple @a t
   subroutine quad_facet_order(this, t, side)
     class(quad_t), intent(in) :: this
     class(tuple_t), intent(inout) :: t
@@ -136,7 +136,7 @@ subroutine quad_facet_order(this, t, side)
     type is(tuple_i4_t)
        t%x = (/ p1%id(), p2%id() /)
     end select
-    
+
   end subroutine quad_facet_order
 
   !> Compute the diameter of a quadrilateral element
@@ -186,7 +186,7 @@ end function quad_centroid
   pure function quad_equal(this, other) result(res)
     class(quad_t), intent(in) :: this
     class(element_t), intent(in) :: other
-    integer :: i    
+    integer :: i
     logical :: res
 
     res = .false.
@@ -202,7 +202,7 @@ pure function quad_equal(this, other) result(res)
           res = .true.
        end if
     end select
-    
+
   end function quad_equal
-  
+
 end module quad
diff --git a/src/mesh/search_tree/aabb.f90 b/src/mesh/search_tree/aabb.f90
new file mode 100644
index 00000000000..854c1c555dc
--- /dev/null
+++ b/src/mesh/search_tree/aabb.f90
@@ -0,0 +1,600 @@
+! Copyright (c) 2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+! ============================================================================ !
+! Original C++ Implementation from:
+! https://github.com/JamesRandall/SimpleVoxelEngine/blob/master/voxelEngine/include/AABB.h
+!
+! Translated to Fortran by:
+! @author Tim Felle Olsen
+! @date 9 Feb 2024
+!
+! C++ Code License:
+! The MIT License (MIT)
+!
+! Copyright (c) 2017 James Randall
+!
+! Permission is hereby granted, free of charge, to any person obtaining a copy of
+! this software and associated documentation files (the "Software"), to deal in
+! the Software without restriction, including without limitation the rights to
+! use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+! the Software, and to permit persons to whom the Software is furnished to do so,
+! subject to the following conditions:
+!
+! The above copyright notice and this permission notice shall be included in all
+! copies or substantial portions of the Software.
+!
+! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+! FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+! COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+! IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+! CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+! ============================================================================ !
+
+!> @brief Axis Aligned Bounding Box (aabb) implementation in Fortran
+!! @details
+!! This is a Fortran implementation of an Axis Aligned Bounding Box (aabb) data
+!! structure. The aabb is a box that is aligned to the x, y and z axes. It is
+!! defined by two points, the lower left front corner and the upper right back
+!! corner. This is the base data structure for the aabb_Tree, which is used to
+!! accelerate a Signed Distance Function.
+module aabb
+  use num_types, only: dp
+  use element, only: element_t
+  use point, only: point_t
+  use tri, only: tri_t
+  use quad, only: quad_t
+  use tet, only: tet_t
+  use hex, only: hex_t
+  use mesh, only: mesh_t
+  use tri_mesh, only: tri_mesh_t
+  use tet_mesh, only: tet_mesh_t
+  use utils, only: neko_error
+
+  implicit none
+  private
+  public :: aabb_t, get_aabb, merge, intersection
+
+  ! ========================================================================== !
+  ! Public interface for free functions
+  ! ========================================================================== !
+
+  !> @brief Merge two aabbs.
+  interface merge
+     module procedure merge_aabb
+  end interface merge
+
+  !> @brief Intersect two aabbs.
+  interface intersection
+     module procedure intersection_aabb
+  end interface intersection
+
+  !> @brief Axis Aligned Bounding Box (aabb) data structure.
+  !! @details The aabb is a box that is aligned to the x, y and z axes. It is
+  !! defined by two points, the lower left front corner and the upper right back
+  !! corner. The purpose of this is to accelerate a Signed Distance Function,
+  !! through an aabb_Tree.
+  type :: aabb_t
+     private
+
+     logical :: initialized = .false.
+     real(kind=dp) :: box_min(3) = huge(0.0_dp)
+     real(kind=dp) :: box_max(3) = -huge(0.0_dp)
+     real(kind=dp) :: center(3) = 0.0_dp
+     real(kind=dp) :: diameter = huge(0.0_dp)
+     real(kind=dp) :: surface_area = 0.0_dp
+
+   contains
+
+     ! Initializers
+     procedure, pass(this), public :: init => aabb_init
+
+     ! Getters
+     procedure, pass(this), public :: get_min => aabb_get_min
+     procedure, pass(this), public :: get_max => aabb_get_max
+     procedure, pass(this), public :: get_width => aabb_get_width
+     procedure, pass(this), public :: get_height => aabb_get_height
+     procedure, pass(this), public :: get_depth => aabb_get_depth
+     procedure, pass(this), public :: get_diameter => aabb_get_diameter
+     procedure, pass(this), public :: get_surface_area => aabb_get_surface_area
+     procedure, pass(this), public :: get_center => aabb_get_center
+     procedure, pass(this), public :: get_diagonal => aabb_get_diagonal
+
+     procedure, pass(this), public :: add_padding
+
+     ! Comparison operators
+     generic :: operator(.lt.) => less
+     generic :: operator(.gt.) => greater
+
+     !> @brief Check if two aabbs are overlapping.
+     procedure, pass(this), public :: overlaps => aabb_overlaps
+     !> @brief Check if this aabb fully contains another aabb.
+     procedure, pass(this), public :: contains => aabb_contains_other
+     !> @brief Check if this aabb contains a point.
+     procedure, pass(this), public :: contains_point => aabb_contains_point
+
+     ! Private comparison operators
+     procedure, pass(this) :: less => aabb_less
+     procedure, pass(this) :: greater => aabb_greater
+
+     ! Private operations
+     procedure, pass(this), private :: calculate_surface_area
+
+  end type aabb_t
+
+contains
+
+  ! ========================================================================== !
+  ! Constructors
+  ! ========================================================================== !
+
+  !> @brief Construct the aabb of a predefined object.
+  !!
+  !! @details This function is used to get the aabb of a predefined object.
+  !! Optionally, the user can define the padding of the aabb, which is a
+  !! multiple of the diameter of the aabb. This is used to avoid numerical
+  !! issues when the object itself it axis aligned.
+  !!
+  !! Current support:
+  !! - Triangle (tri_t)
+  !! - Quadrilateral (quad_t)
+  !! - Tetrahedron (tet_t)
+  !! - Hexahedron (hex_t)
+  !! - Mesh (mesh_t)
+  !! - Triangular mesh (tri_mesh_t)
+  !! - Tetrahedral mesh (tet_mesh_t)
+  !!
+  !! @param[in] object The object to get the aabb of.
+  !! @param[in] padding The padding of the aabb.
+  !! @return The aabb of the object.
+  function get_aabb(object, padding) result(box)
+    use utils, only: neko_error
+    implicit none
+
+    class(*), intent(in) :: object
+    real(kind=dp), intent(in), optional :: padding
+    type(aabb_t) :: box
+
+    select type(object)
+
+      type is (tri_t)
+       box = get_aabb_element(object)
+      type is (hex_t)
+       box = get_aabb_element(object)
+      type is (tet_t)
+       box = get_aabb_element(object)
+      type is (quad_t)
+       box = get_aabb_element(object)
+
+      type is (mesh_t)
+       box = get_aabb_mesh(object)
+      type is (tri_mesh_t)
+       box = get_aabb_tri_mesh(object)
+      type is (tet_mesh_t)
+       box = get_aabb_tet_mesh(object)
+
+      class default
+       call neko_error("get_aabb: Unsupported object type")
+    end select
+
+    if (present(padding)) then
+       call box%add_padding(padding)
+    end if
+
+  end function get_aabb
+
+  !> @brief Add padding to the aabb.
+  !! @details This function adds padding to the aabb. The padding is a multiple
+  !! of the diameter of the aabb. This is used to avoid numerical issues when
+  !! the object itself it axis aligned.
+  !! @param[in] padding The padding of the aabb.
+  subroutine add_padding(this, padding)
+    class(aabb_t), intent(inout) :: this
+    real(kind=dp), intent(in) :: padding
+    real(kind=dp) :: box_min(3), box_max(3)
+
+    box_min = this%box_min - padding * (this%diameter)
+    box_max = this%box_max + padding * (this%diameter)
+
+    call this%init(box_min, box_max)
+  end subroutine add_padding
+
+  !> @brief Get the aabb of an arbitrary element.
+  !!
+  !! @details This function calculates the aabb of an element. The aabb is
+  !! defined by the lower left front corner and the upper right back corner.
+  !! The aabb is calculated by finding the minimum and maximum x, y and z
+  !! coordinate for all points in the arbitrary element type.
+  !!
+  !! @param object The arbitrary element to get the aabb of.
+  !! @return The aabb of the element.
+  function get_aabb_element(object) result(box)
+    class(element_t), intent(in) :: object
+    type(aabb_t) :: box
+
+    integer :: i
+    type(point_t), pointer :: pi
+    real(kind=dp) :: box_min(3), box_max(3)
+
+    box_min = huge(0.0_dp); box_max = -huge(0.0_dp)
+
+    do i = 1, object%n_points()
+       pi => object%p(i)
+       box_min = min(box_min, pi%x)
+       box_max = max(box_max, pi%x)
+    end do
+
+    call box%init(box_min, box_max)
+  end function get_aabb_element
+
+  !> @brief Get the aabb of a mesh.
+  !!
+  !! @details This function calculates the aabb of a mesh. The aabb is
+  !! defined by the lower left front corner and the upper right back corner.
+  !! The aabb is calculated by merging the aabb of all elements in the mesh.
+  !!
+  !! @param object The mesh to get the aabb of.
+  !! @return The aabb of the mesh.
+  function get_aabb_mesh(object) result(box)
+    type(mesh_t), intent(in) :: object
+    type(aabb_t) :: box
+
+    integer :: i
+    type(aabb_t) :: temp_box
+
+    do i = 1, object%nelv
+       temp_box = get_aabb(object%elements(i))
+       box = merge(box, temp_box)
+    end do
+
+  end function get_aabb_mesh
+
+  !> @brief Get the aabb of a triangular mesh.
+  !!
+  !! @details This function calculates the aabb of a mesh. The aabb is
+  !! defined by the lower left front corner and the upper right back corner.
+  !! The aabb is calculated by merging the aabb of all elements in the mesh.
+  !!
+  !! @param object The triangular mesh to get the aabb of.
+  !! @return The aabb of the mesh.
+  function get_aabb_tri_mesh(object) result(box)
+    type(tri_mesh_t), intent(in) :: object
+    type(aabb_t) :: box
+
+    integer :: i
+    type(aabb_t) :: temp_box
+
+    do i = 1, object%nelv
+       temp_box = get_aabb(object%el(i))
+       box = merge(box, temp_box)
+    end do
+
+  end function get_aabb_tri_mesh
+
+  !> @brief Get the aabb of a tetrahedral mesh.
+  !!
+  !! @details This function calculates the aabb of a mesh. The aabb is
+  !! defined by the lower left front corner and the upper right back corner.
+  !! The aabb is calculated by merging the aabb of all elements in the mesh.
+  !!
+  !! @param object The tetrahedral mesh to get the aabb of.
+  !! @return The aabb of the mesh.
+  function get_aabb_tet_mesh(object) result(box)
+    type(tet_mesh_t), intent(in) :: object
+    type(aabb_t) :: box
+
+    integer :: i
+    type(aabb_t) :: temp_box
+
+    do i = 1, object%nelv
+       temp_box = get_aabb(object%el(i))
+       box = merge(box, temp_box)
+    end do
+
+  end function get_aabb_tet_mesh
+
+
+  ! ========================================================================== !
+  ! Initializers
+  ! ========================================================================== !
+
+  !> @brief Initialize the aabb.
+  !! @param lower_left_front The lower left front corner of the aabb.
+  !! @param upper_right_back The upper right back corner of the aabb.
+  subroutine aabb_init(this, lower_left_front, upper_right_back)
+    class(aabb_t), intent(inout) :: this
+    real(kind=dp), dimension(3), intent(in) :: lower_left_front
+    real(kind=dp), dimension(3), intent(in) :: upper_right_back
+
+    this%initialized = .true.
+    this%box_min = lower_left_front
+    this%box_max = upper_right_back
+    this%center = (this%box_min + this%box_max) / 2.0_dp
+    this%diameter = norm2(this%box_max - this%box_min)
+
+    this%surface_area = this%calculate_surface_area()
+
+  end subroutine aabb_init
+
+  ! ========================================================================== !
+  ! Getters
+  ! ========================================================================== !
+
+  !> @brief Get the minimum point of the aabb.
+  pure function aabb_get_min(this) result(min)
+    class(aabb_t), intent(in) :: this
+    real(kind=dp), dimension(3) :: min
+
+    min = this%box_min
+  end function aabb_get_min
+
+  !> @brief Get the maximum point of the aabb.
+  pure function aabb_get_max(this) result(max)
+    class(aabb_t), intent(in) :: this
+    real(kind=dp), dimension(3) :: max
+
+    max = this%box_max
+  end function aabb_get_max
+
+  !> @brief Get the width of the aabb. Also known as the x-axis length.
+  pure function aabb_get_width(this) result(width)
+    class(aabb_t), intent(in) :: this
+    real(kind=dp) :: width
+
+    width = this%box_max(1) - this%box_min(1)
+  end function aabb_get_width
+
+  !> @brief Get the depth of the aabb. Also known as the y-axis length.
+  pure function aabb_get_depth(this) result(depth)
+    class(aabb_t), intent(in) :: this
+    real(kind=dp) :: depth
+
+    depth = this%box_max(2) - this%box_min(2)
+  end function aabb_get_depth
+
+  !> @brief Get the height of the aabb. Also known as the z-axis length.
+  pure function aabb_get_height(this) result(height)
+    class(aabb_t), intent(in) :: this
+    real(kind=dp) :: height
+
+    height = this%box_max(3) - this%box_min(3)
+  end function aabb_get_height
+
+  !> @brief Get the diameter length of the aabb.
+  pure function aabb_get_diameter(this) result(diameter)
+    class(aabb_t), intent(in) :: this
+    real(kind=dp) :: diameter
+
+    diameter = this%diameter
+  end function aabb_get_diameter
+
+  !> @brief Get the surface area of the aabb.
+  pure function aabb_get_surface_area(this) result(surface_area)
+    class(aabb_t), intent(in) :: this
+    real(kind=dp) :: surface_area
+
+    surface_area = this%surface_area
+  end function aabb_get_surface_area
+
+  !> @brief Get the center of the aabb.
+  pure function aabb_get_center(this) result(center)
+    class(aabb_t), intent(in) :: this
+    real(kind=dp), dimension(3) :: center
+
+    center = this%center
+  end function aabb_get_center
+
+  !> @brief Get the diagonal of the aabb.
+  pure function aabb_get_diagonal(this) result(diagonal)
+    class(aabb_t), intent(in) :: this
+    real(kind=dp), dimension(3) :: diagonal
+
+    diagonal = this%box_max - this%box_min
+  end function aabb_get_diagonal
+
+  ! ========================================================================== !
+  ! Operations
+  ! ========================================================================== !
+
+  !> @brief Check if two aabbs are overlapping.
+  function aabb_overlaps(this, other) result(is_overlapping)
+    class(aabb_t), intent(in) :: this
+    class(aabb_t), intent(in) :: other
+    logical :: is_overlapping
+
+    if (.not. this%initialized .or. .not. other%initialized) then
+       !  call neko_error("aabb_overlaps: One or both aabbs are not initialized")
+       is_overlapping = .false.
+    else
+
+       is_overlapping = all(this%box_min .le. other%box_max) .and. &
+         all(this%box_max .ge. other%box_min)
+    end if
+
+  end function aabb_overlaps
+
+  !> @brief Check if this aabb contains another aabb.
+  function aabb_contains_other(this, other) result(is_contained)
+    class(aabb_t), intent(in) :: this
+    class(aabb_t), intent(in) :: other
+    logical :: is_contained
+
+    ! if (.not. this%initialized .or. .not. other%initialized) then
+    !  call neko_error("aabb_contains: One or both aabbs are not initialized")
+    ! end if
+
+    is_contained = all(this%box_min .le. other%box_min) .and. &
+      all(this%box_max .ge. other%box_max)
+
+  end function aabb_contains_other
+
+  !> @brief Check if this aabb contains a point.
+  function aabb_contains_point(this, p) result(is_contained)
+    class(aabb_t), intent(in) :: this
+    real(kind=dp), dimension(3), intent(in) :: p
+    logical :: is_contained
+
+    ! if (.not. this%initialized) then
+    !  call neko_error("aabb_contains_point: One or both aabbs are not initialized")
+    ! end if
+
+    is_contained = all(p .ge. this%box_min) .and. all(p .le. this%box_max)
+  end function aabb_contains_point
+
+  ! ========================================================================== !
+  ! Binary operations
+  ! ========================================================================== !
+
+  !> @brief Merge two aabbs.
+  function merge_aabb(box1, box2) result(merged)
+    class(aabb_t), intent(in) :: box1
+    class(aabb_t), intent(in) :: box2
+    type(aabb_t) :: merged
+
+    real(kind=dp), dimension(3) :: box_min, box_max
+
+    box_min = min(box1%box_min, box2%box_min)
+    box_max = max(box1%box_max, box2%box_max)
+
+    call merged%init(box_min, box_max)
+  end function merge_aabb
+
+  !> @brief Get the intersection of two aabbs.
+  function intersection_aabb(box1, box2) result(intersected)
+    class(aabb_t), intent(in) :: box1
+    class(aabb_t), intent(in) :: box2
+    type(aabb_t) :: intersected
+
+    real(kind=dp), dimension(3) :: box_min, box_max
+
+    box_min = max(box1%box_min, box2%box_min)
+    box_max = min(box1%box_max, box2%box_max)
+
+    call intersected%init(box_min, box_max)
+  end function intersection_aabb
+
+  ! ========================================================================== !
+  ! Private operations
+  ! ========================================================================== !
+
+  !> @brief Calculate the surface area of the aabb.
+  pure function calculate_surface_area(this) result(surface_area)
+    class(aabb_t), intent(in) :: this
+    real(kind=dp) :: surface_area
+
+    surface_area = 2.0 * (&
+      & this%get_width() * this%get_height() &
+      & + this%get_width() * this%get_depth() &
+      & + this%get_height() * this%get_depth() &
+      &)
+  end function calculate_surface_area
+
+  ! ========================================================================== !
+  ! Comparison operators
+  ! ========================================================================== !
+
+  !> @brief Less than comparison operator.
+  pure function aabb_less(this, other)
+    class(aabb_t), intent(in) :: this
+    class(aabb_t), intent(in) :: other
+    logical :: aabb_less
+    logical :: equal
+
+    if (.not. this%initialized .or. .not. other%initialized) then
+       aabb_less = .false.
+       return
+    end if
+
+    aabb_less = this%box_min(1) .lt. other%box_min(1)
+    equal = this%box_min(1) .le. other%box_min(1)
+
+    if (.not. aabb_less .and. equal) then
+       aabb_less = this%box_min(2) .lt. other%box_min(2)
+       equal = this%box_min(2) .le. other%box_min(2)
+    end if
+    if (.not. aabb_less .and. equal) then
+       aabb_less = this%box_min(3) .lt. other%box_min(3)
+       equal = this%box_min(3) .le. other%box_min(3)
+    end if
+    if (.not. aabb_less .and. equal) then
+       aabb_less = this%box_max(1) .lt. other%box_max(1)
+       equal = this%box_max(1) .le. other%box_max(1)
+    end if
+    if (.not. aabb_less .and. equal) then
+       aabb_less = this%box_max(2) .lt. other%box_max(2)
+       equal = this%box_max(2) .le. other%box_max(2)
+    end if
+    if (.not. aabb_less .and. equal) then
+       aabb_less = this%box_max(3) .lt. other%box_max(3)
+    end if
+
+  end function aabb_less
+
+  !> @brief Greater than comparison operator.
+  pure function aabb_greater(this, other)
+    class(aabb_t), intent(in) :: this
+    class(aabb_t), intent(in) :: other
+    logical :: aabb_greater
+    logical :: equal
+
+    if (.not. this%initialized .or. .not. other%initialized) then
+       aabb_greater = .false.
+       return
+    end if
+
+    aabb_greater = this%box_min(1) .gt. other%box_min(1)
+    equal = this%box_min(1) .ge. other%box_min(1)
+
+    if (.not. aabb_greater .and. equal) then
+       aabb_greater = this%box_min(2) .gt. other%box_min(2)
+       equal = this%box_min(2) .ge. other%box_min(2)
+    end if
+    if (.not. aabb_greater .and. equal) then
+       aabb_greater = this%box_min(3) .gt. other%box_min(3)
+       equal = this%box_min(3) .ge. other%box_min(3)
+    end if
+    if (.not. aabb_greater .and. equal) then
+       aabb_greater = this%box_max(1) .gt. other%box_max(1)
+       equal = this%box_max(1) .ge. other%box_max(1)
+    end if
+    if (.not. aabb_greater .and. equal) then
+       aabb_greater = this%box_max(2) .gt. other%box_max(2)
+       equal = this%box_max(2) .ge. other%box_max(2)
+    end if
+    if (.not. aabb_greater .and. equal) then
+       aabb_greater = this%box_max(3) .gt. other%box_max(3)
+    end if
+
+  end function aabb_greater
+
+end module aabb
diff --git a/src/mesh/tet.f90 b/src/mesh/tet.f90
index 57afabd6a37..0957489554d 100644
--- a/src/mesh/tet.f90
+++ b/src/mesh/tet.f90
@@ -49,14 +49,14 @@ module tet
   !! @details
   !! 3D element composed of 4 points
   !! @verbatim
-  !! Node numbering 
+  !! Node numbering
   !!
-  !!        3 +           ^ s                 
-  !!         /|\          |                   
-  !!        / | \         |                   
-  !!     1 +..|..+ 2      +----> r            
-  !!        \ | /        /                    
-  !!         \|/        /                     
+  !!        3 +           ^ s
+  !!         /|\          |
+  !!        / | \         |
+  !!     1 +..|..+ 2      +----> r
+  !!        \ | /        /
+  !!         \|/        /
   !!        4 +        t
   !!
   !! @endverbatim
@@ -67,7 +67,7 @@ module tet
      procedure, pass(this) :: facet_order => tet_facet_order
      procedure, pass(this) :: diameter => tet_diameter
      procedure, pass(this) :: centroid => tet_centroid
-     procedure, pass(this) :: edge_id => tet_edge_id          
+     procedure, pass(this) :: edge_id => tet_edge_id
      procedure, pass(this) :: equal => tet_equal
      generic :: operator(.eq.) => equal
   end type tet_t
@@ -75,16 +75,16 @@ module tet
   !> Face node ids
   !! @details
   !! @verbatim
-  !! Face numbering 
+  !! Face numbering
   !!
-  !!          +   4       ^ s                 
+  !!          +   4       ^ s
   !!         /|\ /        |
   !!        / | \         |
-  !!       / 1|2 \        |                   
-  !!      +...|...+       +----> r            
+  !!       / 1|2 \        |
+  !!      +...|...+       +----> r
   !!       \  |3 /       /
   !!        \ | /       /
-  !!         \|/       /                     
+  !!         \|/       /
   !!          +       t
   !!
   !! @endverbatim
@@ -94,20 +94,20 @@ module tet
                                                                 1,2,4,&
                                                                 1,2,3/),&
                                                                 (/3,4/))
-  
+
   !> Edge node ids
   !! @details
   !! @verbatim
-  !! Edge numbering 
+  !! Edge numbering
   !!
-  !!      2   +   3       ^ s                 
+  !!      2   +   3       ^ s
   !!       \ /|\ /        |
   !!        / | \         |
-  !!       /  |  \        |                   
-  !!      +.1.|...+       +----> r            
+  !!       /  |  \        |
+  !!      +.1.|...+       +----> r
   !!       \  4  /       /
   !!   5--> \ | / <--6  /
-  !!         \|/       /                     
+  !!         \|/       /
   !!          +       t
   !!
   !! @endverbatim
@@ -118,9 +118,9 @@ module tet
                                                                 1,4,&
                                                                 2,4/),&
                                                                 (/2,6/))
-  
+
 contains
-  
+
   !> Create a tetrahedral element based upon four points
   subroutine tet_init(this, id, p1, p2, p3, p4)
     class(tet_t), intent(inout) :: this
@@ -128,7 +128,7 @@ subroutine tet_init(this, id, p1, p2, p3, p4)
     type(point_t), target, intent(in) :: p1, p2, p3, p4
 
     call this%element(id, NEKO_TET_GDIM, NEKO_TET_NPTS)
-    
+
     this%pts(1)%p => p1
     this%pts(2)%p => p2
     this%pts(3)%p => p3
@@ -137,7 +137,7 @@ subroutine tet_init(this, id, p1, p2, p3, p4)
   end subroutine tet_init
 
   !> Return the facet id for face @a i as a 3-tuple @a t
-  subroutine tet_facet_id(this, t, side) 
+  subroutine tet_facet_id(this, t, side)
     class(tet_t), intent(in) :: this
     class(tuple_t), intent(inout) :: t
     integer, intent(in) :: side
@@ -165,7 +165,7 @@ subroutine tet_facet_id(this, t, side)
   end subroutine tet_facet_id
 
   !> Return the ordered points for face @a i as a 3-tuple @a t
-  subroutine tet_facet_order(this, t, side) 
+  subroutine tet_facet_order(this, t, side)
     class(tet_t), intent(in) :: this
     class(tuple_t), intent(inout) :: t
     integer, intent(in) :: side
@@ -179,12 +179,12 @@ subroutine tet_facet_order(this, t, side)
     type is(tuple3_i4_t)
        t%x = (/ p1%id(), p2%id(), p3%id() /)
     end select
-    
+
   end subroutine tet_facet_order
 
 
   !> Return the edge id for an edge @a i as a 2-tuple @a t
-  subroutine tet_edge_id(this, t, side) 
+  subroutine tet_edge_id(this, t, side)
     class(tet_t), intent(in) :: this
     class(tuple_t), intent(inout) :: t
     integer, intent(in) :: side
@@ -197,14 +197,14 @@ subroutine tet_edge_id(this, t, side)
     type is(tuple_i4_t)
        if (p1%id() .lt. p2%id()) then
           t%x = (/ p1%id(), p2%id() /)
-      else
+       else
           t%x = (/ p2%id(), p1%id() /)
-      endif
+       endif
 
     end select
 
   end subroutine tet_edge_id
-  
+
   !> Compute the diameter of a tetrahedral element
   function tet_diameter(this) result(res)
     class(tet_t), intent(in) :: this
@@ -260,7 +260,7 @@ function tet_centroid(this) result(res)
     do i = 1, this%gdim()
        res%x(i) = 0.25 * (p1%x(i) + p2%x(i) + p3%x(i) + p4%x(i))
     end do
-    
+
   end function tet_centroid
 
   !> Check if two tet elements are equal
@@ -286,5 +286,5 @@ pure function tet_equal(this, other) result(res)
     end select
 
   end function tet_equal
-  
+
 end module tet
diff --git a/src/mesh/tet_mesh.f90 b/src/mesh/tet_mesh.f90
index 92699c500b6..b1899561460 100644
--- a/src/mesh/tet_mesh.f90
+++ b/src/mesh/tet_mesh.f90
@@ -39,7 +39,7 @@ module tet_mesh
   use utils
   implicit none
   private
-  
+
   integer, public, parameter :: TET_MSH_OTPV = 1, TET_MSH_FVTC = 2, &
        TET_MSH_SVTC = 3
 
@@ -60,9 +60,9 @@ subroutine tet_mesh_init(this, msh, mthd)
     type(mesh_t), intent(in), target :: msh
     integer, intent(in), optional :: mthd
     integer :: bsct_mthd
-    
+
     call this%free()
-    
+
     this%msh => msh
 
     if (present(mthd)) then
@@ -98,7 +98,7 @@ subroutine tet_mesh_free(this)
     end if
 
     nullify(this%msh)
-    
+
   end subroutine tet_mesh_free
 
   ! Bisect hexahedral mesh into a tetrahedral mesh using
@@ -109,7 +109,7 @@ subroutine tet_mesh_bisect_otpv(tet_msh)
     type(tet_mesh_t), intent(inout) :: tet_msh
     integer :: i, j
     type(point_t), pointer :: p1, p2, p3, p4
-    
+
     j = 0
     do i = 1, tet_msh%msh%nelv
 
@@ -170,7 +170,7 @@ subroutine tet_mesh_bisect_otpv(tet_msh)
        call tet_msh%el(j)%init(j, p1, p2, p3, p4)
 
     end do
-    
+
   end subroutine tet_mesh_bisect_otpv
 
   !> Bisect each hexahedron into five tetrahedrons
@@ -269,10 +269,10 @@ subroutine tet_mesh_bisect_svtc(tet_msh)
        p3 => tet_msh%msh%elements(i)%e%pts(3)%p
        p4 => tet_msh%msh%elements(i)%e%pts(4)%p
        call tet_msh%el(j)%init(j, p1, p2, p3, p4)
-       
+
     end do
-    
-    
+
+
   end subroutine tet_mesh_bisect_svtc
-  
+
 end module tet_mesh
diff --git a/src/mesh/tri.f90 b/src/mesh/tri.f90
index 3ab95fdfa83..45168d811f1 100644
--- a/src/mesh/tri.f90
+++ b/src/mesh/tri.f90
@@ -50,9 +50,9 @@ module tri
   !! Node numbering
   !!
   !!    3+
-  !!     |\          
+  !!     |\
   !!     | \        ^ s
-  !!     |  \       | 
+  !!     |  \       |
   !!     |   \      |
   !!    1+----+2    +---> r
   !!
@@ -74,9 +74,9 @@ module tri
   !! Edge numbering
   !!
   !!     +
-  !!     |\          
+  !!     |\
   !!     | \       ^ s
-  !!   1 |  \ 2    | 
+  !!   1 |  \ 2    |
   !!     |   \     |
   !!     +----+    +---> r
   !!       3
@@ -109,7 +109,7 @@ subroutine tri_facet_id(this, t, side)
     class(tri_t), intent(in) :: this
     class(tuple_t), intent(inout) :: t
     integer, intent(in) :: side
-    type(point_t), pointer :: p1, p2    
+    type(point_t), pointer :: p1, p2
 
     p1 => this%p(edge_nodes(1, side))
     p2 => this%p(edge_nodes(2, side))
@@ -122,7 +122,7 @@ subroutine tri_facet_id(this, t, side)
           t%x = (/ p2%id(), p1%id() /)
        end if
     end select
-    
+
   end subroutine tri_facet_id
 
   !> Return the ordered edge for face @a i as a 2-tuple @a t
@@ -189,7 +189,7 @@ end function tri_centroid
   pure function tri_equal(this, other) result(res)
     class(tri_t), intent(in) :: this
     class(element_t), intent(in) :: other
-    integer :: i    
+    integer :: i
     logical :: res
 
     res = .false.
@@ -205,7 +205,7 @@ pure function tri_equal(this, other) result(res)
           res = .true.
        end if
     end select
-    
+
   end function tri_equal
-  
+
 end module tri
diff --git a/src/mesh/tri_mesh.f90 b/src/mesh/tri_mesh.f90
index 7a044f56ad2..a6367fee49f 100644
--- a/src/mesh/tri_mesh.f90
+++ b/src/mesh/tri_mesh.f90
@@ -31,7 +31,7 @@
 ! POSSIBILITY OF SUCH DAMAGE.
 !
 !> Defines a triangular surface mesh
-!! @details Mesh derived from a surface geometry 
+!! @details Mesh derived from a surface geometry
 module tri_mesh
   use tri
   use point, only : point_t
@@ -43,7 +43,6 @@ module tri_mesh
      type(point_t), allocatable :: points(:) !< List of points
      integer :: nelv
      integer :: mpts
-     integer, private :: melv
    contains
      procedure, pass(this) :: init => tri_mesh_init
      procedure, pass(this) :: free => tri_mesh_free
@@ -64,8 +63,8 @@ subroutine tri_mesh_init(this, nelv)
     allocate(this%points(nelv * NEKO_TRI_NPTS))
 
     this%mpts = 0
-    this%melv = 0
-    
+    this%nelv = 0
+
   end subroutine tri_mesh_init
 
   !> Deallocate a triangular surface mesh
@@ -79,7 +78,7 @@ subroutine tri_mesh_free(this)
     if (allocated(this%points)) then
        deallocate(this%points)
     end if
-    
+
   end subroutine tri_mesh_free
 
   !> Add an element to a mesh
@@ -91,13 +90,13 @@ subroutine tri_mesh_add_element(this, p1, p2, p3)
     this%points(this%mpts + 2) = p2
     this%points(this%mpts + 3) = p3
 
-    this%melv = this%melv + 1
-    call this%el(this%melv)%init(this%melv, &
+    this%nelv = this%nelv + 1
+    call this%el(this%nelv)%init(this%nelv, &
          this%points(this%mpts + 1), &
          this%points(this%mpts + 2), &
          this%points(this%mpts + 3))
     this%mpts = this%mpts + 3
 
   end subroutine tri_mesh_add_element
-  
+
 end module tri_mesh
diff --git a/src/neko.f90 b/src/neko.f90
index 483cb07aa5e..bfef342f551 100644
--- a/src/neko.f90
+++ b/src/neko.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2019-2023, The Neko Authors
+! Copyright (c) 2019-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -51,7 +51,7 @@ module neko
   use mxm_wrapper
   use global_interpolation
   use file
-  use field, only : field_t
+  use field, only : field_t, field_ptr_t
   use neko_mpi_types
   use gather_scatter
   use coefs
@@ -76,19 +76,23 @@ module neko
   use jobctrl
   use device
   use device_math
+  use map_1d
   use cpr
   use fluid_stats
   use field_list, only : field_list_t
   use fluid_user_source_term
+  use scalar_user_source_term
   use vector
+  use matrix
   use tensor
   use simulation_component
   use probes
   use spectral_error_indicator
   use system
+  use drag_torque
   use field_registry, only : neko_field_registry
   use scratch_registry, only : neko_scratch_registry
-  use simulation_component_global, only : simcomps_global_init
+  use simcomp_executor, only : neko_simcomps
   use data_streamer
   use time_interpolator
   use point_interpolator, only : point_interpolator_t
@@ -111,7 +115,7 @@ subroutine neko_init(C)
     character(8) :: date
     integer :: argc, nthrds, rw, sw
 
-    call date_and_time(time=time, date=date)           
+    call date_and_time(time=time, date=date)
 
     call comm_init
     call neko_mpi_types_init
@@ -153,12 +157,12 @@ subroutine neko_init(C)
        !
        ! Job information
        !
-       call neko_log%section("Job Information")       
+       call neko_log%section("Job Information")
        write(log_buf, '(A,A,A,A,1x,A,1x,A,A,A,A,A)') 'Start time: ',&
             time(1:2),':',time(3:4), '/', date(1:4),'-', date(5:6),'-',date(7:8)
-       call neko_log%message(log_buf)
+       call neko_log%message(log_buf, NEKO_LOG_QUIET)
        write(log_buf, '(a)') 'Running on: '
-       sw = 10 
+       sw = 10
        if (pe_size .lt. 1e1)  then
           write(log_buf(13:), '(i1,a)') pe_size, ' MPI '
           if (pe_size .eq. 1) then
@@ -184,7 +188,7 @@ subroutine neko_init(C)
           write(log_buf(13:), '(i6,a)') pe_size, ' MPI ranks'
           rw = 6
        end if
-       
+
        nthrds = 1
        !$omp parallel
        !$omp master
@@ -193,7 +197,7 @@ subroutine neko_init(C)
        !$omp end parallel
 
        if (nthrds .gt. 1) then
-          if (nthrds .lt. 1e1) then                
+          if (nthrds .lt. 1e1) then
              write(log_buf(13 + rw + sw:), '(a,i1,a)') ', using ', &
                   nthrds, ' thrds each'
           else if (nthrds .lt. 1e2) then
@@ -207,11 +211,11 @@ subroutine neko_init(C)
                   nthrds, ' thrds each'
           end if
        end if
-       call neko_log%message(log_buf)      
+       call neko_log%message(log_buf, NEKO_LOG_QUIET)
 
        write(log_buf, '(a)') 'CPU type  : '
        call system_cpu_name(log_buf(13:))
-       call neko_log%message(log_buf)
+       call neko_log%message(log_buf, NEKO_LOG_QUIET)
 
        write(log_buf, '(a)') 'Bcknd type: '
        if (NEKO_BCKND_SX .eq. 1) then
@@ -227,13 +231,13 @@ subroutine neko_init(C)
        else
           write(log_buf(13:), '(a)') 'CPU'
        end if
-       call neko_log%message(log_buf)
+       call neko_log%message(log_buf, NEKO_LOG_QUIET)
 
        if (NEKO_BCKND_HIP .eq. 1 .or. NEKO_BCKND_CUDA .eq. 1 .or. &
             NEKO_BCKND_OPENCL .eq. 1) then
           write(log_buf, '(a)') 'Dev. name : '
           call device_name(log_buf(13:))
-          call neko_log%message(log_buf)
+          call neko_log%message(log_buf, NEKO_LOG_QUIET)
        end if
 
        write(log_buf, '(a)') 'Real type : '
@@ -245,7 +249,7 @@ subroutine neko_init(C)
        case (real128)
           write(log_buf(13:), '(a)') 'quad precision'
        end select
-       call neko_log%message(log_buf)
+       call neko_log%message(log_buf, NEKO_LOG_QUIET)
 
        call neko_log%end()
 
@@ -257,10 +261,10 @@ subroutine neko_init(C)
        !
        ! Create simulation components
        !
-       call simcomps_global_init(C)
-       
+       call neko_simcomps%init(C)
+
     end if
-    
+
   end subroutine neko_init
 
   subroutine neko_finalize(C)
@@ -269,7 +273,7 @@ subroutine neko_finalize(C)
     if (present(C)) then
        call case_free(C)
     end if
-    
+
     call neko_field_registry%free()
     call neko_scratch_registry%free()
     call device_finalize
diff --git a/src/qoi/drag_torque.f90 b/src/qoi/drag_torque.f90
new file mode 100644
index 00000000000..a87f9f38b35
--- /dev/null
+++ b/src/qoi/drag_torque.f90
@@ -0,0 +1,367 @@
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
+!
+! The UChicago Argonne, LLC as Operator of Argonne National
+! Laboratory holds copyright in the Software. The copyright holder
+! reserves all rights except those expressly granted to licensees,
+! and U.S. Government license rights.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the disclaimer below.
+!
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions and the disclaimer (as noted below)
+! in the documentation and/or other materials provided with the
+! distribution.
+!
+! 3. Neither the name of ANL nor the names of its contributors
+! may be used to endorse or promote products derived from this software
+! without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+!
+! Additional BSD Notice
+! ---------------------
+! 1. This notice is required to be provided under our contract with
+! the U.S. Department of Energy (DOE). This work was produced at
+! Argonne National Laboratory under Contract
+! No. DE-AC02-06CH11357 with the DOE.
+!
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
+! express or implied, or assumes any liability or responsibility for the
+! accuracy, completeness, or usefulness of any information, apparatus,
+! product, or process disclosed, or represents that its use would not
+! infringe privately-owned rights.
+!
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
+! not be used for advertising or product endorsement purposes.
+!
+
+module drag_torque
+  use field, only: field_t
+  use coefs, only: coef_t
+  use mesh
+  use facet_zone
+  use comm
+  use math
+  use space, only: space_t
+  use num_types, only: rp
+  use operators
+  implicit none
+  private
+  !> Some functions to calculate the lift/drag and torque
+  !! Calculation can be done on a zone, a facet, or a point
+  !! Currently everything is CPU only
+  public :: drag_torque_zone, drag_torque_facet, drag_torque_pt
+
+contains
+  !> Calculate drag and torque over a zone.
+  !! @param dgtq, the computed drag and torque
+  !! @param tstep, the time step
+  !! @param zone, the zone which we compute the drag and toqure over
+  !! @param center, the point around which we calculate the torque
+  !! @param s11-s23, the strain rate tensor
+  !! @param p, the pressure
+  !! @param coef, coefficents
+  !! @param visc, the viscosity
+  subroutine drag_torque_zone(dgtq, tstep, zone, center, s11, s22, s33, s12, s13, s23,&
+                              p, coef, visc)
+    integer, intent(in) :: tstep
+    type(facet_zone_t) :: zone
+    type(coef_t), intent(inout) :: coef
+    real(kind=rp), intent(inout) :: s11(coef%Xh%lx,coef%Xh%lx,coef%Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(inout) :: s22(coef%Xh%lx,coef%Xh%lx,coef%Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(inout) :: s33(coef%Xh%lx,coef%Xh%lx,coef%Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(inout) :: s12(coef%Xh%lx,coef%Xh%lx,coef%Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(inout) :: s13(coef%Xh%lx,coef%Xh%lx,coef%Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(inout) :: s23(coef%Xh%lx,coef%Xh%lx,coef%Xh%lz,coef%msh%nelv)
+    type(field_t), intent(inout) :: p
+    real(kind=rp), intent(in) :: visc, center(3)
+    real(kind=rp) :: dgtq(3,4)
+    real(kind=rp) :: dragpx = 0.0_rp ! pressure 
+    real(kind=rp) :: dragpy = 0.0_rp
+    real(kind=rp) :: dragpz = 0.0_rp
+    real(kind=rp) :: dragvx = 0.0_rp ! viscous
+    real(kind=rp) :: dragvy = 0.0_rp
+    real(kind=rp) :: dragvz = 0.0_rp
+    real(kind=rp) :: torqpx = 0.0_rp ! pressure 
+    real(kind=rp) :: torqpy = 0.0_rp
+    real(kind=rp) :: torqpz = 0.0_rp
+    real(kind=rp) :: torqvx = 0.0_rp ! viscous
+    real(kind=rp) :: torqvy = 0.0_rp
+    real(kind=rp) :: torqvz = 0.0_rp
+    real(kind=rp) :: dragx, dragy, dragz
+    real(kind=rp) :: torqx, torqy, torqz
+    integer :: ie, ifc, mem, ierr
+    dragx = 0.0
+    dragy = 0.0
+    dragz = 0.0
+
+!
+!     Fill up viscous array w/ default
+!
+      dragpx = 0.0
+      dragpy = 0.0
+      dragpz = 0.0
+      dragvx = 0.0
+      dragvy = 0.0
+      dragvz = 0.0
+      do mem  = 1,zone%size
+         ie   = zone%facet_el(mem)%x(2)
+         ifc   = zone%facet_el(mem)%x(1)
+         call drag_torque_facet(dgtq,coef%dof%x,coef%dof%y,coef%dof%z,&
+                                center,&
+                                s11, s22, s33, s12, s13, s23,&
+                                p%x,visc,ifc,ie, coef, coef%Xh)
+
+         dragpx = dragpx + dgtq(1,1)  ! pressure 
+         dragpy = dragpy + dgtq(2,1)
+         dragpz = dragpz + dgtq(3,1)
+
+         dragvx = dragvx + dgtq(1,2)  ! viscous
+         dragvy = dragvy + dgtq(2,2)
+         dragvz = dragvz + dgtq(3,2)
+
+         torqpx = torqpx + dgtq(1,3)  ! pressure 
+         torqpy = torqpy + dgtq(2,3)
+         torqpz = torqpz + dgtq(3,3)
+
+         torqvx = torqvx + dgtq(1,4)  ! viscous
+         torqvy = torqvy + dgtq(2,4)
+         torqvz = torqvz + dgtq(3,4)
+      enddo
+!
+!     Sum contributions from all processors
+!
+      call MPI_Allreduce(MPI_IN_PLACE,dragpx, 1, &
+         MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+      call MPI_Allreduce(MPI_IN_PLACE,dragpy, 1, &
+         MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+      call MPI_Allreduce(MPI_IN_PLACE,dragpz, 1, &
+         MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+      call MPI_Allreduce(MPI_IN_PLACE,dragvx, 1, &
+         MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+      call MPI_Allreduce(MPI_IN_PLACE,dragvy, 1, &
+         MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+      call MPI_Allreduce(MPI_IN_PLACE,dragvz, 1, &
+         MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+      !Torque
+      call MPI_Allreduce(MPI_IN_PLACE,torqpx, 1, &
+         MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+      call MPI_Allreduce(MPI_IN_PLACE,torqpy, 1, &
+         MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+      call MPI_Allreduce(MPI_IN_PLACE,torqpz, 1, &
+         MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+      call MPI_Allreduce(MPI_IN_PLACE,torqvx, 1, &
+         MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+      call MPI_Allreduce(MPI_IN_PLACE,torqvy, 1, &
+         MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+      call MPI_Allreduce(MPI_IN_PLACE,torqvz, 1, &
+         MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr)
+
+      dgtq(1,1) = dragpx  ! pressure 
+      dgtq(2,1) = dragpy
+      dgtq(3,1) = dragpz
+               
+      dgtq(1,2) = dragvx  ! viscous
+      dgtq(2,2) = dragvy
+      dgtq(3,2) = dragvz
+               
+      dgtq(1,3) = torqpx  ! pressure 
+      dgtq(2,3) = torqpy
+      dgtq(3,3) = torqpz
+               
+      dgtq(1,4) = torqvx  ! viscous
+      dgtq(2,4) = torqvy
+      dgtq(3,4) = torqvz
+
+  end subroutine drag_torque_zone
+
+  !> Calculate drag and torque over a facet.
+  !! @param dgtq, the computed drag and torque
+  !! @param tstep, the time step
+  !! @param xm0, the x coords
+  !! @param ym0, the y coords
+  !! @param zm0, the z coords
+  !! @param center, the point around which we calculate the torque
+  !! @param s11-s23, the strain rate tensor
+  !! @param p, the pressure
+  !! @param coef, coefficents
+  !! @param visc, the viscosity
+  subroutine drag_torque_facet(dgtq,xm0,ym0,zm0, center,&
+                               s11, s22, s33, s12, s13, s23,&
+                               pm1,visc,f,e, coef, Xh)
+    type(coef_t), intent(in) :: coef 
+    type(space_t), intent(in) :: Xh
+    real(kind=rp), intent(out) :: dgtq(3,4)
+    real(kind=rp), intent(in) :: center(3)
+    real(kind=rp), intent(in) :: xm0 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(in) :: ym0 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(in) :: zm0 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(in) :: s11 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(in) :: s22 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(in) :: s33 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(in) :: s12 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(in) :: s13 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(in) :: s23 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(in) :: pm1 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv)
+    real(kind=rp), intent(in) :: visc
+    integer, intent(in) :: f,e
+    integer :: pf,l, k, i, j1, j2
+    real(kind=rp) ::    n1,n2,n3, j, a, r1, r2, r3, v, dgtq_i(3,4)
+    integer :: skpdat(6,6), NX, NY, NZ
+    integer :: js1   
+    integer :: jf1   
+    integer :: jskip1
+    integer :: js2   
+    integer :: jf2   
+    integer :: jskip2
+    real(kind=rp) :: s11_, s21_, s31_, s12_, s22_, s32_, s13_, s23_, s33_
+
+
+    NX = Xh%lx
+    NY = Xh%ly
+    NZ = Xh%lz
+    SKPDAT(1,1)=1
+    SKPDAT(2,1)=NX*(NY-1)+1
+    SKPDAT(3,1)=NX
+    SKPDAT(4,1)=1
+    SKPDAT(5,1)=NY*(NZ-1)+1
+    SKPDAT(6,1)=NY
+
+    SKPDAT(1,2)=1             + (NX-1)
+    SKPDAT(2,2)=NX*(NY-1)+1   + (NX-1)
+    SKPDAT(3,2)=NX
+    SKPDAT(4,2)=1
+    SKPDAT(5,2)=NY*(NZ-1)+1
+    SKPDAT(6,2)=NY
+
+    SKPDAT(1,3)=1
+    SKPDAT(2,3)=NX
+    SKPDAT(3,3)=1
+    SKPDAT(4,3)=1
+    SKPDAT(5,3)=NY*(NZ-1)+1
+    SKPDAT(6,3)=NY
+
+    SKPDAT(1,4)=1           + NX*(NY-1)
+    SKPDAT(2,4)=NX          + NX*(NY-1)
+    SKPDAT(3,4)=1
+    SKPDAT(4,4)=1
+    SKPDAT(5,4)=NY*(NZ-1)+1
+    SKPDAT(6,4)=NY
+
+    SKPDAT(1,5)=1
+    SKPDAT(2,5)=NX
+    SKPDAT(3,5)=1
+    SKPDAT(4,5)=1
+    SKPDAT(5,5)=NY
+    SKPDAT(6,5)=1
+
+    SKPDAT(1,6)=1           + NX*NY*(NZ-1)
+    SKPDAT(2,6)=NX          + NX*NY*(NZ-1)
+    SKPDAT(3,6)=1
+    SKPDAT(4,6)=1
+    SKPDAT(5,6)=NY
+    SKPDAT(6,6)=1
+    pf = f
+    js1    = skpdat(1,pf)
+    jf1    = skpdat(2,pf)
+    jskip1 = skpdat(3,pf)
+    js2    = skpdat(4,pf)
+    jf2    = skpdat(5,pf)
+    jskip2 = skpdat(6,pf)
+    call rzero(dgtq,12)
+    i = 0
+    a = 0
+    do j2=js2,jf2,jskip2
+       do j1=js1,jf1,jskip1
+         i = i+1
+         n1 = coef%nx(i,1,f,e)*coef%area(i,1,f,e)
+         n2 = coef%ny(i,1,f,e)*coef%area(i,1,f,e)
+         n3 = coef%nz(i,1,f,e)*coef%area(i,1,f,e)
+         a  = a +          coef%area(i,1,f,e)
+         v  = visc
+         s11_ = s11(j1,j2,1,e)
+         s12_ = s12(j1,j2,1,e)
+         s22_ = s22(j1,j2,1,e)
+         s13_ = s13(j1,j2,1,e)
+         s23_ = s23(j1,j2,1,e)
+         s33_ = s33(j1,j2,1,e)
+         call drag_torque_pt(dgtq_i,xm0(j1,j2,1,e), ym0(j1,j2,1,e),zm0(j1,j2,1,e), center,&
+                             s11_, s22_, s33_, s12_, s13_, s23_,&
+                             pm1(j1,j2,1,e), n1, n2, n3, v)
+         dgtq = dgtq + dgtq_i
+       end do
+    end do
+  end subroutine drag_torque_facet
+
+  !> Calculate drag and torque from one point
+  !! @param dgtq, the computed drag and torque
+  !! @param xm0, the x coord
+  !! @param ym0, the y coord
+  !! @param zm0, the z coord
+  !! @param center, the point around which we calculate the torque
+  !! @param s11-s23, the strain rate tensor
+  !! @param p, the pressure
+  !! @param n1, normal vector x
+  !! @param n2, normal vector y
+  !! @param n3, normal vector z
+  !! @param v, the viscosity
+  subroutine drag_torque_pt(dgtq,x,y,z, center, s11, s22, s33, s12, s13, s23,&
+                            p,n1, n2, n3,v)
+    real(kind=rp), intent(inout) :: dgtq(3,4)
+    real(kind=rp), intent(in) :: x 
+    real(kind=rp), intent(in) :: y
+    real(kind=rp), intent(in) :: z
+    real(kind=rp), intent(in) :: p
+    real(kind=rp), intent(in) :: v
+    real(kind=rp), intent(in) :: n1, n2, n3, center(3)
+    real(kind=rp), intent(in) :: s11, s12, s22, s13, s23, s33
+    real(kind=rp) ::  s21, s31, s32, r1, r2, r3 
+    call rzero(dgtq,12)
+    s21 = s12
+    s32 = s23
+    s31 = s13
+    !pressure drag
+    dgtq(1,1) = p*n1  
+    dgtq(2,1) = p*n2
+    dgtq(3,1) = p*n3
+    ! viscous drag
+    dgtq(1,2) = -v*(s11*n1 + s12*n2 + s13*n3)
+    dgtq(2,2) = -v*(s21*n1 + s22*n2 + s23*n3)
+    dgtq(3,2) = -v*(s31*n1 + s32*n2 + s33*n3)
+    r1 = x-center(1)
+    r2 = y-center(2)
+    r3 = z-center(3)
+    !pressure torque
+    dgtq(1,3) = (r2*dgtq(3,1)-r3*dgtq(2,1)) 
+    dgtq(2,3) = (r3*dgtq(1,1)-r1*dgtq(3,1))
+    dgtq(3,3) = (r1*dgtq(2,1)-r2*dgtq(1,1))
+    !viscous torque
+    dgtq(1,4) = (r2*dgtq(3,2)-r3*dgtq(2,2))
+    dgtq(2,4) = (r3*dgtq(1,2)-r1*dgtq(3,2)) 
+    dgtq(3,4) = (r1*dgtq(2,2)-r2*dgtq(1,2))
+  end subroutine drag_torque_pt
+
+end module drag_torque
diff --git a/src/scalar/bcknd/cpu/scalar_residual_cpu.f90 b/src/scalar/bcknd/cpu/scalar_residual_cpu.f90
index bc8d8dd1a9d..e0609d2c69f 100644
--- a/src/scalar/bcknd/cpu/scalar_residual_cpu.f90
+++ b/src/scalar/bcknd/cpu/scalar_residual_cpu.f90
@@ -1,26 +1,41 @@
-!> Residuals in the Pn-Pn formulation (CPU version)
+!> Residuals in the scalar equation (CPU version).
 module scalar_residual_cpu
   use gather_scatter
   use scalar_residual
   use operators
   implicit none
   private
-  
+
+  !> Wrapper type for the routine to compute the scalar residual on the CPU.
   type, public, extends(scalar_residual_t) :: scalar_residual_cpu_t
    contains
+     !> Compute the residual.
      procedure, nopass :: compute => scalar_residual_cpu_compute
   end type scalar_residual_cpu_t
 
 contains
 
+  !> Compute the residual.
+  !! @param Ax The Helmholtz operator.
+  !! @param s The values of the scalar.
+  !! @param s_res The values of the scalar residual.
+  !! @param f_xH The right hand side.
+  !! @param c_xH The SEM coefficients.
+  !! @param msh The mesh.
+  !! @param Xh The SEM function space.
+  !! @param lambda The thermal conductivity.
+  !! @param rhocp The density multiplied by the specific heat capacity.
+  !! @param bd The coefficeints from the BDF differencing scheme.
+  !! @param dt The timestep.
+  !! @param n The total number of degrees of freedom.
   subroutine scalar_residual_cpu_compute(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, &
       lambda, rhocp, bd, dt, n)
     class(ax_t), intent(in) :: Ax
     type(mesh_t), intent(inout) :: msh
-    type(space_t), intent(inout) :: Xh    
+    type(space_t), intent(inout) :: Xh
     type(field_t), intent(inout) :: s
     type(field_t), intent(inout) :: s_res
-    type(source_scalar_t), intent(inout) :: f_Xh
+    type(field_t), intent(inout) :: f_Xh
     type(coef_t), intent(inout) :: c_Xh
     real(kind=rp), intent(in) :: lambda
     real(kind=rp), intent(in) :: rhocp
@@ -32,6 +47,7 @@ subroutine scalar_residual_cpu_compute(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, &
     do i = 1, n
        c_Xh%h1(i,1,1,1) = lambda
        ! todo :should not be just rho here.
+       ! Tim M. 2023-12-19: What is this todo?
        c_Xh%h2(i,1,1,1) = rhocp * (bd / dt)
     end do
     c_Xh%ifh2 = .true.
@@ -39,9 +55,9 @@ subroutine scalar_residual_cpu_compute(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, &
     call Ax%compute(s_res%x, s%x, c_Xh, msh, Xh)
 
     do i = 1, n
-       s_res%x(i,1,1,1) = (-s_res%x(i,1,1,1)) + f_Xh%s(i,1,1,1)
+       s_res%x(i,1,1,1) = (-s_res%x(i,1,1,1)) + f_Xh%x(i,1,1,1)
     end do
-    
-  end subroutine scalar_residual_cpu_compute  
-     
+
+  end subroutine scalar_residual_cpu_compute
+
 end module scalar_residual_cpu
diff --git a/src/scalar/bcknd/device/cuda/scalar_residual_update_kernel.h b/src/scalar/bcknd/device/cuda/scalar_residual_update_kernel.h
index 8e5e9dcbc85..7151c818a3d 100644
--- a/src/scalar/bcknd/device/cuda/scalar_residual_update_kernel.h
+++ b/src/scalar/bcknd/device/cuda/scalar_residual_update_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __SCALAR_SCALAR_RESIDUAL_UPDATE_KERNEL_H__
+#define __SCALAR_SCALAR_RESIDUAL_UPDATE_KERNEL_H__
 /*
  Copyright (c) 2022, The Neko Authors
  All rights reserved.
@@ -46,3 +48,5 @@ __global__ void scalar_residual_update_kernel(T * __restrict__ s_res,
 
 }
 
+
+#endif // __SCALAR_SCALAR_RESIDUAL_UPDATE_KERNEL_H__
diff --git a/src/scalar/bcknd/device/hip/scalar_residual_update_kernel.h b/src/scalar/bcknd/device/hip/scalar_residual_update_kernel.h
index 8e5e9dcbc85..7151c818a3d 100644
--- a/src/scalar/bcknd/device/hip/scalar_residual_update_kernel.h
+++ b/src/scalar/bcknd/device/hip/scalar_residual_update_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __SCALAR_SCALAR_RESIDUAL_UPDATE_KERNEL_H__
+#define __SCALAR_SCALAR_RESIDUAL_UPDATE_KERNEL_H__
 /*
  Copyright (c) 2022, The Neko Authors
  All rights reserved.
@@ -46,3 +48,5 @@ __global__ void scalar_residual_update_kernel(T * __restrict__ s_res,
 
 }
 
+
+#endif // __SCALAR_SCALAR_RESIDUAL_UPDATE_KERNEL_H__
diff --git a/src/scalar/bcknd/device/opencl/scalar_residual_kernel.cl b/src/scalar/bcknd/device/opencl/scalar_residual_kernel.cl
index fb36fc63cde..0ca3857f397 100644
--- a/src/scalar/bcknd/device/opencl/scalar_residual_kernel.cl
+++ b/src/scalar/bcknd/device/opencl/scalar_residual_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __SCALAR_SCALAR_RESIDUAL_KERNEL_CL__
+#define __SCALAR_SCALAR_RESIDUAL_KERNEL_CL__
 /*
  Copyright (c) 2022-2023, The Neko Authors
  All rights reserved.
@@ -40,3 +42,5 @@ __kernel void scalar_residual_update_kernel(__global real * __restrict__ s_res,
   }
 
 }
+
+#endif // __SCALAR_SCALAR_RESIDUAL_KERNEL_CL__
diff --git a/src/scalar/bcknd/device/scalar_residual_device.F90 b/src/scalar/bcknd/device/scalar_residual_device.F90
index e8f328260a1..9e65f812ce0 100644
--- a/src/scalar/bcknd/device/scalar_residual_device.F90
+++ b/src/scalar/bcknd/device/scalar_residual_device.F90
@@ -39,7 +39,7 @@ module scalar_residual_device
   use, intrinsic :: iso_c_binding
   implicit none
   private
- 
+
   type, public, extends(scalar_residual_t) :: scalar_residual_device_t
    contains
      procedure, nopass :: compute => scalar_residual_device_compute
@@ -57,7 +57,7 @@ subroutine scalar_residual_update_hip(s_res_d,f_s_d, n) &
      end subroutine scalar_residual_update_hip
   end interface
 #elif HAVE_CUDA
-  
+
   interface
      subroutine scalar_residual_update_cuda(s_res_d,f_s_d, n) &
           bind(c, name='scalar_residual_update_cuda')
@@ -69,7 +69,7 @@ subroutine scalar_residual_update_cuda(s_res_d,f_s_d, n) &
      end subroutine scalar_residual_update_cuda
   end interface
 #elif HAVE_OPENCL
-  
+
   interface
      subroutine scalar_residual_update_opencl(s_res_d,f_s_d, n) &
           bind(c, name='scalar_residual_update_opencl')
@@ -82,7 +82,7 @@ end subroutine scalar_residual_update_opencl
   end interface
 #endif
 
-  
+
 contains
 
 
@@ -90,31 +90,31 @@ subroutine scalar_residual_device_compute(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, &
              lambda, rhocp, bd, dt, n)
     class(ax_t), intent(in) :: Ax
     type(mesh_t), intent(inout) :: msh
-    type(space_t), intent(inout) :: Xh    
+    type(space_t), intent(inout) :: Xh
     type(field_t), intent(inout) :: s
     type(field_t), intent(inout) :: s_res
-    type(source_scalar_t), intent(inout) :: f_Xh
+    type(field_t), intent(inout) :: f_Xh
     type(coef_t), intent(inout) :: c_Xh
     real(kind=rp), intent(in) :: lambda
     real(kind=rp), intent(in) :: rhocp
     real(kind=rp), intent(in) :: bd
     real(kind=rp), intent(in) :: dt
     integer, intent(in) :: n
-    
+
     call device_cfill(c_Xh%h1_d, lambda, n)
     call device_cfill(c_Xh%h2_d, rhocp * (bd / dt), n)
     c_Xh%ifh2 = .true.
-    
+
     call Ax%compute(s_res%x, s%x, c_Xh, msh, Xh)
 
 #ifdef HAVE_HIP
-    call scalar_residual_update_hip(s_res%x_d, f_Xh%s_d, n)
+    call scalar_residual_update_hip(s_res%x_d, f_Xh%x_d, n)
 #elif HAVE_CUDA
-    call scalar_residual_update_cuda(s_res%x_d, f_Xh%s_d, n)
+    call scalar_residual_update_cuda(s_res%x_d, f_Xh%x_d, n)
 #elif HAVE_OPENCL
-    call scalar_residual_update_opencl(s_res%x_d, f_Xh%s_d, n)
+    call scalar_residual_update_opencl(s_res%x_d, f_Xh%x_d, n)
 #endif
-    
+
   end subroutine scalar_residual_device_compute
-  
+
 end module scalar_residual_device
diff --git a/src/scalar/bcknd/sx/scalar_residual_sx.f90 b/src/scalar/bcknd/sx/scalar_residual_sx.f90
index 959a1dccc9a..ff64bc3cf74 100644
--- a/src/scalar/bcknd/sx/scalar_residual_sx.f90
+++ b/src/scalar/bcknd/sx/scalar_residual_sx.f90
@@ -5,7 +5,7 @@ module scalar_residual_sx
   use operators
   implicit none
   private
-  
+
   type, public, extends(scalar_residual_t) :: scalar_residual_sx_t
    contains
      procedure, nopass :: compute => scalar_residual_sx_compute
@@ -17,10 +17,10 @@ subroutine scalar_residual_sx_compute(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, &
       lambda, rhocp, bd, dt, n)
     class(ax_t), intent(in) :: Ax
     type(mesh_t), intent(inout) :: msh
-    type(space_t), intent(inout) :: Xh    
+    type(space_t), intent(inout) :: Xh
     type(field_t), intent(inout) :: s
     type(field_t), intent(inout) :: s_res
-    type(source_scalar_t), intent(inout) :: f_Xh
+    type(field_t), intent(inout) :: f_Xh
     type(coef_t), intent(inout) :: c_Xh
     real(kind=rp), intent(in) :: lambda
     real(kind=rp), intent(in) :: rhocp
@@ -39,9 +39,9 @@ subroutine scalar_residual_sx_compute(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, &
     call Ax%compute(s_res%x, s%x, c_Xh, msh, Xh)
 
     do i = 1, n
-       s_res%x(i,1,1,1) = (-s_res%x(i,1,1,1)) + f_Xh%s(i,1,1,1)
+       s_res%x(i,1,1,1) = (-s_res%x(i,1,1,1)) + f_Xh%x(i,1,1,1)
     end do
-    
-  end subroutine scalar_residual_sx_compute  
-     
+
+  end subroutine scalar_residual_sx_compute
+
 end module scalar_residual_sx
diff --git a/src/scalar/scalar_aux.f90 b/src/scalar/scalar_aux.f90
index 740b967d893..8911199eae9 100644
--- a/src/scalar/scalar_aux.f90
+++ b/src/scalar/scalar_aux.f90
@@ -9,7 +9,7 @@ module scalar_aux
 contains
 
   !> Prints for prs, velx, vely, velz the following:
-  !! Number of iterations, start residual, end residual 
+  !! Number of iterations, start residual, end residual
   subroutine scalar_step_info(step, t, dt, ksp_results)
     type(ksp_monitor_t), intent(in) :: ksp_results(1)
     integer, intent(in) :: step
diff --git a/src/scalar/scalar_ic.f90 b/src/scalar/scalar_ic.f90
new file mode 100644
index 00000000000..b9e18619c06
--- /dev/null
+++ b/src/scalar/scalar_ic.f90
@@ -0,0 +1,151 @@
+! Copyright (c) 2021, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Scalar initial condition
+module scalar_ic
+  use gather_scatter, only : gs_t, GS_OP_ADD
+  use neko_config, only : NEKO_BCKND_DEVICE
+  use num_types, only : rp
+  use device_math, only : device_col2
+  use device, only : device_memcpy, HOST_TO_DEVICE
+  use field, only : field_t
+  use utils, only : neko_error
+  use coefs, only : coef_t
+  use math, only : col2, cfill
+  use user_intf, only : useric_scalar
+  use json_module, only : json_file
+  use json_utils, only: json_get
+  implicit none
+  private
+
+  interface set_scalar_ic
+     module procedure set_scalar_ic_int, set_scalar_ic_usr
+  end interface set_scalar_ic
+
+  public :: set_scalar_ic
+
+contains
+
+  !> Set scalar initial condition (builtin)
+  !! @details Set scalar initial condition using one of the builtin types
+  !! currently supported:
+  !! - uniform.
+  !! @param s Scalar field.
+  !! @param coef Coefficient.
+  !! @param gs Gather-Scatter object.
+  !! @param type Type of initial condition.
+  !! @param params JSON parameters.
+  subroutine set_scalar_ic_int(s, coef, gs, type, params)
+    type(field_t), intent(inout) :: s
+    type(coef_t), intent(in) :: coef
+    type(gs_t), intent(inout) :: gs
+    character(len=*) :: type
+    type(json_file), intent(inout) :: params
+
+    ! Variables for retrieving JSON parameters
+    real(kind=rp) :: ic_value
+
+    if (trim(type) .eq. 'uniform') then
+       call json_get(params, 'case.scalar.initial_condition.value', ic_value)
+       call set_scalar_ic_uniform(s, ic_value)
+    else
+       call neko_error('Invalid initial condition')
+    end if
+
+    call set_scalar_ic_common(s, coef, gs)
+
+  end subroutine set_scalar_ic_int
+
+  !> Set scalar intial condition (user defined)
+  !! @details Set scalar initial condition using a user defined function.
+  !! @param s Scalar field.
+  !! @param coef Coefficient.
+  !! @param gs Gather-Scatter object.
+  !! @param usr_ic User defined initial condition function.
+  !! @param params JSON parameters.
+  subroutine set_scalar_ic_usr(s, coef, gs, usr_ic, params)
+    type(field_t), intent(inout) :: s
+    type(coef_t), intent(in) :: coef
+    type(gs_t), intent(inout) :: gs
+    procedure(useric_scalar) :: usr_ic
+    type(json_file), intent(inout) :: params
+
+    call usr_ic(s, params)
+
+    call set_scalar_ic_common(s, coef, gs)
+
+  end subroutine set_scalar_ic_usr
+
+  !> Set scalar initial condition (common)
+  !! @details Finalize scalar initial condition by distributing the initial
+  !! condition across elements and multiplying by the coefficient (if any).
+  !! @param s Scalar field.
+  !! @param coef Coefficient.
+  !! @param gs Gather-Scatter object.
+  subroutine set_scalar_ic_common(s, coef, gs)
+    type(field_t), intent(inout) :: s
+    type(coef_t), intent(in) :: coef
+    type(gs_t), intent(inout) :: gs
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_memcpy(s%x, s%x_d, s%dof%size(), &
+                          HOST_TO_DEVICE, sync=.false.)
+    end if
+
+    ! Ensure continuity across elements for initial conditions
+    call gs%op(s%x, s%dof%size(), GS_OP_ADD)
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_col2(s%x_d, coef%mult_d, s%dof%size())
+    else
+       call col2(s%x, coef%mult, s%dof%size())
+    end if
+
+  end subroutine set_scalar_ic_common
+
+  !> Uniform initial condition
+  !! @details Set scalar initial condition to a uniform value across the domain.
+  !! @param s Scalar field.
+  !! @param ic_value Desired value of the scalar field.
+  subroutine set_scalar_ic_uniform(s, ic_value)
+    type(field_t), intent(inout) :: s
+    real(kind=rp), intent(in) :: ic_value
+    integer :: n
+    s = ic_value
+    n = s%dof%size()
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call cfill(s%x, ic_value, n)
+    end if
+
+  end subroutine set_scalar_ic_uniform
+
+end module scalar_ic
diff --git a/src/scalar/scalar_pnpn.f90 b/src/scalar/scalar_pnpn.f90
index e7be30c7129..44863049325 100644
--- a/src/scalar/scalar_pnpn.f90
+++ b/src/scalar/scalar_pnpn.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2022-2023, The Neko Authors
+! Copyright (c) 2022-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -30,83 +30,95 @@
 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 ! POSSIBILITY OF SUCH DAMAGE.
 !
-!> Modular version of the Classic Nek5000 Pn/Pn formulation for scalars
+!> Containts the scalar_pnpn_t type.
+
 module scalar_pnpn
+  use num_types, only: rp
   use scalar_residual_fctry, only : scalar_residual_factory
   use ax_helm_fctry, only: ax_helm_factory
-  use rhs_maker_fctry
+  use rhs_maker_fctry, only : rhs_maker_ext_fctry, rhs_maker_bdf_fctry
+  use rhs_maker, only : rhs_maker_bdf_t, rhs_maker_ext_t
   use scalar_scheme, only : scalar_scheme_t
   use dirichlet, only : dirichlet_t
+  use neumann, only : neumann_t
   use field, only : field_t
   use bc, only : bc_list_t, bc_list_init, bc_list_free, bc_list_apply_scalar, &
                  bc_list_add
   use mesh, only : mesh_t
   use checkpoint, only : chkp_t
   use coefs, only : coef_t
-  use device
+  use device, only : HOST_TO_DEVICE, device_memcpy
   use gather_scatter, only : gs_t, GS_OP_ADD
   use scalar_residual, only :scalar_residual_t
   use ax_product, only : ax_t
-  use field_series  
-  use facet_normal
-  use device_math
-  use device_mathops
-  use scalar_aux    
-  use time_scheme_controller
-  use projection
-  use math
-  use logger
-  use advection
-  use profiler
-  use json_utils, only: json_get, json_get_or_default
+  use field_series, only: field_series_t
+  use facet_normal, only : facet_normal_t
+  use krylov, only : ksp_monitor_t
+  use device_math, only : device_add2s2, device_col2
+  use scalar_aux, only : scalar_step_info
+  use time_scheme_controller, only : time_scheme_controller_t
+  use projection, only : projection_t
+  use math, only : glsc2, col2, add2s2
+  use logger, only : neko_log, LOG_SIZE, NEKO_LOG_DEBUG
+  use advection, only : advection_t
+  use advection_fctry, only : advection_factory
+  use profiler, only : profiler_start_region, profiler_end_region
+  use json_utils, only: json_get
   use json_module, only : json_file
   use user_intf, only : user_t
   use material_properties, only : material_properties_t
+  use neko_config, only : NEKO_BCKND_DEVICE
+  use time_step_controller
   implicit none
   private
 
 
   type, public, extends(scalar_scheme_t) :: scalar_pnpn_t
-     
-     type(field_t) :: s_res
 
-     type(field_series_t) :: slag
+     !> The residual of the transport equation.
+     type(field_t) :: s_res
 
+     !> Solution increment.
      type(field_t) :: ds
 
-     type(field_t) :: wa1
-     type(field_t) :: ta1
-
-     
+     !> Helmholz operator.
      class(ax_t), allocatable :: Ax
 
+     !> Solution projection.
      type(projection_t) :: proj_s
 
-     type(dirichlet_t) :: bc_res   !< Dirichlet condition for scala 
+     !> Dirichlet conditions for the residual
+     !! Collects all the Dirichlet condition facets into one bc and applies 0,
+     !! Since the values never change there during the solve.
+     type(dirichlet_t) :: bc_res
+
+     !> A bc list for the bc_res. Contains only that, essentially just to wrap
+     !! the if statement determining whether to apply on the device or CPU.
      type(bc_list_t) :: bclst_ds
 
-     class(advection_t), allocatable :: adv 
+     !> Advection operator.
+     class(advection_t), allocatable :: adv
 
-     ! Time variables
-     type(field_t) :: abx1
-     type(field_t) :: abx2
+     ! Lag arrays for the RHS.
+     type(field_t) :: abx1, abx2
 
-     !> Residual
+     !> Computes the residual.
      class(scalar_residual_t), allocatable :: res
 
-     !> Contributions to kth order extrapolation scheme
+     !> Contributions to kth order extrapolation scheme.
      class(rhs_maker_ext_t), allocatable :: makeext
 
-     !> Contributions to F from lagged BD terms
+     !> Contributions to the RHS from lagged BDF terms.
      class(rhs_maker_bdf_t), allocatable :: makebdf
 
    contains
      !> Constructor.
      procedure, pass(this) :: init => scalar_pnpn_init
      !> To restart
-     procedure, pass(this) :: restart=> scalar_pnpn_restart
+     procedure, pass(this) :: restart => scalar_pnpn_restart
      !> Destructor.
      procedure, pass(this) :: free => scalar_pnpn_free
+     !> Solve for the current timestep.
      procedure, pass(this) :: step => scalar_pnpn_step
   end type scalar_pnpn_t
 
@@ -161,19 +173,13 @@ subroutine scalar_pnpn_init(this, msh, coef, gs, params, user, &
 
       call this%abx2%init(dm_Xh, "abx2")
 
-      call this%wa1%init(dm_Xh, 'wa1')
-
-      call this%ta1%init(dm_Xh, 'ta1')
-
       call this%ds%init(dm_Xh, 'ds')
 
-      call this%slag%init(this%s, 2)
-
     end associate
 
     ! Initialize dirichlet bcs for scalar residual
     ! todo: look that this works
-    call this%bc_res%init(this%dm_Xh)
+    call this%bc_res%init(this%c_Xh)
     do i = 1, this%n_dir_bcs
        call this%bc_res%mark_facets(this%dir_bcs(i)%marked_facet)
     end do
@@ -182,55 +188,50 @@ subroutine scalar_pnpn_init(this, msh, coef, gs, params, user, &
     if (this%user_bc%msk(0) .gt. 0) then
        call this%bc_res%mark_facets(this%user_bc%marked_facet)
     end if
+
+    call this%bc_res%mark_zones_from_list(msh%labeled_zones, 'd_s', &
+                                         this%bc_labels)
     call this%bc_res%finalize()
     call this%bc_res%set_g(0.0_rp)
+
     call bc_list_init(this%bclst_ds)
     call bc_list_add(this%bclst_ds, this%bc_res)
 
-    ! @todo not param stuff again, using velocity stuff
-    ! Intialize projection space thingy
-    if (this%projection_dim .gt. 0) then
-       call this%proj_s%init(this%dm_Xh%size(), this%projection_dim)
-    end if
+
+    ! Intialize projection space
+    call this%proj_s%init(this%dm_Xh%size(), this%projection_dim,  &
+                            this%projection_activ_step)
 
     ! Add lagged term to checkpoint
     ! @todo Init chkp object, note, adding 3 slags
-    ! call this%chkp%add_lag(this%slag, this%slag, this%slag)    
-    
-    ! Uses sthe same parameter as the fluid to set dealiasing
-    call json_get(params, 'case.numerics.dealias', logical_val)
-    call params%get('case.numerics.dealiased_polynomial_order', integer_val, &
-                    found)
-    if (.not. found) then
-       call json_get(params, 'case.numerics.polynomial_order', integer_val)
-       integer_val =  3.0_rp / 2.0_rp * (integer_val + 1) - 1
-    end if
-    call advection_factory(this%adv, this%c_Xh, logical_val, integer_val + 1)
+    ! call this%chkp%add_lag(this%slag, this%slag, this%slag)
 
+    call advection_factory(this%adv, params, this%c_Xh)
   end subroutine scalar_pnpn_init
 
-  !> Restart method
+  !> I envision the arguments to this func might need to be expanded
   subroutine scalar_pnpn_restart(this, dtlag, tlag)
     class(scalar_pnpn_t), target, intent(inout) :: this
     real(kind=rp) :: dtlag(10), tlag(10)
     integer :: n
+
+
     n = this%s%dof%size()
 
-    call col2(this%s%x, this%c_Xh%mult, n) 
-    call col2(this%slag%lf(1)%x, this%c_Xh%mult, n) 
+    call col2(this%s%x, this%c_Xh%mult, n)
+    call col2(this%slag%lf(1)%x, this%c_Xh%mult, n)
     call col2(this%slag%lf(2)%x, this%c_Xh%mult, n)
-    
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_memcpy(this%s%x, this%s%x_d, &
-                          n, HOST_TO_DEVICE)
+                          n, HOST_TO_DEVICE, sync=.false.)
        call device_memcpy(this%slag%lf(1)%x, this%slag%lf(1)%x_d, &
-                          n, HOST_TO_DEVICE)
+                          n, HOST_TO_DEVICE, sync=.false.)
        call device_memcpy(this%slag%lf(2)%x, this%slag%lf(2)%x_d, &
-                          n, HOST_TO_DEVICE)
+                          n, HOST_TO_DEVICE, sync=.false.)
        call device_memcpy(this%abx1%x, this%abx1%x_d, &
-                          n, HOST_TO_DEVICE)
+                          n, HOST_TO_DEVICE, sync=.false.)
        call device_memcpy(this%abx2%x, this%abx2%x_d, &
-                          n, HOST_TO_DEVICE)
+                          n, HOST_TO_DEVICE, sync=.false.)
     end if
 
     call this%gs_Xh%op(this%s,GS_OP_ADD)
@@ -249,10 +250,6 @@ subroutine scalar_pnpn_free(this)
 
     call this%s_res%free()
 
-    call this%wa1%free()
-
-    call this%ta1%free()
-
     call this%ds%free()
 
     call this%abx1%free()
@@ -274,17 +271,15 @@ subroutine scalar_pnpn_free(this)
        deallocate(this%makebdf)
     end if
 
-
-    call this%slag%free()
-
   end subroutine scalar_pnpn_free
 
-  subroutine scalar_pnpn_step(this, t, tstep, dt, ext_bdf)
+  subroutine scalar_pnpn_step(this, t, tstep, dt, ext_bdf, dt_controller)
     class(scalar_pnpn_t), intent(inout) :: this
     real(kind=rp), intent(inout) :: t
     integer, intent(inout) :: tstep
     real(kind=rp), intent(in) :: dt
     type(time_scheme_controller_t), intent(inout) :: ext_bdf
+    type(time_step_controller_t), intent(in) :: dt_controller
     ! Number of degrees of freedom
     integer :: n
     ! Linear solver results monitor
@@ -292,74 +287,95 @@ subroutine scalar_pnpn_step(this, t, tstep, dt, ext_bdf)
     character(len=LOG_SIZE) :: log_buf
 
     n = this%dm_Xh%size()
-    
-    call profiler_start_region('Scalar')
+
+    call profiler_start_region('Scalar', 2)
     associate(u => this%u, v => this%v, w => this%w, s => this%s, &
          cp => this%cp, lambda => this%lambda, rho => this%rho, &
          ds => this%ds, &
-         ta1 => this%ta1, &
-         wa1 => this%wa1, &
          s_res =>this%s_res, &
          Ax => this%Ax, f_Xh => this%f_Xh, Xh => this%Xh, &
          c_Xh => this%c_Xh, dm_Xh => this%dm_Xh, gs_Xh => this%gs_Xh, &
          slag => this%slag, &
          projection_dim => this%projection_dim, &
-         ksp_maxiter => this%ksp_maxiter, &
          msh => this%msh, res => this%res, &
-         makeext => this%makeext, makebdf => this%makebdf)
+         makeext => this%makeext, makebdf => this%makebdf, &
+         if_variable_dt => dt_controller%if_variable_dt, &
+         dt_last_change => dt_controller%dt_last_change)
+
+      if (neko_log%level_ .ge. NEKO_LOG_DEBUG) then
+         write(log_buf,'(A,A,E15.7,A,E15.7,A,E15.7)') 'Scalar debug',&
+              ' l2norm s', glsc2(this%s%x,this%s%x,n),&
+              ' slag1', glsc2(this%slag%lf(1)%x,this%slag%lf(1)%x,n),&
+              ' slag2', glsc2(this%slag%lf(2)%x,this%slag%lf(2)%x,n)
+         call neko_log%message(log_buf)
+         write(log_buf,'(A,A,E15.7,A,E15.7)') 'Scalar debug2',&
+              ' l2norm abx1', glsc2(this%abx1%x,this%abx1%x,n),&
+              ' abx2', glsc2(this%abx2%x,this%abx2%x,n)
+         call neko_log%message(log_buf)
+      end if
 
-      ! Evaluate the source term and scale with the mass matrix.
-      call f_Xh%eval(t)
+      ! Compute the source terms
+      call this%source_term%compute(t, tstep)
 
+      ! Pre-multiply the source terms with the mass matrix.
       if (NEKO_BCKND_DEVICE .eq. 1) then
-         call device_col2(f_Xh%s_d, c_Xh%B_d, n)
+         call device_col2(f_Xh%x_d, c_Xh%B_d, n)
       else
-         call col2(f_Xh%s, c_Xh%B, n)
+         call col2(f_Xh%x, c_Xh%B, n)
       end if
 
+      ! Apply Neumann boundary conditions
+      call bc_list_apply_scalar(this%bclst_neumann, this%f_Xh%x, dm_Xh%size())
+
       ! Add the advection operators to the right-hans-side.
-      call this%adv%compute_scalar(u, v, w, s, f_Xh%s, &
+      call this%adv%compute_scalar(u, v, w, s, f_Xh%x, &
                                    Xh, this%c_Xh, dm_Xh%size())
 
-      call makeext%compute_scalar(ta1, this%abx1, this%abx2, f_Xh%s, &
-           rho, ext_bdf%advection_coeffs, n)
+      ! At this point the RHS contains the sum of the advection operator,
+      ! Neumann boundary sources and additional source terms, evaluated using
+      ! the scalar field from the previous time-step. Now, this value is used in
+      ! the explicit time scheme to advance these terms in time.
+      call makeext%compute_scalar(this%abx1, this%abx2, f_Xh%x, rho, &
+           ext_bdf%advection_coeffs, n)
 
-      call makebdf%compute_scalar(ta1, wa1, slag, f_Xh%s, s, c_Xh%B, &
-           rho, dt, ext_bdf%diffusion_coeffs, ext_bdf%ndiff, n)
+      ! Add the RHS contributions coming from the BDF scheme.
+      call makebdf%compute_scalar(slag, f_Xh%x, s, c_Xh%B, rho, dt, &
+           ext_bdf%diffusion_coeffs, ext_bdf%ndiff, n)
 
       call slag%update()
-      ! We assume that no change of boundary conditions 
-      ! occurs between elements. I.e. we do not apply gsop here like in Nek5000
-      ! Apply dirichlet
-      call this%bc_apply()
+
+      !> Apply Dirichlet boundary conditions
+      !! We assume that no change of boundary conditions
+      !! occurs between elements. i.e. we do not apply gsop here like in Nek5000
+      call this%dirichlet_update_(this%field_dirichlet_fields, &
+           this%field_dirichlet_bcs, this%c_Xh, t, tstep, "scalar")
+      call bc_list_apply_scalar(this%bclst_dirichlet, this%s%x, this%dm_Xh%size())
 
       ! Compute scalar residual.
-      call profiler_start_region('Scalar residual')
+      call profiler_start_region('Scalar residual', 20)
       call res%compute(Ax, s,  s_res, f_Xh, c_Xh, msh, Xh, lambda, rho * cp, &
           ext_bdf%diffusion_coeffs(1), dt, &
           dm_Xh%size())
 
-      call gs_Xh%op(s_res, GS_OP_ADD) 
+      call gs_Xh%op(s_res, GS_OP_ADD)
+
+      ! Apply a 0-valued Dirichlet boundary conditions on the ds.
+      call bc_list_apply_scalar(this%bclst_ds, s_res%x, dm_Xh%size())
 
-      call bc_list_apply_scalar(this%bclst_ds,&
-           s_res%x, dm_Xh%size())
       call profiler_end_region
 
-      if (tstep .gt. 5 .and. projection_dim .gt. 0) then 
-         call this%proj_s%project_on(s_res%x, c_Xh, n)
-      end if
+      call this%proj_s%pre_solving(s_res%x, tstep, c_Xh, n, dt_controller)
 
       call this%pc%update()
-      call profiler_start_region('Scalar solve')
+      call profiler_start_region('Scalar solve', 21)
       ksp_results(1) = this%ksp%solve(Ax, ds, s_res%x, n, &
-           c_Xh, this%bclst_ds, gs_Xh, ksp_maxiter)
+           c_Xh, this%bclst_ds, gs_Xh)
       call profiler_end_region
 
-      if (tstep .gt. 5 .and. projection_dim .gt. 0) then 
-         call this%proj_s%project_back(ds%x, Ax, c_Xh, &
-              this%bclst_ds, gs_Xh, n)
-      end if
+     call this%proj_s%post_solving(ds%x, Ax, c_Xh, &
+                                 this%bclst_ds, gs_Xh, n, tstep, dt_controller)
 
+      ! Update the solution
       if (NEKO_BCKND_DEVICE .eq. 1) then
          call device_add2s2(s%x_d, ds%x_d, 1.0_rp, n)
       else
diff --git a/src/scalar/scalar_residual.f90 b/src/scalar/scalar_residual.f90
index bd029b0d998..a3a79ae4f26 100644
--- a/src/scalar/scalar_residual.f90
+++ b/src/scalar/scalar_residual.f90
@@ -32,7 +32,7 @@
 !
 !> Defines the residual for the scalar transport equation
 module scalar_residual
-  use gather_scatter, only : gs_t  
+  use gather_scatter, only : gs_t
   use ax_product, only : ax_t
   use field, only : field_t
   use coefs, only : coef_t
@@ -42,14 +42,27 @@ module scalar_residual
   use mesh, only : mesh_t
   use num_types, only : rp
   implicit none
-  
+
   !> Abstract type to compute scalar residual
   type, abstract :: scalar_residual_t
    contains
      procedure(scalar_residual_interface), nopass, deferred :: compute
   end type scalar_residual_t
-    
+
   abstract interface
+     !> Interface for computing the residual of a scalar transport equation.
+     !! @param Ax The Helmholtz operator.
+     !! @param s The values of the scalar.
+     !! @param s_res The values of the scalar residual.
+     !! @param f_xH The right hand side.
+     !! @param c_xH The SEM coefficients.
+     !! @param msh The mesh.
+     !! @param Xh The SEM function space.
+     !! @param lambda The thermal conductivity.
+     !! @param rhocp The density multiplied by the specific heat capacity.
+     !! @param bd The coefficeints from the BDF differencing scheme.
+     !! @param dt The timestep.
+     !! @param n The total number of degrees of freedom.
      subroutine scalar_residual_interface(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, &
                                           lambda, rhocp, bd, dt, n)
        import field_t
@@ -57,16 +70,16 @@ subroutine scalar_residual_interface(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, &
        import gs_t
        import facet_normal_t
        import source_scalar_t
-       import space_t              
+       import space_t
        import coef_t
        import mesh_t
        import rp
        class(ax_t), intent(in) :: Ax
        type(mesh_t), intent(inout) :: msh
-       type(space_t), intent(inout) :: Xh    
+       type(space_t), intent(inout) :: Xh
        type(field_t), intent(inout) :: s
        type(field_t), intent(inout) :: s_res
-       type(source_scalar_t), intent(inout) :: f_Xh
+       type(field_t), intent(inout) :: f_Xh
        type(coef_t), intent(inout) :: c_Xh
        real(kind=rp), intent(in) :: lambda
        real(kind=rp), intent(in) :: rhocp
@@ -75,5 +88,5 @@ subroutine scalar_residual_interface(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, &
        integer, intent(in) :: n
      end subroutine scalar_residual_interface
   end interface
- 
+
 end module scalar_residual
diff --git a/src/scalar/scalar_residual_fctry.f90 b/src/scalar/scalar_residual_fctry.f90
index 6fc86c8f29b..10eae216449 100644
--- a/src/scalar/scalar_residual_fctry.f90
+++ b/src/scalar/scalar_residual_fctry.f90
@@ -41,7 +41,7 @@ module scalar_residual_fctry
 
 contains
 
-  
+
   subroutine scalar_residual_factory(scalar_res)
     class(scalar_residual_t), allocatable, intent(inout) :: scalar_res
 
@@ -57,7 +57,7 @@ subroutine scalar_residual_factory(scalar_res)
     else
        allocate(scalar_residual_cpu_t::scalar_res)
     end if
-       
-    
+
+
   end subroutine scalar_residual_factory
 end module scalar_residual_fctry
diff --git a/src/scalar/scalar_scheme.f90 b/src/scalar/scalar_scheme.f90
index 07f43ca7aa2..9ddbac871e7 100644
--- a/src/scalar/scalar_scheme.f90
+++ b/src/scalar/scalar_scheme.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2022, The Neko Authors
+! Copyright (c) 2022-2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -30,57 +30,110 @@
 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 ! POSSIBILITY OF SUCH DAMAGE.
 !
-!> Modular version of the Classic Nek5000 Pn/Pn formulation for scalars
+!> Contains the scalar_scheme_t type.
 
 ! todo: module name
 module scalar_scheme
-  use gather_scatter
-  use neko_config
-  use checkpoint
-  use num_types
-  use source_scalar
-  use field
-  use space
-  use dofmap
-  use krylov
-  use coefs
-  use dirichlet
-  use krylov_fctry
-  use precon_fctry
-  use bc
-  use mesh
-  use facet_zone
-  use time_scheme_controller
-  use logger
-  use field_registry
-  use usr_scalar
+  use gather_scatter, only : gs_t
+  use checkpoint, only : chkp_t
+  use num_types, only: rp
+  use field, only : field_t
+  use field_list, only: field_list_t
+  use space, only : space_t
+  use dofmap, only :  dofmap_t
+  use krylov, only : ksp_t
+  use coefs, only : coef_t
+  use dirichlet, only : dirichlet_t
+  use neumann, only : neumann_t
+  use krylov_fctry, only : krylov_solver_factory, krylov_solver_destroy
+  use jacobi, only : jacobi_t
+  use device_jacobi, only : device_jacobi_t
+  use sx_jacobi, only : sx_jacobi_t
+  use hsmg, only : hsmg_t
+  use precon_fctry, only : precon_factory, pc_t, precon_destroy
+  use bc, only : bc_t, bc_list_t, bc_list_free, bc_list_init, &
+                 bc_list_apply_scalar, bc_list_add
+  use field_dirichlet, only: field_dirichlet_t, field_dirichlet_update
+  use mesh, only : mesh_t, NEKO_MSH_MAX_ZLBLS, NEKO_MSH_MAX_ZLBL_LEN
+  use facet_zone, only : facet_zone_t
+  use time_scheme_controller, only : time_scheme_controller_t
+  use logger, only : neko_log, LOG_SIZE
+  use field_registry, only : neko_field_registry
+  use usr_scalar, only : usr_scalar_t, usr_scalar_bc_eval
   use json_utils, only : json_get, json_get_or_default
   use json_module, only : json_file
   use user_intf, only : user_t
   use material_properties, only : material_properties_t
+  use utils, only : neko_error
+  use comm, only: NEKO_COMM, MPI_INTEGER, MPI_SUM
+  use scalar_source_term, only : scalar_source_term_t
+  use field_series
+  use time_step_controller
   implicit none
 
+  !> Base type for a scalar advection-diffusion solver.
   type, abstract :: scalar_scheme_t
-     type(field_t), pointer :: u       !< x-component of Velocity
-     type(field_t), pointer :: v       !< y-component of Velocity
-     type(field_t), pointer :: w       !< z-component of Velocity
-     type(field_t), pointer :: s       !< the scalar
-     type(space_t), pointer :: Xh      !< Function space \f$ X_h \f$
-     type(dofmap_t), pointer :: dm_Xh  !< Dofmap associated with \f$ X_h \f$
-     type(gs_t), pointer :: gs_Xh      !< Gather-scatter associated with \f$ X_h \f$
-     type(coef_t), pointer  :: c_Xh    !< Coefficients associated with \f$ X_h \f$
-     type(source_scalar_t) :: f_Xh     !< Source term associated with \f$ X_h \f$
-     class(ksp_t), allocatable  :: ksp         !< Krylov solver
-     integer :: ksp_maxiter            !< Max iteration number in ksp.
-     integer :: projection_dim     !< Projection space size in ksp.
-     class(pc_t), allocatable :: pc            !< Preconditioner
-     type(dirichlet_t) :: dir_bcs(NEKO_MSH_MAX_ZLBLS)   !< Dirichlet conditions
-     type(usr_scalar_t) :: user_bc     !< Dirichlet conditions
+     !> x-component of Velocity
+     type(field_t), pointer :: u
+     !> y-component of Velocity
+     type(field_t), pointer :: v
+     !> z-component of Velocity
+     type(field_t), pointer :: w
+     !> The scalar.
+     type(field_t), pointer :: s
+     !> Lag arrays, i.e. solutions at previous timesteps.
+     type(field_series_t) :: slag
+     !> Function space \f$ X_h \f$.
+     type(space_t), pointer :: Xh
+     !> Dofmap associated with \f$ X_h \f$.
+     type(dofmap_t), pointer :: dm_Xh
+     !> Gather-scatter associated with \f$ X_h \f$.
+     type(gs_t), pointer :: gs_Xh
+     !> Coefficients associated with \f$ X_h \f$.
+     type(coef_t), pointer  :: c_Xh
+     !> Right-hand side.
+     type(field_t), pointer :: f_Xh => null()
+     !> The source term for equation.
+     type(scalar_source_term_t) :: source_term
+     !> Krylov solver.
+     class(ksp_t), allocatable  :: ksp
+     !> Max iterations in the Krylov solver.
+     integer :: ksp_maxiter
+     !> Projection space size.
+     integer :: projection_dim
+     !< Steps to activate projection for ksp
+     integer :: projection_activ_step
+     !> Preconditioner.
+     class(pc_t), allocatable :: pc
+     !> Dirichlet conditions.
+     type(dirichlet_t) :: dir_bcs(NEKO_MSH_MAX_ZLBLS)
+     !> Field Dirichlet conditions.
+     type(field_dirichlet_t) :: field_dir_bc
+     !> Pointer to user_dirichlet_update to be called in fluid_scheme_step
+     procedure(field_dirichlet_update), nopass, pointer :: dirichlet_update_ &
+          => null()
+     !> List of BC objects to pass to user_dirichlet_update
+     type(bc_list_t) :: field_dirichlet_bcs
+     !< List of fields to pass to user_dirichlet_update
+     type(field_list_t) :: field_dirichlet_fields
+     !> Neumann conditions.
+     type(neumann_t) :: neumann_bcs(NEKO_MSH_MAX_ZLBLS)
+     !> User Dirichlet conditions.
+     type(usr_scalar_t) :: user_bc
+     !> Number of Dirichlet bcs.
      integer :: n_dir_bcs = 0
-     type(bc_list_t) :: bclst                  !< List of boundary conditions
-     type(json_file), pointer :: params          !< Parameters          
-     type(mesh_t), pointer :: msh => null()    !< Mesh
-     type(chkp_t) :: chkp                      !< Checkpoint
+     !> Number of Neumann bcs.
+     integer :: n_neumann_bcs = 0
+     !> List of Dirichlet boundary conditions, including the user one.
+     type(bc_list_t) :: bclst_dirichlet
+     !> List of Neumann conditions list
+     type(bc_list_t) :: bclst_neumann
+     !> Case paramters.
+     type(json_file), pointer :: params
+     !> Mesh.
+     type(mesh_t), pointer :: msh => null()
+     !> Checkpoint for restarts.
+     type(chkp_t) :: chkp
      !> Thermal diffusivity.
      real(kind=rp), pointer :: lambda
      !> Density.
@@ -88,17 +141,23 @@ module scalar_scheme
      !> Specific heat capacity.
      real(kind=rp), pointer :: cp
      !> Boundary condition labels (if any)
-     character(len=20), allocatable :: bc_labels(:)
+     character(len=NEKO_MSH_MAX_ZLBL_LEN), allocatable :: bc_labels(:)
    contains
+     !> Constructor for the base type.
      procedure, pass(this) :: scheme_init => scalar_scheme_init
+     !> Destructor for the base type.
      procedure, pass(this) :: scheme_free => scalar_scheme_free
+     !> Validate successful initialization.
      procedure, pass(this) :: validate => scalar_scheme_validate
-     procedure, pass(this) :: bc_apply => scalar_scheme_bc_apply
-     procedure, pass(this) :: set_source => scalar_scheme_set_source
+     !> Assings the evaluation function for  `user_bc`.
      procedure, pass(this) :: set_user_bc => scalar_scheme_set_user_bc
+     !> Constructor.
      procedure(scalar_scheme_init_intrf), pass(this), deferred :: init
+     !> Destructor.
      procedure(scalar_scheme_free_intrf), pass(this), deferred :: free
+     !> Solve for the current timestep.
      procedure(scalar_scheme_step_intrf), pass(this), deferred :: step
+     !> Restart from a checkpoint.
      procedure(scalar_scheme_restart_intrf), pass(this), deferred :: restart
   end type scalar_scheme_t
 
@@ -114,7 +173,7 @@ subroutine scalar_scheme_init_intrf(this, msh, coef, gs, params, user,&
        import user_t
        import material_properties_t
        class(scalar_scheme_t), target, intent(inout) :: this
-       type(mesh_t), target, intent(inout) :: msh       
+       type(mesh_t), target, intent(inout) :: msh
        type(coef_t), target, intent(inout) :: coef
        type(gs_t), target, intent(inout) :: gs
        type(json_file), target, intent(inout) :: params
@@ -125,7 +184,7 @@ end subroutine scalar_scheme_init_intrf
 
   !> Abstract interface to restart a scalar formulation
   abstract interface
-     subroutine scalar_scheme_restart_intrf(this,dtlag, tlag)
+     subroutine scalar_scheme_restart_intrf(this, dtlag, tlag)
        import scalar_scheme_t
        import chkp_t
        import rp
@@ -133,6 +192,7 @@ subroutine scalar_scheme_restart_intrf(this,dtlag, tlag)
        real(kind=rp) :: dtlag(10), tlag(10)
      end subroutine scalar_scheme_restart_intrf
   end interface
+
   
   !> Abstract interface to dealocate a scalar formulation
   abstract interface
@@ -141,18 +201,20 @@ subroutine scalar_scheme_free_intrf(this)
        class(scalar_scheme_t), intent(inout) :: this
      end subroutine scalar_scheme_free_intrf
   end interface
-  
+
   !> Abstract interface to compute a time-step
   abstract interface
-     subroutine scalar_scheme_step_intrf(this, t, tstep, dt, ext_bdf)
+     subroutine scalar_scheme_step_intrf(this, t, tstep, dt, ext_bdf, dt_controller)
        import scalar_scheme_t
        import time_scheme_controller_t
+       import time_step_controller_t
        import rp
        class(scalar_scheme_t), intent(inout) :: this
        real(kind=rp), intent(inout) :: t
        integer, intent(inout) :: tstep
        real(kind=rp), intent(in) :: dt
        type(time_scheme_controller_t), intent(inout) :: ext_bdf
+       type(time_step_controller_t), intent(in) :: dt_controller
      end subroutine scalar_scheme_step_intrf
   end interface
 
@@ -162,47 +224,66 @@ end subroutine scalar_scheme_step_intrf
   !! @param zones List of zones
   !! @param bc_labels List of user specified bcs from the parameter file
   !! currently dirichlet 'd=X' and 'user' supported
-  subroutine scalar_scheme_add_bcs(this, zones, bc_labels) 
-    class(scalar_scheme_t), intent(inout) :: this 
+  subroutine scalar_scheme_add_bcs(this, zones, bc_labels)
+    class(scalar_scheme_t), intent(inout) :: this
     type(facet_zone_t), intent(inout) :: zones(NEKO_MSH_MAX_ZLBLS)
-    character(len=20), intent(in) :: bc_labels(NEKO_MSH_MAX_ZLBLS)
-    character(len=20) :: bc_label
+    character(len=NEKO_MSH_MAX_ZLBL_LEN), intent(in) :: bc_labels(:)
+    character(len=NEKO_MSH_MAX_ZLBL_LEN) :: bc_label
     integer :: i, j, bc_idx
-    real(kind=rp) :: dir_value
+    real(kind=rp) :: dir_value, flux_value
     logical :: bc_exists
 
-    do i = 1, NEKO_MSH_MAX_ZLBLS
+    do i = 1, size(bc_labels)
        bc_label = trim(bc_labels(i))
-       if (bc_label(1:1) .eq. 'd') then
-          bc_exists = .false.
-          bc_idx = 0
-          do j = 1, i-1
-             if (bc_label .eq. bc_labels(j)) then
-                bc_exists = .true. 
-                bc_idx = j
-             end if
-         end do
-         
-         if (bc_exists) then
-            call this%dir_bcs(j)%mark_zone(zones(i))
-         else
-            this%n_dir_bcs = this%n_dir_bcs + 1
-            call this%dir_bcs(this%n_dir_bcs)%init(this%dm_Xh)
-            call this%dir_bcs(this%n_dir_bcs)%mark_zone(zones(i))
-            read(bc_label(3:), *) dir_value
-            call this%dir_bcs(this%n_dir_bcs)%set_g(dir_value)
-         end if
+       if (bc_label(1:2) .eq. 'd=') then
+! The idea of this commented piece of code is to merge bcs with the same
+! Dirichlet value into 1 so that one has less kernel launches. Currently
+! segfaults, needs investigation.
+!          bc_exists = .false.
+!          bc_idx = 0
+!          do j = 1, i-1
+!             if (bc_label .eq. bc_labels(j)) then
+!                bc_exists = .true.
+!                bc_idx = j
+!             end if
+!          end do
+
+!          if (bc_exists) then
+!             call this%dir_bcs(j)%mark_zone(zones(i))
+!          else
+          this%n_dir_bcs = this%n_dir_bcs + 1
+          call this%dir_bcs(this%n_dir_bcs)%init(this%c_Xh)
+          call this%dir_bcs(this%n_dir_bcs)%mark_zone(zones(i))
+          read(bc_label(3:), *) dir_value
+          call this%dir_bcs(this%n_dir_bcs)%set_g(dir_value)
+!          end if
+       end if
+
+       if (bc_label(1:2) .eq. 'n=') then
+          this%n_neumann_bcs = this%n_neumann_bcs + 1
+          call this%neumann_bcs(this%n_neumann_bcs)%init(this%c_Xh)
+          call this%neumann_bcs(this%n_neumann_bcs)%mark_zone(zones(i))
+          read(bc_label(3:), *) flux_value
+          call this%neumann_bcs(this%n_neumann_bcs)%init_neumann(flux_value)
        end if
 
        !> Check if user bc on this zone
        if (bc_label(1:4) .eq. 'user') then
           call this%user_bc%mark_zone(zones(i))
        end if
+
     end do
 
     do i = 1, this%n_dir_bcs
        call this%dir_bcs(i)%finalize()
-       call bc_list_add(this%bclst, this%dir_bcs(i))
+       call bc_list_add(this%bclst_dirichlet, this%dir_bcs(i))
+    end do
+
+    ! Create list with just Neumann bcs
+    call bc_list_init(this%bclst_neumann, this%n_neumann_bcs)
+    do i=1, this%n_neumann_bcs
+       call this%neumann_bcs(i)%finalize()
+       call bc_list_add(this%bclst_neumann, this%neumann_bcs(i))
     end do
 
   end subroutine scalar_scheme_add_bcs
@@ -229,7 +310,7 @@ subroutine scalar_scheme_init(this, msh, c_Xh, gs_Xh, params, scheme, user, &
     ! Variables for retrieving json parameters
     logical :: logical_val
     real(kind=rp) :: real_val, solver_abstol
-    integer :: integer_val
+    integer :: integer_val, ierr
     character(len=:), allocatable :: solver_type, solver_precon
 
     this%u => neko_field_registry%get_field('u')
@@ -257,11 +338,12 @@ subroutine scalar_scheme_init(this, msh, c_Xh, gs_Xh, params, scheme, user, &
     write(log_buf, '(A,ES13.6)') 'cp         :',  this%cp
     call neko_log%message(log_buf)
 
-    call json_get_or_default(params, 'case.fluid.velocity_solver.max_iterations',&
-                             this%ksp_maxiter, 800)
     call json_get_or_default(params, &
                             'case.fluid.velocity_solver.projection_space_size',&
                             this%projection_dim, 20)
+    call json_get_or_default(params, &
+                            'case.fluid.velocity_solver.projection_hold_steps',&
+                            this%projection_activ_step, 5)
 
 
     write(log_buf, '(A, A)') 'Type       : ', trim(scheme)
@@ -275,19 +357,22 @@ subroutine scalar_scheme_init(this, msh, c_Xh, gs_Xh, params, scheme, user, &
     this%dm_Xh => this%u%dof
     this%params => params
     this%msh => msh
-    call neko_field_registry%add_field(this%dm_Xh, 's')
+    if (.not. neko_field_registry%field_exists('s')) then
+       call neko_field_registry%add_field(this%dm_Xh, 's')
+    end if
     this%s => neko_field_registry%get_field('s')
 
+    call this%slag%init(this%s, 2)
+
     this%gs_Xh => gs_Xh
     this%c_Xh => c_Xh
 
-    call source_scalar_init(this%f_Xh, this%dm_Xh)
 
     !
     ! Setup scalar boundary conditions
     !
-    call bc_list_init(this%bclst)
-    call this%user_bc%init(this%dm_Xh)
+    call bc_list_init(this%bclst_dirichlet)
+    call this%user_bc%init(this%c_Xh)
 
     ! Read boundary types from the case file
     allocate(this%bc_labels(NEKO_MSH_MAX_ZLBLS))
@@ -298,12 +383,22 @@ subroutine scalar_scheme_init(this, msh, c_Xh, gs_Xh, params, scheme, user, &
     if (params%valid_path('case.scalar.boundary_types')) then
        call json_get(params, &
                      'case.scalar.boundary_types', &
-                     this%bc_labels)       
+                     this%bc_labels)
     end if
-    
-    call scalar_scheme_add_bcs(this, msh%labeled_zones, this%bc_labels) 
 
 
+    !
+    ! Setup right-hand side field.
+    !
+    allocate(this%f_Xh)
+    call this%f_Xh%init(this%dm_Xh, fld_name="scalar_rhs")
+
+    ! Initialize the source term
+    call this%source_term%init(params, this%f_Xh, this%c_Xh, user)
+
+    call scalar_scheme_add_bcs(this, msh%labeled_zones, this%bc_labels)
+
+    ! Mark BC zones
     call this%user_bc%mark_zone(msh%wall)
     call this%user_bc%mark_zone(msh%inlet)
     call this%user_bc%mark_zone(msh%outlet)
@@ -311,15 +406,46 @@ subroutine scalar_scheme_init(this, msh, c_Xh, gs_Xh, params, scheme, user, &
     call this%user_bc%mark_zone(msh%sympln)
     call this%user_bc%finalize()
     call this%user_bc%set_coef(this%c_Xh)
-    if (this%user_bc%msk(0) .gt. 0) call bc_list_add(this%bclst, this%user_bc)
-  
+    if (this%user_bc%msk(0) .gt. 0) call bc_list_add(this%bclst_dirichlet,&
+                                                     this%user_bc)
+
+    ! Add field dirichlet BCs
+    call this%field_dir_bc%init(this%c_Xh)
+    call this%field_dir_bc%mark_zones_from_list(msh%labeled_zones, &
+         'd_s', this%bc_labels)
+    call this%field_dir_bc%finalize()
+    call MPI_Allreduce(this%field_dir_bc%msk(0), integer_val, 1, &
+         MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr)
+    if (integer_val .gt. 0) call this%field_dir_bc%init_field('d_s')
+
+    call bc_list_add(this%bclst_dirichlet, this%field_dir_bc)
+
+    !
+    ! Associate our field dirichlet update to the user one.
+    !
+    this%dirichlet_update_ => user%user_dirichlet_update
+
+    !
+    ! Initialize field list and bc list for user_dirichlet_update
+    !
+    allocate(this%field_dirichlet_fields%fields(1))
+    this%field_dirichlet_fields%fields(1)%f => &
+         this%field_dir_bc%field_bc
+
+    call bc_list_init(this%field_dirichlet_bcs, size=1)
+    call bc_list_add(this%field_dirichlet_bcs, this%field_dir_bc)
+
+
     ! todo parameter file ksp tol should be added
+    call json_get_or_default(params, 'case.fluid.velocity_solver.max_iterations',&
+                             integer_val, 800)
     call scalar_scheme_solver_factory(this%ksp, this%dm_Xh%size(), &
-         solver_type, solver_abstol)
+         solver_type, integer_val, solver_abstol)
     call scalar_scheme_precon_factory(this%pc, this%ksp, &
-         this%c_Xh, this%dm_Xh, this%gs_Xh, this%bclst, solver_precon)
-  
+         this%c_Xh, this%dm_Xh, this%gs_Xh, this%bclst_dirichlet, solver_precon)
+
     call neko_log%end_section()
+
   end subroutine scalar_scheme_init
 
 
@@ -347,9 +473,21 @@ subroutine scalar_scheme_free(this)
        deallocate(this%bc_labels)
     end if
 
-    call source_scalar_free(this%f_Xh)
+    call this%source_term%free()
 
-    call bc_list_free(this%bclst)
+    call bc_list_free(this%bclst_dirichlet)
+    call bc_list_free(this%bclst_neumann)
+
+    call this%slag%free()
+
+    ! Free everything related to field dirichlet BCs
+    call this%field_dirichlet_fields%free()
+    call bc_list_free(this%field_dirichlet_bcs)
+    call this%field_dir_bc%field_bc%free()
+    call this%field_dir_bc%free()
+    if (associated(this%dirichlet_update_)) then
+       this%dirichlet_update_ => null()
+    end if
 
   end subroutine scalar_scheme_free
 
@@ -380,9 +518,9 @@ subroutine scalar_scheme_validate(this)
     if (.not. associated(this%c_Xh)) then
        call neko_error('No coefficients defined')
     end if
-    
-    if (.not. associated(this%f_Xh%eval)) then
-       call neko_error('No source term defined')
+
+    if (.not. associated(this%f_Xh)) then
+       call neko_error('No rhs allocated')
     end if
 
     if (.not. associated(this%params)) then
@@ -397,23 +535,17 @@ subroutine scalar_scheme_validate(this)
 
   end subroutine scalar_scheme_validate
 
-  !> Apply all boundary conditions defined for velocity
-  !! @todo Why can't we call the interface here?
-  subroutine scalar_scheme_bc_apply(this)
-    class(scalar_scheme_t), intent(inout) :: this
-    call bc_list_apply_scalar(this%bclst, this%s%x, this%dm_Xh%size())
-  end subroutine scalar_scheme_bc_apply
-  
   !> Initialize a linear solver
   !! @note Currently only supporting Krylov solvers
-  subroutine scalar_scheme_solver_factory(ksp, n, solver, abstol)
+  subroutine scalar_scheme_solver_factory(ksp, n, solver, max_iter, abstol)
     class(ksp_t), allocatable, target, intent(inout) :: ksp
     integer, intent(in), value :: n
+    integer, intent(in) :: max_iter
     character(len=*), intent(in) :: solver
     real(kind=rp) :: abstol
 
-    call krylov_solver_factory(ksp, n, solver, abstol)
-    
+    call krylov_solver_factory(ksp, n, solver, max_iter, abstol)
+
   end subroutine scalar_scheme_solver_factory
 
   !> Initialize a Krylov preconditioner
@@ -425,9 +557,9 @@ subroutine scalar_scheme_precon_factory(pc, ksp, coef, dof, gs, bclst, pctype)
     type(gs_t), target, intent(inout) :: gs
     type(bc_list_t), target, intent(inout) :: bclst
     character(len=*) :: pctype
-    
+
     call precon_factory(pc, pctype)
-    
+
     select type(pcp => pc)
     type is(jacobi_t)
        call pcp%init(coef, dof, gs)
@@ -449,28 +581,9 @@ subroutine scalar_scheme_precon_factory(pc, ksp, coef, dof, gs, bclst, pctype)
     end select
 
     call ksp%set_pc(pc)
-    
-  end subroutine scalar_scheme_precon_factory
 
-  !> Initialize source term
-  subroutine scalar_scheme_set_source(this, source_term_type, usr_f, usr_f_vec)
-    class(scalar_scheme_t), intent(inout) :: this
-    character(len=*) :: source_term_type
-    procedure(source_scalar_term_pw), optional :: usr_f
-    procedure(source_scalar_term), optional :: usr_f_vec
-
-    if (trim(source_term_type) .eq. 'noforce') then
-       call source_scalar_set_type(this%f_Xh, source_scalar_eval_noforce)
-    else if (trim(source_term_type) .eq. 'user' .and. present(usr_f)) then
-       call source_scalar_set_pw_type(this%f_Xh, usr_f)
-    else if (trim(source_term_type) .eq. 'user_vector' .and. present(usr_f_vec)) then
-       call source_scalar_set_type(this%f_Xh, usr_f_vec)
-    else
-       call neko_error('Invalid scalar source term '//source_term_type)
-    end if
+  end subroutine scalar_scheme_precon_factory
 
-  end subroutine scalar_scheme_set_source
- 
   !> Initialize a user defined scalar bc
   !! @param usr_eval User specified boundary condition for scalar field
   subroutine scalar_scheme_set_user_bc(this, usr_eval)
@@ -478,8 +591,8 @@ subroutine scalar_scheme_set_user_bc(this, usr_eval)
     procedure(usr_scalar_bc_eval) :: usr_eval
 
     call this%user_bc%set_eval(usr_eval)
-    
+
   end subroutine scalar_scheme_set_user_bc
 
-    
+
 end module scalar_scheme
diff --git a/src/scalar/scalar_source_term.f90 b/src/scalar/scalar_source_term.f90
new file mode 100644
index 00000000000..fbe9696d7d1
--- /dev/null
+++ b/src/scalar/scalar_source_term.f90
@@ -0,0 +1,202 @@
+
+! Copyright (c) 2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Implements the `scalar_source_term_t` type.
+module scalar_source_term
+  use neko_config, only : NEKO_BCKND_DEVICE
+  use num_types, only : rp
+  use scalar_user_source_term, only: scalar_user_source_term_t
+  use source_term, only : source_term_wrapper_t, source_term_t
+  use source_term_fctry, only : source_term_factory
+  use field, only : field_t
+  use field_list, only : field_list_t
+  use json_utils, only : json_get
+  use json_module, only : json_file, json_core, json_value
+  use coefs, only : coef_t
+  use user_intf, only : user_t
+  use utils, only : neko_warning
+  implicit none
+  private
+
+  !> Wrapper contaning and executing the scalar source terms.
+  !! @details
+  !! Exists mainly to keep the `scalar_scheme_t` type smaller and also as
+  !! placeholder for future optimizations.
+  type, public :: scalar_source_term_t
+     !> Array of ordinary source terms.
+     class(source_term_wrapper_t), allocatable :: source_terms(:)
+     !> The right-hand side.
+     type(field_t), pointer :: f => null()
+   contains
+     !> Constructor.
+     procedure, pass(this) :: init => scalar_source_term_init
+     !> Destructor.
+     procedure, pass(this) :: free => scalar_source_term_free
+     !> Add all the source terms to the passed right-hand side fields.
+     procedure, pass(this) :: compute => scalar_source_term_compute
+     !> Initialize the user source term.
+     procedure, nopass, private :: init_user_source
+
+  end type scalar_source_term_t
+
+contains
+
+  !> Constructor.
+  subroutine scalar_source_term_init(this, json, f, coef, user)
+    class(scalar_source_term_t), intent(inout) :: this
+    type(json_file), intent(inout) :: json
+    type(field_t), pointer, intent(in) :: f
+    type(coef_t), intent(inout) :: coef
+    type(user_t), intent(in) :: user
+
+    type(field_list_t) :: rhs_fields
+    ! Json low-level manipulator.
+    type(json_core) :: core
+    ! Pointer to the source_terms JSON object and the individual sources.
+    type(json_value), pointer :: source_object, source_pointer
+    ! Buffer for serializing the json.
+    character(len=:), allocatable :: buffer
+    ! A single source term as its own json_file.
+    type(json_file) :: source_subdict
+    ! Source type
+    character(len=:), allocatable :: type
+    ! Dummy source strenth values
+    real(kind=rp) :: values(3)
+    logical :: found
+    integer :: n_sources, i
+
+    call this%free()
+
+    this%f => f
+
+
+    if (json%valid_path('case.scalar.source_terms')) then
+       ! We package the fields for the source term to operate on in a field list.
+       allocate(rhs_fields%fields(1))
+       rhs_fields%fields(1)%f => f
+
+       call json%get_core(core)
+       call json%get('case.scalar.source_terms', source_object, found)
+
+       n_sources = core%count(source_object)
+       allocate(this%source_terms(n_sources))
+
+
+       do i=1, n_sources
+          ! Create a new json containing just the subdict for this source.
+          call core%get_child(source_object, i, source_pointer, found)
+          call core%print_to_string(source_pointer, buffer)
+          call source_subdict%load_from_string(buffer)
+          call json_get(source_subdict, "type", type)
+
+          ! The user source is treated separately
+          if ((trim(type) .eq. "user_vector") .or. &
+              (trim(type) .eq. "user_pointwise")) then
+             if (source_subdict%valid_path("start_time") .or. &
+                 source_subdict%valid_path("end_time")) then
+                 call neko_warning("The start_time and end_time parameters have&
+                                    & no effect on the scalar user source term")
+             end if
+
+             call init_user_source(this%source_terms(i)%source_term, &
+                                    rhs_fields, coef, type, user)
+          else
+
+             call source_term_factory(this%source_terms(i)%source_term, &
+                                       source_subdict, rhs_fields, coef)
+          end if
+       end do
+    end if
+
+  end subroutine scalar_source_term_init
+
+  !> Initialize the user source term.
+  !! @param source_term The allocatable source term to be initialized to a user.
+  !! @param rhs_fields The field list with the right-hand-side.
+  !! @param coef The SEM coefs.
+  !! @param type The type of the user source term, "user_vector" or
+  !! "user_poinwise".
+  !! @param user The user type containing the user source term routines.
+  subroutine init_user_source(source_term, rhs_fields, coef, type, user)
+    class(source_term_t), allocatable, intent(inout) :: source_term
+    type(field_list_t) :: rhs_fields
+    type(coef_t), intent(inout) :: coef
+    character(len=*) :: type
+    type(user_t), intent(in) :: user
+
+    allocate(scalar_user_source_term_t::source_term)
+
+    select type (source_term)
+    type is (scalar_user_source_term_t)
+       call source_term%init_from_components(rhs_fields, coef, type, &
+                                            user%scalar_user_f_vector, &
+                                            user%scalar_user_f)
+    end select
+  end subroutine init_user_source
+
+  !> Destructor.
+  subroutine scalar_source_term_free(this)
+    class(scalar_source_term_t), intent(inout) :: this
+    integer :: i
+
+    nullify(this%f)
+
+    if (allocated(this%source_terms)) then
+       do i=1, size(this%source_terms)
+          call this%source_terms(i)%free()
+       end do
+       deallocate(this%source_terms)
+    end if
+
+  end subroutine scalar_source_term_free
+
+  !> Add all the source term to the passed right-hand side fields.
+  !! @param t The time value.
+  !! @param tstep The current time step.
+  subroutine scalar_source_term_compute(this, t, tstep)
+    class(scalar_source_term_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+    integer :: i, n
+
+    this%f = 0.0_rp
+
+    ! Add contribution from all source terms.
+    if (allocated(this%source_terms)) then
+       do i=1, size(this%source_terms)
+          call this%source_terms(i)%source_term%compute(t, tstep)
+       end do
+    end if
+
+  end subroutine scalar_source_term_compute
+end module scalar_source_term
diff --git a/src/scalar/scalar_user_source_term.f90 b/src/scalar/scalar_user_source_term.f90
new file mode 100644
index 00000000000..47c5b3d98c3
--- /dev/null
+++ b/src/scalar/scalar_user_source_term.f90
@@ -0,0 +1,242 @@
+! Copyright (c) 2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Implements the `scalar_user_source_term_t` type.
+module scalar_user_source_term
+  use neko_config, only : NEKO_BCKND_DEVICE
+  use num_types, only : rp
+  use utils, only : neko_error
+  use source_term, only : source_term_t
+  use json_module, only : json_file
+  use field_list, only : field_list_t
+  use coefs, only : coef_t
+  use device, only : device_map, device_free
+  use device_math, only : device_add2
+  use math, only : add2
+  use dofmap, only : dofmap_t
+  use, intrinsic :: iso_c_binding
+  implicit none
+  private
+
+  public :: scalar_source_compute_pointwise, scalar_source_compute_vector
+
+  !> A source-term for the scalar, with procedure pointers pointing to the
+  !! actual implementation in the user file.
+  !! @details The user source term can be applied either pointiwse or acting
+  !! on the whole array in a single call, which is referred to as "vector"
+  !! application.
+  !! @warning
+  !! The user source term does not support init from JSON and should instead be
+  !! directly initialized from components.
+  type, public, extends(source_term_t) :: scalar_user_source_term_t
+     !> Pointer to the dofmap of the right-hand-side fields.
+     type(dofmap_t), pointer :: dm
+     !> The source term.
+     real(kind=rp), allocatable :: s(:, :, :, :)
+
+     !> Device pointer for `s`.
+     type(c_ptr) :: s_d = C_NULL_PTR
+     !> Compute the source term for a single point
+     procedure(scalar_source_compute_pointwise), nopass, pointer :: compute_pw_ &
+       => null()
+     !> Compute the source term for the entire boundary
+     procedure(scalar_source_compute_vector), nopass, pointer :: compute_vector_&
+       => null()
+   contains
+     !> Constructor from JSON (will throw!).
+     procedure, pass(this) :: init => scalar_user_source_term_init
+     !> Constructor from components.
+     procedure, pass(this) :: init_from_components => &
+       scalar_user_source_term_init_from_components
+     !> Destructor.
+     procedure, pass(this) :: free => scalar_user_source_term_free
+     !> Computes the source term and adds the result to `fields`.
+     procedure, pass(this) :: compute_ => scalar_user_source_term_compute
+  end type scalar_user_source_term_t
+
+  abstract interface
+     !> Computes the source term and adds the result to `fields`.
+     !! @param t The time value.
+     !! @param tstep The current time-step.
+     subroutine scalar_source_compute_vector(this, t)
+       import scalar_user_source_term_t, rp
+       class(scalar_user_source_term_t), intent(inout) :: this
+       real(kind=rp), intent(in) :: t
+     end subroutine scalar_source_compute_vector
+  end interface
+
+  abstract interface
+     !> Computes the source term at a single point.
+     !! @param s The source value.
+     !! @param j The x-index of GLL point.
+     !! @param k The y-index of GLL point.
+     !! @param l The z-index of GLL point.
+     !! @param e The index of element.
+     !! @param t The time value.
+     subroutine scalar_source_compute_pointwise(s, j, k, l, e, t)
+       import rp
+       real(kind=rp), intent(inout) :: s
+       integer, intent(in) :: j
+       integer, intent(in) :: k
+       integer, intent(in) :: l
+       integer, intent(in) :: e
+       real(kind=rp), intent(in) :: t
+     end subroutine scalar_source_compute_pointwise
+  end interface
+
+contains
+
+  !> Constructor from JSON.
+  !! @details
+  !! This will throw, as the user source term should be initialized directly
+  !! from components.
+  subroutine scalar_user_source_term_init(this, json, fields, coef)
+    class(scalar_user_source_term_t), intent(inout) :: this
+    type(json_file), intent(inout) :: json
+    type(field_list_t), intent(inout), target :: fields
+    type(coef_t), intent(inout) :: coef
+
+    call neko_error("The user scalar source term should be init from components")
+
+  end subroutine scalar_user_source_term_init
+
+  !> Constructor from components.
+  !! @param fields A list with 1 field for adding the source values.
+  !! @param coef The SEM coeffs.
+  !! @param sourc_term_type The type of the user source term, "user_vector" or
+  !! "user_pointwise".
+  !! @param eval_vector The procedure to vector-compute the source term.
+  !! @param eval_pointwise The procedure to pointwise-compute the source term.
+  subroutine scalar_user_source_term_init_from_components(this, fields, coef, &
+    source_term_type, eval_vector, eval_pointwise)
+    class(scalar_user_source_term_t), intent(inout) :: this
+    type(field_list_t), intent(inout), target :: fields
+    type(coef_t), intent(inout) :: coef
+    character(len=*) :: source_term_type
+    procedure(scalar_source_compute_vector), optional :: eval_vector
+    procedure(scalar_source_compute_pointwise), optional :: eval_pointwise
+
+    call this%free()
+    call this%init_base(fields, coef, 0.0_rp, huge(0.0_rp))
+
+    this%dm => fields%fields(1)%f%dof
+
+    allocate(this%s(this%dm%Xh%lx, this%dm%Xh%ly, this%dm%Xh%lz, &
+             this%dm%msh%nelv))
+
+    this%s = 0d0
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_map(this%s, this%s_d, this%dm%size())
+    end if
+
+
+    if (trim(source_term_type) .eq. 'user_pointwise' .and. &
+              present(eval_pointwise)) then
+       if (NEKO_BCKND_DEVICE .eq. 1) then
+          call neko_error('Pointwise source terms not supported on accelerators')
+       end if
+       this%compute_vector_ => pointwise_eval_driver
+       this%compute_pw_ => eval_pointwise
+    else if (trim(source_term_type) .eq. 'user_vector' .and. &
+             present(eval_vector)) then
+       this%compute_vector_ => eval_vector
+    else
+       call neko_error('Invalid fluid source term '//source_term_type)
+    end if
+  end subroutine scalar_user_source_term_init_from_components
+
+  !> Destructor.
+  subroutine scalar_user_source_term_free(this)
+    class(scalar_user_source_term_t), intent(inout) :: this
+
+    if (allocated(this%s)) deallocate(this%s)
+
+    if (c_associated(this%s_d)) call device_free(this%s_d)
+
+    nullify(this%compute_vector_)
+    nullify(this%compute_pw_)
+    nullify(this%dm)
+
+    call this%free_base()
+  end subroutine scalar_user_source_term_free
+
+  !> Computes the source term and adds the result to `fields`.
+  !! @param t The time value.
+  !! @param tstep The current time-step.
+  subroutine scalar_user_source_term_compute(this, t, tstep)
+    class(scalar_user_source_term_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+    integer :: n
+
+    call this%compute_vector_(this, t)
+    n = this%fields%fields(1)%f%dof%size()
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_add2(this%fields%fields(1)%f%x_d, this%s_d, n)
+    else
+       call add2(this%fields%fields(1)%f%x, this%s, n)
+    end if
+
+  end subroutine scalar_user_source_term_compute
+
+  !> Driver for all pointwise source term evaluatons.
+  !! @param t The time value.
+  subroutine pointwise_eval_driver(this, t)
+    class(scalar_user_source_term_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: t
+    integer :: j, k, l, e
+    integer :: jj, kk, ll, ee
+
+    select type (this)
+    type is (scalar_user_source_term_t)
+       do e = 1, size(this%s, 4)
+          ee = e
+          do l = 1, size(this%s, 3)
+             ll = l
+             do k = 1, size(this%s, 2)
+                kk = k
+                do j = 1, size(this%s, 1)
+                   jj =j
+                   call this%compute_pw_(this%s(j,k,l,e), jj, kk, ll, ee, t)
+                end do
+             end do
+          end do
+       end do
+    class default
+       call neko_error('Incorrect source type in pointwise eval driver!')
+    end select
+
+  end subroutine pointwise_eval_driver
+
+end module scalar_user_source_term
diff --git a/src/scalar/source_scalar.f90 b/src/scalar/source_scalar.f90
index d43ba4753f3..80b6621a955 100644
--- a/src/scalar/source_scalar.f90
+++ b/src/scalar/source_scalar.f90
@@ -72,7 +72,7 @@ subroutine source_scalar_term_pw(s, j, k, l, e, t)
        real(kind=rp), intent(in) :: t
      end subroutine source_scalar_term_pw
   end interface
-  
+
 contains
 
   !> Initialize a source_scalar term @a f
@@ -91,7 +91,7 @@ subroutine source_scalar_init(f, dm)
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_map(f%s, f%s_d, dm%size())
     end if
-    
+
   end subroutine source_scalar_init
 
   !> Deallocate a source_scalar term @a f
@@ -107,7 +107,7 @@ subroutine source_scalar_free(f)
     if (c_associated(f%s_d)) then
        call device_free(f%s_d)
     end if
-    
+
   end subroutine source_scalar_free
 
   !> Set the eval method for the source_scalar term @a f
@@ -160,7 +160,7 @@ subroutine source_scalar_eval_pw(f, t)
           end do
        end do
     end do
-    
+
   end subroutine source_scalar_eval_pw
-  
+
 end module source_scalar
diff --git a/src/sem/bcknd/device/cuda/coef_kernel.h b/src/sem/bcknd/device/cuda/coef_kernel.h
index 3a7bb750c6c..2eb69472504 100644
--- a/src/sem/bcknd/device/cuda/coef_kernel.h
+++ b/src/sem/bcknd/device/cuda/coef_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __SEM_COEF_KERNEL_H__
+#define __SEM_COEF_KERNEL_H__
 /*
  Copyright (c) 2022, The Neko Authors
  All rights reserved.
@@ -280,3 +282,5 @@ __global__ void coef_generate_drst_kernel(T * __restrict__ jac,
   }
 
 }
+
+#endif // __SEM_COEF_KERNEL_H__
diff --git a/src/sem/bcknd/device/device_coef.F90 b/src/sem/bcknd/device/device_coef.F90
index 1f4917ad6f6..4aaa035c9db 100644
--- a/src/sem/bcknd/device/device_coef.F90
+++ b/src/sem/bcknd/device/device_coef.F90
@@ -38,9 +38,9 @@ module device_coef
   private
 
   public :: device_coef_generate_geo, device_coef_generate_dxydrst
-  
+
 #ifdef HAVE_HIP
-    interface
+  interface
      subroutine hip_coef_generate_geo(G11, G12, G13, G22, G23, G33, &
           drdx, drdy, drdz, dsdx, dsdy, dsdz, dtdx, dtdy, dtdz, &
           jacinv, w3, nel, lx, gdim) &
@@ -49,7 +49,7 @@ subroutine hip_coef_generate_geo(G11, G12, G13, G22, G23, G33, &
        implicit none
        type(c_ptr), value :: G11, G12, G13, G22, G23, G33
        type(c_ptr), value :: drdx, drdy, drdz
-       type(c_ptr), value :: dsdx, dsdy, dsdz 
+       type(c_ptr), value :: dsdx, dsdy, dsdz
        type(c_ptr), value :: dtdx, dtdy, dtdz
        type(c_ptr), value :: jacinv, w3
        integer(c_int) :: nel, gdim, lx
@@ -84,7 +84,7 @@ subroutine cuda_coef_generate_geo(G11, G12, G13, G22, G23, G33, &
        implicit none
        type(c_ptr), value :: G11, G12, G13, G22, G23, G33
        type(c_ptr), value :: drdx, drdy, drdz
-       type(c_ptr), value :: dsdx, dsdy, dsdz 
+       type(c_ptr), value :: dsdx, dsdy, dsdz
        type(c_ptr), value :: dtdx, dtdy, dtdz
        type(c_ptr), value :: jacinv, w3
        integer(c_int) :: nel, gdim, lx
@@ -119,7 +119,7 @@ subroutine opencl_coef_generate_geo(G11, G12, G13, G22, G23, G33, &
        implicit none
        type(c_ptr), value :: G11, G12, G13, G22, G23, G33
        type(c_ptr), value :: drdx, drdy, drdz
-       type(c_ptr), value :: dsdx, dsdy, dsdz 
+       type(c_ptr), value :: dsdx, dsdy, dsdz
        type(c_ptr), value :: dtdx, dtdy, dtdz
        type(c_ptr), value :: jacinv, w3
        integer(c_int) :: nel, gdim, lx
diff --git a/src/sem/bcknd/device/hip/coef_kernel.h b/src/sem/bcknd/device/hip/coef_kernel.h
index 70980ceeef2..7ae59c27463 100644
--- a/src/sem/bcknd/device/hip/coef_kernel.h
+++ b/src/sem/bcknd/device/hip/coef_kernel.h
@@ -1,3 +1,5 @@
+#ifndef __SEM_COEF_KERNEL_H__
+#define __SEM_COEF_KERNEL_H__
 /*
  Copyright (c) 2022, The Neko Authors
  All rights reserved.
@@ -280,3 +282,5 @@ __global__ void coef_generate_drst_kernel(T * __restrict__ jac,
   }
 
 }
+
+#endif // __SEM_COEF_KERNEL_H__
diff --git a/src/sem/bcknd/device/opencl/coef_kernel.cl b/src/sem/bcknd/device/opencl/coef_kernel.cl
index 673fc6aef39..714c9fad84f 100644
--- a/src/sem/bcknd/device/opencl/coef_kernel.cl
+++ b/src/sem/bcknd/device/opencl/coef_kernel.cl
@@ -1,3 +1,5 @@
+#ifndef __SEM_COEF_KERNEL_CL__
+#define __SEM_COEF_KERNEL_CL__
 /*
  Copyright (c) 2022-2023, The Neko Authors
  All rights reserved.
@@ -314,3 +316,5 @@ __kernel void coef_generate_drst_kernel(__global real * __restrict__ jac,
   }
 
 }
+
+#endif // __SEM_COEF_KERNEL_CL__
diff --git a/src/sem/coef.f90 b/src/sem/coef.f90
index 821e5fd60d9..0be5d82b575 100644
--- a/src/sem/coef.f90
+++ b/src/sem/coef.f90
@@ -30,7 +30,7 @@
 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 ! POSSIBILITY OF SUCH DAMAGE.
 !
-!> Coefficients 
+!> Coefficients
 module coefs
   use gather_scatter
   use gs_ops
@@ -48,7 +48,7 @@ module coefs
   use, intrinsic :: iso_c_binding
   implicit none
   private
-  
+
   !> Coefficients defined on a given (mesh, \f$ X_h \f$) tuple.
   !! Arrays use indices (i,j,k,e): element e, local coordinate (i,j,k).
   type, public :: coef_t
@@ -66,24 +66,24 @@ module coefs
      real(kind=rp), allocatable :: G23(:,:,:,:)
 
      real(kind=rp), allocatable :: mult(:,:,:,:) !< Multiplicity
-     !> generate mapping data between element and reference element 
+     !> generate mapping data between element and reference element
      !! \f$ dx/dr, dy/dr, dz/dr \f$
      !! \f$ dx/ds, dy/ds, dz/ds \f$
      !! \f$ dx/dt, dy/dt, dz/dt \f$
-     real(kind=rp), allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:) 
+     real(kind=rp), allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:)
      real(kind=rp), allocatable :: dxds(:,:,:,:), dyds(:,:,:,:), dzds(:,:,:,:)
-     real(kind=rp), allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:) 
+     real(kind=rp), allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:)
      !> \f$ dr/dx, dr/dy, dr/dz \f$
      !! \f$ ds/dx, ds/dy, ds/dz \f$
      !! \f$ dt/dx, dt/dy, dt/dz \f$
-     real(kind=rp), allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:) 
+     real(kind=rp), allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:)
      real(kind=rp), allocatable :: dsdx(:,:,:,:), dsdy(:,:,:,:), dsdz(:,:,:,:)
-     real(kind=rp), allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:) 
-     
+     real(kind=rp), allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:)
+
      real(kind=rp), allocatable :: h1(:,:,:,:) !< Stiffness scaling
      real(kind=rp), allocatable :: h2(:,:,:,:) !< Mass scaling
      logical :: ifh2 !< True if h2 .ne. 0
-     
+
      real(kind=rp), allocatable :: jac(:,:,:,:) !< Jacobian
      real(kind=rp), allocatable :: jacinv(:,:,:,:) !< Inverted Jacobian
      real(kind=rp), allocatable :: B(:,:,:,:) !< Mass matrix/volume matrix
@@ -93,10 +93,10 @@ module coefs
      real(kind=rp), allocatable :: nx(:,:,:,:)   !< x-direction of facet normal
      real(kind=rp), allocatable :: ny(:,:,:,:)   !< y-direction of facet normal
      real(kind=rp), allocatable :: nz(:,:,:,:)   !< z-direction of facet normal
-     !> Pointers to main fields 
-     
+     !> Pointers to main fields
+
      real(kind=rp) :: volume
-     
+
      type(space_t), pointer :: Xh => null()
      type(mesh_t), pointer :: msh => null()
      type(dofmap_t), pointer :: dof => null()
@@ -104,8 +104,8 @@ module coefs
 
      !
      ! Device pointers (if present)
-     ! 
-     
+     !
+
      type(c_ptr) :: G11_d = C_NULL_PTR
      type(c_ptr) :: G22_d = C_NULL_PTR
      type(c_ptr) :: G33_d = C_NULL_PTR
@@ -148,38 +148,39 @@ module coefs
      procedure, private, pass(this) :: init_all => coef_init_all
      procedure, pass(this) :: free => coef_free
      procedure, pass(this) :: get_normal => coef_get_normal
+     procedure, pass(this) :: get_area => coef_get_area
      generic :: init => init_empty, init_all
   end type coef_t
-  
+
 contains
-  
-  !> Initialize empty coefs for a space and a mesh 
+
+  !> Initialize empty coefs for a space and a mesh
   subroutine coef_init_empty(this, Xh, msh)
     class(coef_t), intent(inout) :: this
     type(space_t), intent(inout), target :: Xh
     type(mesh_t), intent(inout), target :: msh
-    integer :: n    
+    integer :: n
     call this%free()
     this%msh => msh
     this%Xh => Xh
-    
+
     allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
-    
+
     allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
-    
+
     allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
-    
+
 
     !
     ! Setup device memory (if present)
     !
-    
+
     n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
     if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. &
         (NEKO_BCKND_OPENCL .eq. 1)) then
@@ -195,9 +196,9 @@ subroutine coef_init_empty(this, Xh, msh)
        call device_map(this%dtdx, this%dtdx_d, n)
        call device_map(this%dtdy, this%dtdy_d, n)
        call device_map(this%dtdz, this%dtdz_d, n)
-       
+
     end if
-    
+
   end subroutine coef_init_empty
 
   !> Initialize coefficients
@@ -206,7 +207,7 @@ subroutine coef_init_all(this, gs_h)
     type(gs_t), intent(inout), target :: gs_h
     integer :: n, m
     call this%free()
-    
+
     this%msh => gs_h%dofmap%msh
     this%Xh => gs_h%dofmap%Xh
     this%dof => gs_h%dofmap
@@ -222,39 +223,39 @@ subroutine coef_init_all(this, gs_h)
     allocate(this%G12(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%G13(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%G23(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
-    
+
     allocate(this%dxdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dxds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dxdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
-    
+
     allocate(this%dydr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dyds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dydt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
-    
+
     allocate(this%dzdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dzds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dzdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
-    
+
     allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
-    
+
     allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
-    
+
     allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
-    
+
     allocate(this%jac(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%jacinv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
-    
+
     allocate(this%area(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
     allocate(this%nx(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
     allocate(this%ny(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
     allocate(this%nz(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv))
-    
+
     allocate(this%B(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
     allocate(this%Binv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
@@ -262,12 +263,12 @@ subroutine coef_init_all(this, gs_h)
     allocate(this%h2(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
 
     allocate(this%mult(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv))
-    
+
 
     !
     ! Setup device memory (if present)
     !
-    
+
     n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_map(this%G11, this%G11_d, n)
@@ -276,7 +277,7 @@ subroutine coef_init_all(this, gs_h)
        call device_map(this%G12, this%G12_d, n)
        call device_map(this%G13, this%G13_d, n)
        call device_map(this%G23, this%G23_d, n)
-       
+
        call device_map(this%dxdr, this%dxdr_d, n)
        call device_map(this%dydr, this%dydr_d, n)
        call device_map(this%dzdr, this%dzdr_d, n)
@@ -284,7 +285,7 @@ subroutine coef_init_all(this, gs_h)
        call device_map(this%dxds, this%dxds_d, n)
        call device_map(this%dyds, this%dyds_d, n)
        call device_map(this%dzds, this%dzds_d, n)
-       
+
        call device_map(this%dxdt, this%dxdt_d, n)
        call device_map(this%dydt, this%dydt_d, n)
        call device_map(this%dzdt, this%dzdt_d, n)
@@ -311,16 +312,16 @@ subroutine coef_init_all(this, gs_h)
        call device_map(this%Binv, this%Binv_d, n)
 
        m = this%Xh%lx * this%Xh%ly * 6 * this%msh%nelv
-       
+
        call device_map(this%area, this%area_d, m)
        call device_map(this%nx, this%nx_d, m)
        call device_map(this%ny, this%ny_d, m)
        call device_map(this%nz, this%nz_d, m)
-       
+
     end if
 
     call coef_generate_dxyzdrst(this)
-    
+
     call coef_generate_geo(this)
 
     call coef_generate_area_and_normal(this)
@@ -333,15 +334,17 @@ subroutine coef_init_all(this, gs_h)
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_rone(this%h1_d, n)
        call device_rone(this%h2_d, n)
-       call device_memcpy(this%h1, this%h1_d, n, DEVICE_TO_HOST)
-       call device_memcpy(this%h2, this%h2_d, n, DEVICE_TO_HOST)
+       call device_memcpy(this%h1, this%h1_d, n, &
+                          DEVICE_TO_HOST, sync=.false.)
+       call device_memcpy(this%h2, this%h2_d, n, &
+                          DEVICE_TO_HOST, sync=.false.)
     else
        call rone(this%h1,n)
        call rone(this%h2,n)
     end if
 
     this%ifh2 = .false.
-    
+
     !
     ! Set up multiplicity
     !
@@ -350,13 +353,14 @@ subroutine coef_init_all(this, gs_h)
     else
        call rone(this%mult, n)
     end if
-       
+
     call gs_h%op(this%mult, n, GS_OP_ADD)
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_invcol1(this%mult_d, n)
-       call device_memcpy(this%mult, this%mult_d, n, DEVICE_TO_HOST, sync=.true.)
-    else    
+       call device_memcpy(this%mult, this%mult_d, n, &
+                          DEVICE_TO_HOST, sync=.true.)
+    else
        call invcol1(this%mult, n)
     end if
 
@@ -385,7 +389,7 @@ subroutine coef_free(this)
     if (allocated(this%G13)) then
        deallocate(this%G13)
     end if
-    
+
     if (allocated(this%G23)) then
        deallocate(this%G23)
     end if
@@ -393,99 +397,99 @@ subroutine coef_free(this)
     if (allocated(this%mult)) then
        deallocate(this%mult)
     end if
-    
+
     if (allocated(this%B)) then
        deallocate(this%B)
     end if
-    
+
     if (allocated(this%Binv)) then
        deallocate(this%Binv)
     end if
-    
+
     if(allocated(this%dxdr)) then
        deallocate(this%dxdr)
     end if
-    
+
     if(allocated(this%dxds)) then
        deallocate(this%dxds)
     end if
-    
+
     if(allocated(this%dxdt)) then
        deallocate(this%dxdt)
     end if
-    
+
     if(allocated(this%dydr)) then
        deallocate(this%dydr)
     end if
-    
+
     if(allocated(this%dyds)) then
        deallocate(this%dyds)
     end if
-    
+
     if(allocated(this%dydt)) then
        deallocate(this%dydt)
     end if
-    
+
     if(allocated(this%dzdr)) then
        deallocate(this%dzdr)
     end if
-    
+
     if(allocated(this%dzds)) then
        deallocate(this%dzds)
     end if
-    
+
     if(allocated(this%dzdt)) then
        deallocate(this%dzdt)
     end if
-    
+
     if(allocated(this%drdx)) then
        deallocate(this%drdx)
     end if
-    
+
     if(allocated(this%dsdx)) then
        deallocate(this%dsdx)
     end if
-    
+
     if(allocated(this%dtdx)) then
        deallocate(this%dtdx)
     end if
-    
+
     if(allocated(this%drdy)) then
        deallocate(this%drdy)
     end if
-    
+
     if(allocated(this%dsdy)) then
        deallocate(this%dsdy)
     end if
-    
+
     if(allocated(this%dtdy)) then
        deallocate(this%dtdy)
     end if
-    
+
     if(allocated(this%drdz)) then
        deallocate(this%drdz)
     end if
-    
+
     if(allocated(this%dsdz)) then
        deallocate(this%dsdz)
     end if
-    
+
     if(allocated(this%dtdz)) then
        deallocate(this%dtdz)
     end if
-    
+
     if(allocated(this%jac)) then
        deallocate(this%jac)
     end if
-    
+
     if(allocated(this%jacinv)) then
        deallocate(this%jacinv)
     end if
-    
+
     if(allocated(this%h1)) then
        deallocate(this%h1)
     end if
-    
+
     if(allocated(this%h2)) then
        deallocate(this%h2)
     end if
@@ -505,8 +509,8 @@ subroutine coef_free(this)
     if (allocated(this%nz)) then
        deallocate(this%nz)
     end if
-    
-    
+
+
     nullify(this%msh)
     nullify(this%Xh)
     nullify(this%dof)
@@ -514,7 +518,7 @@ subroutine coef_free(this)
     !
     ! Cleanup the device (if present)
     !
-    
+
     if (c_associated(this%G11_d)) then
        call device_free(this%G11_d)
     end if
@@ -562,7 +566,7 @@ subroutine coef_free(this)
     if (c_associated(this%dzds_d)) then
        call device_free(this%dzds_d)
     end if
-    
+
     if (c_associated(this%dxdt_d)) then
        call device_free(this%dxdt_d)
     end if
@@ -598,7 +602,7 @@ subroutine coef_free(this)
     if (c_associated(this%dsdz_d)) then
        call device_free(this%dsdz_d)
     end if
-    
+
     if (c_associated(this%dtdx_d)) then
        call device_free(this%dtdx_d)
     end if
@@ -610,7 +614,7 @@ subroutine coef_free(this)
     if (c_associated(this%dtdz_d)) then
        call device_free(this%dtdz_d)
     end if
-    
+
     if (c_associated(this%mult_d)) then
        call device_free(this%mult_d)
     end if
@@ -618,7 +622,7 @@ subroutine coef_free(this)
     if (c_associated(this%h1_d)) then
        call device_free(this%h1_d)
     end if
-    
+
     if (c_associated(this%h2_d)) then
        call device_free(this%h2_d)
     end if
@@ -630,11 +634,11 @@ subroutine coef_free(this)
     if (c_associated(this%jacinv_d)) then
        call device_free(this%jacinv_d)
     end if
-    
+
     if (c_associated(this%B_d)) then
        call device_free(this%B_d)
     end if
-    
+
     if (c_associated(this%Binv_d)) then
        call device_free(this%Binv_d)
     end if
@@ -642,7 +646,7 @@ subroutine coef_free(this)
     if (c_associated(this%area_d)) then
        call device_free(this%area_d)
     end if
-    
+
     if (c_associated(this%nx_d)) then
        call device_free(this%nx_d)
     end if
@@ -654,18 +658,18 @@ subroutine coef_free(this)
     if (c_associated(this%nz_d)) then
        call device_Free(this%nz_d)
     end if
-    
+
 
   end subroutine coef_free
 
   subroutine coef_generate_dxyzdrst(c)
     type(coef_t), intent(inout) :: c
     integer :: e,i,lxy,lyz,ntot
-    
+
     lxy=c%Xh%lx*c%Xh%ly
     lyz=c%Xh%ly*c%Xh%lz
     ntot = c%dof%size()
-       
+
     associate(drdx => c%drdx, drdy => c%drdy, drdz => c%drdz, &
          dsdx => c%dsdx, dsdy => c%dsdy, dsdz => c%dsdz, &
          dtdx => c%dtdx, dtdy => c%dtdy, dtdz => c%dtdz, &
@@ -678,7 +682,7 @@ subroutine coef_generate_dxyzdrst(c)
          dyt => c%Xh%dyt, dzt => c%Xh%dzt, &
          jacinv => c%jacinv, jac => c%jac)
 
-        if (NEKO_BCKND_DEVICE .eq. 1) then
+      if (NEKO_BCKND_DEVICE .eq. 1) then
 
          call device_coef_generate_dxydrst(c%drdx_d, c%drdy_d, c%drdz_d, &
               c%dsdx_d, c%dsdy_d, c%dsdz_d, c%dtdx_d, c%dtdy_d, c%dtdz_d, &
@@ -687,39 +691,40 @@ subroutine coef_generate_dxyzdrst(c)
               c%dof%x_d, c%dof%y_d, c%dof%z_d, c%jacinv_d, c%jac_d, &
               c%Xh%lx, c%msh%nelv)
 
-         call device_memcpy(dxdr, c%dxdr_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dydr, c%dydr_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dzdr, c%dzdr_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dxds, c%dxds_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dyds, c%dyds_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dzds, c%dzds_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dxdt, c%dxdt_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dydt, c%dydt_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dzdt, c%dzdt_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(drdx, c%drdx_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(drdy, c%drdy_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(drdz, c%drdz_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dsdx, c%dsdx_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dsdy, c%dsdy_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dsdz, c%dsdz_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dtdx, c%dtdx_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dtdy, c%dtdy_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(dtdz, c%dtdz_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(jac, c%jac_d, ntot, DEVICE_TO_HOST)
-         call device_memcpy(jacinv, c%jacinv_d, ntot, DEVICE_TO_HOST, sync=.true.)
+         call device_memcpy(dxdr, c%dxdr_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dydr, c%dydr_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dzdr, c%dzdr_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dxds, c%dxds_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dyds, c%dyds_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dzds, c%dzds_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dxdt, c%dxdt_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dydt, c%dydt_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dzdt, c%dzdt_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(drdx, c%drdx_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(drdy, c%drdy_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(drdz, c%drdz_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dsdx, c%dsdx_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dsdy, c%dsdy_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dsdz, c%dsdz_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dtdx, c%dtdx_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dtdy, c%dtdy_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(dtdz, c%dtdz_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(jac, c%jac_d, ntot, DEVICE_TO_HOST, sync=.false.)
+         call device_memcpy(jacinv, c%jacinv_d, ntot, &
+                            DEVICE_TO_HOST, sync=.true.)
 
       else
          do e = 1, c%msh%nelv
             call mxm(dx, lx, x(1,1,1,e), lx, dxdr(1,1,1,e), lyz)
             call mxm(dx, lx, y(1,1,1,e), lx, dydr(1,1,1,e), lyz)
             call mxm(dx, lx, z(1,1,1,e), lx, dzdr(1,1,1,e), lyz)
-            
+
             do i = 1, lz
                call mxm(x(1,1,i,e), lx, dyt, ly, dxds(1,1,i,e), ly)
                call mxm(y(1,1,i,e), lx, dyt, ly, dyds(1,1,i,e), ly)
                call mxm(z(1,1,i,e), lx, dyt, ly, dzds(1,1,i,e), ly)
             end do
-       
+
             ! We actually take 2d into account, wow, need to do that for the rest.
             if(c%msh%gdim .eq. 3) then
                call mxm(x(1,1,1,e), lxy, dzt, lz, dxdt(1,1,1,e), lz)
@@ -747,11 +752,11 @@ subroutine coef_generate_dxyzdrst(c)
             call rone    (dtdz, ntot)
          else
 
-            do i = 1, ntot               
+            do i = 1, ntot
                c%jac(i, 1, 1, 1) = 0.0_rp
             end do
 
-            do i = 1, ntot               
+            do i = 1, ntot
                c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdr(i, 1, 1, 1)  &
                                  * c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) )
 
@@ -779,7 +784,7 @@ subroutine coef_generate_dxyzdrst(c)
 
                c%drdy(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
                                   - c%dxds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1)
-               
+
                c%drdz(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) &
                                   - c%dxdt(i, 1, 1, 1) * c%dyds(i, 1, 1, 1)
             end do
@@ -790,18 +795,18 @@ subroutine coef_generate_dxyzdrst(c)
 
                c%dsdy(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) &
                                   - c%dxdt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
-               
+
                c%dsdz(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) &
                                   - c%dxdr(i, 1, 1, 1) * c%dydt(i, 1, 1, 1)
             end do
 
-            do i = 1, ntot 
+            do i = 1, ntot
                c%dtdx(i, 1, 1, 1) = c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) &
                                   - c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1)
 
                c%dtdy(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) &
                                   - c%dxdr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1)
-               
+
                c%dtdz(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) &
                                   - c%dxds(i, 1, 1, 1) * c%dydr(i, 1, 1, 1)
             end do
@@ -810,9 +815,9 @@ subroutine coef_generate_dxyzdrst(c)
          call invers2(jacinv, jac, ntot)
       end if
     end associate
-    
+
   end subroutine coef_generate_dxyzdrst
-  
+
   !> Generate geometric data for the given mesh
   !! @note Current implementation assumes regular shaped hex elements
   subroutine coef_generate_geo(c)
@@ -832,25 +837,25 @@ subroutine coef_generate_geo(c)
                                      c%jacinv_d, c%Xh%w3_d, c%msh%nelv, &
                                      c%Xh%lx, c%msh%gdim)
 
-       call device_memcpy(c%G11, c%G11_d, ntot, DEVICE_TO_HOST)
-       call device_memcpy(c%G22, c%G22_d, ntot, DEVICE_TO_HOST)
-       call device_memcpy(c%G33, c%G33_d, ntot, DEVICE_TO_HOST)
-       call device_memcpy(c%G12, c%G12_d, ntot, DEVICE_TO_HOST)
-       call device_memcpy(c%G13, c%G13_d, ntot, DEVICE_TO_HOST)
+       call device_memcpy(c%G11, c%G11_d, ntot, DEVICE_TO_HOST, sync=.false.)
+       call device_memcpy(c%G22, c%G22_d, ntot, DEVICE_TO_HOST, sync=.false.)
+       call device_memcpy(c%G33, c%G33_d, ntot, DEVICE_TO_HOST, sync=.false.)
+       call device_memcpy(c%G12, c%G12_d, ntot, DEVICE_TO_HOST, sync=.false.)
+       call device_memcpy(c%G13, c%G13_d, ntot, DEVICE_TO_HOST, sync=.false.)
        call device_memcpy(c%G23, c%G23_d, ntot, DEVICE_TO_HOST, sync=.true.)
-       
+
     else
        if(c%msh%gdim .eq. 2) then
 
           do i = 1, ntot
              c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) &
-                               + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) 
+                               + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1)
 
              c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
                                + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
 
              c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) &
-                               + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) 
+                               + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1)
           end do
 
           do i = 1, ntot
@@ -864,12 +869,12 @@ subroutine coef_generate_geo(c)
 
           do e = 1, c%msh%nelv
              do i = 1, lxyz
-               c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
-               c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
-               c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
-            end do
-         end do
-            
+                c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
+                c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
+                c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
+             end do
+          end do
+
        else
 
           do i = 1, ntot
@@ -917,7 +922,7 @@ subroutine coef_generate_geo(c)
                 c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1)
                 c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1)
                 c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1)
-                
+
                 c%G33(i,1,1,e) = c%G33(i,1,1,e) * c%Xh%w3(i,1,1)
                 c%G13(i,1,1,e) = c%G13(i,1,1,e) * c%Xh%w3(i,1,1)
                 c%G23(i,1,1,e) = c%G23(i,1,1,e) * c%Xh%w3(i,1,1)
@@ -926,18 +931,18 @@ subroutine coef_generate_geo(c)
 
        end if
     end if
-    
+
   end subroutine coef_generate_geo
- 
+
   !> Generate mass matrix B for the given mesh and space
   !! @note This is also a stapleholder, we need to go through the coef class properly.
   subroutine coef_generate_mass(c)
     type(coef_t), intent(inout) :: c
     integer :: e, i, lxyz, ntot
-    
+
     lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz
     ntot = c%dof%size()
-    
+
     !> @todo rewrite this nest into a device kernel
     do e = 1, c%msh%nelv
        ! Here we need to handle things differently for axis symmetric elements
@@ -948,15 +953,15 @@ subroutine coef_generate_mass(c)
     end do
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_memcpy(c%B, c%B_d, ntot, HOST_TO_DEVICE)
-       call device_memcpy(c%Binv, c%Binv_d, ntot, HOST_TO_DEVICE)
+       call device_memcpy(c%B, c%B_d, ntot, HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(c%Binv, c%Binv_d, ntot, HOST_TO_DEVICE, sync=.false.)
     end if
-    
+
     call c%gs_h%op(c%Binv, ntot, GS_OP_ADD)
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_invcol1(c%Binv_d, ntot)
-       call device_memcpy(c%Binv, c%Binv_d, ntot, DEVICE_TO_HOST)
+       call device_memcpy(c%Binv, c%Binv_d, ntot, DEVICE_TO_HOST, sync=.true.)
     else
        call invcol1(c%Binv, ntot)
     end if
@@ -974,22 +979,38 @@ pure function coef_get_normal(this, i, j, k, e, facet) result(normal)
     class(coef_t), intent(in) :: this
     integer, intent(in) :: i, j, k, e, facet
     real(kind=rp) :: normal(3)
+
+    select case (facet)
+    case(1,2)
+       normal(1) = this%nx(j, k, facet, e)
+       normal(2) = this%ny(j, k, facet, e)
+       normal(3) = this%nz(j, k, facet, e)
+    case(3,4)
+       normal(1) = this%nx(i, k, facet, e)
+       normal(2) = this%ny(i, k, facet, e)
+       normal(3) = this%nz(i, k, facet, e)
+    case(5,6)
+       normal(1) = this%nx(i, j, facet, e)
+       normal(2) = this%ny(i, j, facet, e)
+       normal(3) = this%nz(i, j, facet, e)
+    end select
+  end function coef_get_normal
+
+  pure function coef_get_area(this, i, j, k, e, facet) result(area)
+    class(coef_t), intent(in) :: this
+    integer, intent(in) :: i, j, k, e, facet
+    real(kind=rp) :: area
       
     select case (facet)               
       case(1,2)
-        normal(1) = this%nx(j, k, facet, e)
-        normal(2) = this%ny(j, k, facet, e)
-        normal(3) = this%nz(j, k, facet, e)
+        area = this%area(j, k, facet, e)
       case(3,4)
-        normal(1) = this%nx(i, k, facet, e)
-        normal(2) = this%ny(i, k, facet, e)
-        normal(3) = this%nz(i, k, facet, e)
+        area = this%area(i, k, facet, e)
       case(5,6)
-        normal(1) = this%nx(i, j, facet, e)
-        normal(2) = this%ny(i, j, facet, e)
-        normal(3) = this%nz(i, j, facet, e)
+        area = this%area(i, j, facet, e)
       end select
-  end function coef_get_normal
+  end function coef_get_area
+
 
   !> Generate facet area and surface normals
   subroutine coef_generate_area_and_normal(coef)
@@ -1002,7 +1023,7 @@ subroutine coef_generate_area_and_normal(coef)
     real(kind=rp) :: weight, len
     n = coef%dof%size()
     lx = coef%Xh%lx
-    
+
     allocate(a(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
     allocate(b(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
     allocate(c(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv))
@@ -1012,7 +1033,7 @@ subroutine coef_generate_area_and_normal(coef)
     do i = 1, n
        a(i, 1, 1, 1) = coef%dyds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
                      - coef%dzds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
- 
+
        b(i, 1, 1, 1) = coef%dzds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
                      - coef%dxds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
 
@@ -1046,7 +1067,7 @@ subroutine coef_generate_area_and_normal(coef)
     do i = 1, n
        a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) &
                      - coef%dzdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1)
- 
+
        b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) &
                      - coef%dxdr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1)
 
@@ -1071,7 +1092,7 @@ subroutine coef_generate_area_and_normal(coef)
              coef%ny(j,k, 3, e) =  B(j, 1, k, e)
              coef%ny(j,k, 4, e) = -B(j, lx, k, e)
              coef%nz(j,k, 3, e) =  C(j, 1, k, e)
-             coef%nz(j,k, 4, e) = -C(j, lx, k, e)             
+             coef%nz(j,k, 4, e) = -C(j, lx, k, e)
           end do
        end do
     end do
@@ -1080,7 +1101,7 @@ subroutine coef_generate_area_and_normal(coef)
     do i = 1, n
        a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) &
                      - coef%dzdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1)
- 
+
        b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1) &
                      - coef%dxdr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1)
 
@@ -1105,11 +1126,11 @@ subroutine coef_generate_area_and_normal(coef)
              coef%ny(j,k, 5, e) = -B(j, k, 1, e)
              coef%ny(j,k, 6, e) =  B(j, k, lx, e)
              coef%nz(j,k, 5, e) = -C(j, k, 1, e)
-             coef%nz(j,k, 6, e) =  C(j, k, lx, e)             
+             coef%nz(j,k, 6, e) =  C(j, k, lx, e)
           end do
        end do
     end do
-    
+
     ! Normalize
     do j = 1, size(coef%nz)
        len = sqrt(coef%nx(j,1,1,1)**2 + &
@@ -1128,13 +1149,17 @@ subroutine coef_generate_area_and_normal(coef)
     !>  @todo cleanup once we have device math in place
     if (NEKO_BCKND_DEVICE .eq. 1) then
        n = size(coef%area)
-       call device_memcpy(coef%area, coef%area_d, n, HOST_TO_DEVICE)
-       call device_memcpy(coef%nx, coef%nx_d, n, HOST_TO_DEVICE)
-       call device_memcpy(coef%ny, coef%ny_d, n, HOST_TO_DEVICE)
-       call device_memcpy(coef%nz, coef%nz_d, n, HOST_TO_DEVICE)
+       call device_memcpy(coef%area, coef%area_d, n, &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(coef%nx, coef%nx_d, n, &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(coef%ny, coef%ny_d, n, &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(coef%nz, coef%nz_d, n, &
+                          HOST_TO_DEVICE, sync=.false.)
     end if
-    
+
   end subroutine coef_generate_area_and_normal
-  
-  
+
+
 end module coefs
diff --git a/src/sem/cpr.f90 b/src/sem/cpr.f90
index a933013128e..09ad81306cb 100644
--- a/src/sem/cpr.f90
+++ b/src/sem/cpr.f90
@@ -52,9 +52,9 @@ module cpr
      real(kind=rp), allocatable :: v(:,:) !< Transformation matrix
 
      real(kind=rp), allocatable :: vt(:,:) !< Transformation matrix transposed
-     real(kind=rp), allocatable :: vinv(:,:) !< Transformation matrix inversed 
+     real(kind=rp), allocatable :: vinv(:,:) !< Transformation matrix inversed
      real(kind=rp), allocatable :: vinvt(:,:) !< Transformation matrix
-     !! inversed and transposed 
+     !! inversed and transposed
      real(kind=rp), allocatable :: w(:,:) !< Diagonal matrix with weights
 
      real(kind=rp), allocatable :: fldhat(:,:,:,:) !< transformed Field data
@@ -147,7 +147,7 @@ subroutine cpr_generate_specmat(cpr)
     real(kind=rp) :: L(0:cpr%Xh%lx-1)
     real(kind=rp) :: delta(cpr%Xh%lx)
     integer :: i, kj, j, j2, kk
-    character(len=LOG_SIZE) :: log_buf 
+    character(len=LOG_SIZE) :: log_buf
 
     associate(Xh => cpr%Xh, v=> cpr%v, vt => cpr%vt, &
          vinv => cpr%vinv, vinvt => cpr%vinvt, w => cpr%w)
@@ -159,14 +159,14 @@ subroutine cpr_generate_specmat(cpr)
          L(1) = Xh%zg(j,1)
          do j2 = 2, Xh%lx-1
             L(j2) = ( (2*j2-1) * Xh%zg(j,1) * L(j2-1) &
-                 - (j2-1) * L(j2-2) ) / j2 
+                 - (j2-1) * L(j2-2) ) / j2
          end do
          do kk = 1, Xh%lx
             kj = kj+1
             v(kj,1) = L(KK-1)
          end do
       end do
-      
+
       ! transpose the matrix
       call trsp1(v, Xh%lx) !< non orthogonal wrt weights
 
@@ -174,24 +174,24 @@ subroutine cpr_generate_specmat(cpr)
       do i = 1, Xh%lx
          delta(i) = 2.0_rp / (2*(i-1)+1)
       end do
-      ! modify last entry  
+      ! modify last entry
       delta(Xh%lx) = 2.0_rp / (Xh%lx-1)
-      
+
       ! calculate the inverse to multiply the matrix
       do i = 1, Xh%lx
          delta(i) = sqrt(1.0_rp / delta(i))
       end do
-      ! scale the matrix      
+      ! scale the matrix
       do i = 1, Xh%lx
          do j = 1, Xh%lx
             v(i,j) = v(i,j) * delta(j) ! orthogonal wrt weights
          end do
       end do
-    
+
       ! get the trasposed
       call copy(vt, v, Xh%lx * Xh%lx)
       call trsp1(vt, Xh%lx)
-      
+
       !populate the mass matrix
       kk = 1
       do i = 1, Xh%lx
@@ -204,15 +204,15 @@ subroutine cpr_generate_specmat(cpr)
             end if
          end do
       end do
-      
+
       !Get the inverse of the transform matrix
       call mxm(vt, Xh%lx, w, Xh%lx, vinv, Xh%lx)
-      
+
       !get the transposed of the inverse
       call copy(vinvt, vinv, Xh%lx * Xh%lx)
       call trsp1(vinvt, Xh%lx)
     end associate
-    
+
   end subroutine cpr_generate_specmat
 
 
@@ -225,14 +225,14 @@ subroutine cpr_goto_space(cpr, space)
     real(kind=rp) :: specmat(cpr%Xh%lx,cpr%Xh%lx)
     real(kind=rp) :: specmatt(cpr%Xh%lx,cpr%Xh%lx)
     integer :: i, j, k, e, nxyz, nelv
-    character(len=LOG_SIZE) :: log_buf 
-    character(len=4) :: space 
+    character(len=LOG_SIZE) :: log_buf
+    character(len=4) :: space
 
     ! define some constants
     nxyz = cpr%Xh%lx*cpr%Xh%lx*cpr%Xh%lx
     nelv = cpr%msh%nelv
 
-    ! Define the matrix according to which transform to do 
+    ! Define the matrix according to which transform to do
     if (space .eq. 'spec') then
        call copy(specmat, cpr%vinv, cpr%Xh%lx*cpr%Xh%lx)
        call copy(specmatt, cpr%vinvt, cpr%Xh%lx*cpr%Xh%lx)
@@ -271,15 +271,15 @@ subroutine cpr_truncate_wn(cpr, coef)
     real(kind=rp) :: vsort(cpr%Xh%lx, cpr%Xh%lx, cpr%Xh%lx)
     real(kind=rp) :: vtrunc(cpr%Xh%lx, cpr%Xh%lx, cpr%Xh%lx)
     real(kind=rp) :: vtemp(cpr%Xh%lx, cpr%Xh%lx, cpr%Xh%lx)
-    real(kind=rp) :: errvec(cpr%Xh%lx, cpr%Xh%lx, cpr%Xh%lx) 
-    real(kind=rp) :: fx(cpr%Xh%lx, cpr%Xh%lx) 
-    real(kind=rp) :: fy(cpr%Xh%lx, cpr%Xh%lx) 
-    real(kind=rp) :: fz(cpr%Xh%lx, cpr%Xh%lx) 
+    real(kind=rp) :: errvec(cpr%Xh%lx, cpr%Xh%lx, cpr%Xh%lx)
+    real(kind=rp) :: fx(cpr%Xh%lx, cpr%Xh%lx)
+    real(kind=rp) :: fy(cpr%Xh%lx, cpr%Xh%lx)
+    real(kind=rp) :: fz(cpr%Xh%lx, cpr%Xh%lx)
     real(kind=rp) :: l2norm, oldl2norm, targeterr
     integer :: isort(cpr%Xh%lx, cpr%Xh%lx, cpr%Xh%lx)
     integer :: i, j, k, e, nxyz, nelv
     integer :: kut, kutx, kuty, kutz, nx
-    character(len=LOG_SIZE) :: log_buf 
+    character(len=LOG_SIZE) :: log_buf
 
     ! define some constants
     nx = cpr%Xh%lx
@@ -293,7 +293,7 @@ subroutine cpr_truncate_wn(cpr, coef)
        call copy(vtemp, cpr%fldhat(1,1,1,e), nxyz)
        call copy(vtrunc, cpr%fldhat(1,1,1,e), nxyz)
        ! sort the coefficients by absolute value
-       call sortcoeff(vsort, cpr%fldhat(1,1,1,e), isort, nxyz) 
+       call sortcoeff(vsort, cpr%fldhat(1,1,1,e), isort, nxyz)
        ! initialize values for iterative procedure
        l2norm = 0.0_rp
        kut = 0
@@ -374,10 +374,10 @@ subroutine cpr_truncate_wn(cpr, coef)
 
   end subroutine cpr_truncate_wn
 
-  !> Sort the spectral coefficient in descending order 
+  !> Sort the spectral coefficient in descending order
   !! array vsort. The original indices are stored in the isort vector.
-  subroutine sortcoeff(vsort, v, isort, nxyz) 
-    integer, intent(in) :: nxyz      
+  subroutine sortcoeff(vsort, v, isort, nxyz)
+    integer, intent(in) :: nxyz
     real(kind=rp), intent(inout) :: vsort(nxyz)
     real(kind=rp), intent(inout) :: v(nxyz)
     integer, intent(inout) :: isort(nxyz)
@@ -404,9 +404,9 @@ subroutine sortcoeff(vsort, v, isort, nxyz)
 
   end subroutine sortcoeff
 
-  !> Flip vector b and ind 
+  !> Flip vector b and ind
   subroutine flipv(b, ind, n)
-    integer, intent(in) :: n      
+    integer, intent(in) :: n
     real(kind=rp), intent(inout) :: b(n)
     integer, intent(inout) :: ind(n)
     real(kind=rp) :: temp(n)
@@ -427,7 +427,7 @@ end subroutine flipv
 
   !> sort the array acording to ind vector
   subroutine swap(b, ind, n)
-    integer, intent(in) :: n      
+    integer, intent(in) :: n
     real(kind=rp), intent(inout) :: b(n)
     integer, intent(inout) :: ind(n)
     real(kind=rp) :: temp(n)
@@ -445,7 +445,7 @@ end subroutine swap
 
   !> reorder the array - inverse of swap
   subroutine reord(b, ind, n)
-    integer, intent(in) :: n      
+    integer, intent(in) :: n
     real(kind=rp), intent(inout) :: b(n)
     integer, intent(inout) :: ind(n)
     real(kind=rp) :: temp(n)
@@ -461,10 +461,10 @@ subroutine reord(b, ind, n)
 
   end subroutine reord
 
-  !> create filter transfer function 
-  subroutine build_filter_tf(fx, fy, fz, kut, lx) 
-    integer, intent(in) :: lx      
-    integer, intent(in) :: kut      
+  !> create filter transfer function
+  subroutine build_filter_tf(fx, fy, fz, kut, lx)
+    integer, intent(in) :: lx
+    integer, intent(in) :: kut
     real(kind=rp), intent(inout) :: fx(lx,lx)
     real(kind=rp), intent(inout) :: fy(lx,lx)
     real(kind=rp), intent(inout) :: fz(lx,lx)
@@ -532,7 +532,7 @@ function get_elem_l2norm(elemdata, coef, space, e) result(l2e)
     real(kind=rp) :: elemdata(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx)
     real(kind=rp) :: vole, suma, l2e
     integer i, e, eg, nxyz
-    character(len=4) :: space 
+    character(len=4) :: space
 
     ! Get the volume of the element
     nxyz = coef%Xh%lx*coef%Xh%lx*coef%Xh%lx
diff --git a/src/sem/dofmap.f90 b/src/sem/dofmap.f90
index 36d136005bb..16f4e0b4018 100644
--- a/src/sem/dofmap.f90
+++ b/src/sem/dofmap.f90
@@ -46,7 +46,7 @@ module dofmap
   use element, only : element_t
   use quad, only : quad_t
   use hex, only : hex_t
-  use, intrinsic :: iso_c_binding, only : c_ptr, C_NULL_PTR    
+  use, intrinsic :: iso_c_binding, only : c_ptr, C_NULL_PTR
   implicit none
   private
 
@@ -76,7 +76,7 @@ module dofmap
   interface dofmap_t
      module procedure dofmap_init
   end interface dofmap_t
-  
+
 contains
 
   function dofmap_init(msh, Xh) result(this)
@@ -96,11 +96,11 @@ function dofmap_init(msh, Xh) result(this)
     this%Xh => Xh
 
     this%ntot = Xh%lx* Xh%ly * Xh%lz * msh%nelv
-        
+
     !
     ! Assign a unique id for all dofs
-    ! 
-    
+    !
+
     allocate(this%dof(Xh%lx, Xh%ly, Xh%lz, msh%nelv))
     allocate(this%shared_dof(Xh%lx, Xh%ly, Xh%lz, msh%nelv))
 
@@ -120,28 +120,31 @@ function dofmap_init(msh, Xh) result(this)
     !
     ! Generate x,y,z-coordinates for all dofs
     !
-    
+
     allocate(this%x(Xh%lx, Xh%ly, Xh%lz, msh%nelv))
     allocate(this%y(Xh%lx, Xh%ly, Xh%lz, msh%nelv))
     allocate(this%z(Xh%lx, Xh%ly, Xh%lz, msh%nelv))
-    
+
     this%x = 0d0
     this%y = 0d0
     this%z = 0d0
     !> @note should be intialised differently in axissymmetric case
 
-    call dofmap_generate_xyz(this)    
+    call dofmap_generate_xyz(this)
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
        call device_map(this%x, this%x_d, this%ntot)
        call device_map(this%y, this%y_d, this%ntot)
        call device_map(this%z, this%z_d, this%ntot)
 
-       call device_memcpy(this%x, this%x_d, this%ntot, HOST_TO_DEVICE)
-       call device_memcpy(this%y, this%y_d, this%ntot, HOST_TO_DEVICE)
-       call device_memcpy(this%z, this%z_d, this%ntot, HOST_TO_DEVICE)
+       call device_memcpy(this%x, this%x_d, this%ntot, &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(this%y, this%y_d, this%ntot, &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(this%z, this%z_d, this%ntot, &
+                          HOST_TO_DEVICE, sync=.false.)
     end if
-    
+
   end function dofmap_init
 
   !> Deallocate the dofmap
@@ -185,7 +188,7 @@ subroutine dofmap_free(this)
     if (c_associated(this%z_d)) then
        call device_free(this%z_d)
     end if
-    
+
   end subroutine dofmap_free
 
   !> Return the total number of dofs in the dofmap, lx*ly*lz*nelv
@@ -222,15 +225,15 @@ subroutine dofmap_number_edges(this)
     type(mesh_t), pointer :: msh
     type(space_t), pointer :: Xh
     integer :: i,j,k
-    integer :: global_id        
+    integer :: global_id
     type(tuple_i4_t) :: edge
     integer(kind=i8) :: num_dofs_edges(3) ! #dofs for each dir (r, s, t)
     integer(kind=i8) :: edge_id, edge_offset
     logical :: shared_dof
-        
+
     msh => this%msh
     Xh => this%Xh
-    
+
     ! Number of dofs on an edge excluding end-points
     num_dofs_edges(1) =  int(Xh%lx - 2, i8)
     num_dofs_edges(2) =  int(Xh%ly - 2, i8)
@@ -238,12 +241,12 @@ subroutine dofmap_number_edges(this)
     edge_offset = int(msh%glb_mpts, i8) + int(1, 4)
 
     do i = 1, msh%nelv
-       
+
        select type(ep=>msh%elements(i)%e)
        type is (hex_t)
           !
           ! Number edges in r-direction
-          ! 
+          !
           call ep%edge_id(edge, 1)
           shared_dof = msh%is_shared(edge)
           global_id = msh%get_global(edge)
@@ -256,7 +259,7 @@ subroutine dofmap_number_edges(this)
              this%shared_dof(k, 1, 1, i) = shared_dof
              edge_id = edge_id + 1
           end do
-          
+
           call ep%edge_id(edge, 3)
           shared_dof = msh%is_shared(edge)
           global_id = msh%get_global(edge)
@@ -296,7 +299,7 @@ subroutine dofmap_number_edges(this)
 
           !
           ! Number edges in s-direction
-          ! 
+          !
           call ep%edge_id(edge, 5)
           shared_dof = msh%is_shared(edge)
           global_id = msh%get_global(edge)
@@ -308,7 +311,7 @@ subroutine dofmap_number_edges(this)
              this%shared_dof(1, k, 1, i) = shared_dof
              edge_id = edge_id + 1
           end do
-          
+
           call ep%edge_id(edge, 7)
           shared_dof = msh%is_shared(edge)
           global_id = msh%get_global(edge)
@@ -339,7 +342,7 @@ subroutine dofmap_number_edges(this)
           edge_id = edge_offset + int((global_id - 1), i8) * num_dofs_edges(2)
           do j = 2, Xh%ly - 1
              k = j
-             if(int(edge%x(1), i8) .ne. this%dof(Xh%lx,1,Xh%lz,i)) k = Xh%lz+1-j 
+             if(int(edge%x(1), i8) .ne. this%dof(Xh%lx,1,Xh%lz,i)) k = Xh%lz+1-j
              this%dof(Xh%lx, k, Xh%lz, i) = edge_id
              this%shared_dof(Xh%lx, k, Xh%lz, i) = shared_dof
              edge_id = edge_id + 1
@@ -347,26 +350,26 @@ subroutine dofmap_number_edges(this)
 
           !
           ! Number edges in t-direction
-          ! 
+          !
           call ep%edge_id(edge, 9)
           shared_dof = msh%is_shared(edge)
           global_id = msh%get_global(edge)
           edge_id = edge_offset + int((global_id - 1), i8) * num_dofs_edges(3)
           do j = 2, Xh%lz - 1
              k = j
-             if(int(edge%x(1), i8) .ne. this%dof(1,1,1,i)) k = Xh%lz+1-j 
+             if(int(edge%x(1), i8) .ne. this%dof(1,1,1,i)) k = Xh%lz+1-j
              this%dof(1, 1, k, i) = edge_id
              this%shared_dof(1, 1, k, i) = shared_dof
              edge_id = edge_id + 1
           end do
-          
+
           call ep%edge_id(edge, 10)
           shared_dof = msh%is_shared(edge)
           global_id = msh%get_global(edge)
           edge_id = edge_offset + int((global_id - 1), i8) * num_dofs_edges(3)
           do j = 2, Xh%lz - 1
              k = j
-             if(int(edge%x(1), i8) .ne. this%dof(Xh%lx,1,1,i)) k = Xh%lz+1-j 
+             if(int(edge%x(1), i8) .ne. this%dof(Xh%lx,1,1,i)) k = Xh%lz+1-j
              this%dof(Xh%lx, 1, k, i) = edge_id
              this%shared_dof(Xh%lx, 1, k, i) = shared_dof
              edge_id = edge_id + 1
@@ -378,7 +381,7 @@ subroutine dofmap_number_edges(this)
           edge_id = edge_offset + int((global_id - 1), i8) * num_dofs_edges(3)
           do j = 2, Xh%lz - 1
              k = j
-             if(int(edge%x(1), i8) .ne. this%dof(1,Xh%ly,1,i)) k = Xh%lz+1-j 
+             if(int(edge%x(1), i8) .ne. this%dof(1,Xh%ly,1,i)) k = Xh%lz+1-j
              this%dof(1, Xh%ly, k, i) = edge_id
              this%shared_dof(1, Xh%ly, k, i) = shared_dof
              edge_id = edge_id + 1
@@ -390,7 +393,7 @@ subroutine dofmap_number_edges(this)
           edge_id = edge_offset + int((global_id - 1), i8) * num_dofs_edges(3)
           do j = 2, Xh%lz - 1
              k = j
-             if(int(edge%x(1), i8) .ne. this%dof(Xh%lx,Xh%ly,1,i)) k = Xh%lz+1-j 
+             if(int(edge%x(1), i8) .ne. this%dof(Xh%lx,Xh%ly,1,i)) k = Xh%lz+1-j
              this%dof(Xh%lx, Xh%ly, k, i) = edge_id
              this%shared_dof(Xh%lx, Xh%ly, k, i) = shared_dof
              edge_id = edge_id + 1
@@ -398,7 +401,7 @@ subroutine dofmap_number_edges(this)
        type is (quad_t)
           !
           ! Number edges in r-direction
-          ! 
+          !
           call ep%facet_id(edge, 3)
           shared_dof = msh%is_shared(edge)
           global_id = msh%get_global(edge)
@@ -426,7 +429,7 @@ subroutine dofmap_number_edges(this)
 
           !
           ! Number edges in s-direction
-          ! 
+          !
           call ep%facet_id(edge, 1)
           shared_dof = msh%is_shared(edge)
           global_id = msh%get_global(edge)
@@ -450,9 +453,9 @@ subroutine dofmap_number_edges(this)
              this%shared_dof(Xh%lx, k, 1, i) = shared_dof
              edge_id = edge_id + 1
           end do
-          
+
        end select
-       
+
     end do
   end subroutine dofmap_number_edges
 
@@ -460,13 +463,13 @@ end subroutine dofmap_number_edges
   subroutine dofmap_number_faces(this)
     type(dofmap_t), target :: this
     type(mesh_t), pointer :: msh
-    type(space_t), pointer :: Xh    
+    type(space_t), pointer :: Xh
     integer :: i,j,k
     integer :: global_id
     type(tuple4_i4_t) :: face, face_order
-    integer(kind=i8) :: num_dofs_faces(3) ! #dofs for each dir (r, s, t)        
+    integer(kind=i8) :: num_dofs_faces(3) ! #dofs for each dir (r, s, t)
     integer(kind=i8) :: facet_offset, facet_id
-    logical :: shared_dof        
+    logical :: shared_dof
 
     msh => this%msh
     Xh => this%Xh
@@ -481,7 +484,7 @@ subroutine dofmap_number_faces(this)
     num_dofs_faces(3) =  int((Xh%lx - 2) * (Xh%ly - 2), i8)
 
     do i = 1, msh%nelv
-       
+
        !
        ! Number facets in r-direction (s, t)-plane
        !
@@ -497,7 +500,7 @@ subroutine dofmap_number_faces(this)
              this%shared_dof(1, j, k, i) = shared_dof
           end do
        end do
-       
+
        call msh%elements(i)%e%facet_id(face, 2)
        call msh%elements(i)%e%facet_order(face_order, 2)
        shared_dof = msh%is_shared(face)
@@ -527,7 +530,7 @@ subroutine dofmap_number_faces(this)
              this%shared_dof(j, 1, k, i) = shared_dof
           end do
        end do
-       
+
        call msh%elements(i)%e%facet_id(face, 4)
        call msh%elements(i)%e%facet_order(face_order, 4)
        shared_dof = msh%is_shared(face)
@@ -557,7 +560,7 @@ subroutine dofmap_number_faces(this)
              this%shared_dof(j, k, 1, i) = shared_dof
           end do
        end do
-       
+
        call msh%elements(i)%e%facet_id(face, 6)
        call msh%elements(i)%e%facet_order(face_order, 6)
        shared_dof = msh%is_shared(face)
@@ -574,57 +577,57 @@ subroutine dofmap_number_faces(this)
 
   end subroutine dofmap_number_faces
 
- !> Get idx for GLL point on face depending on face ordering k and j
+  !> Get idx for GLL point on face depending on face ordering k and j
   function dofmap_facetidx(face_order, face, facet_id, k1, j1, lk1, lj1) result(facet_idx)
-     type(tuple4_i4_t) :: face_order, face
-     integer(kind=i8) :: facet_idx, facet_id
-     integer :: k1, j1, lk1, lj1
-     integer :: k,j,lk,lj
-     
-     k = k1 - 2
-     j = j1 - 2
-     lk = lk1 - 2
-     lj = lj1 - 2
-
-     ! Given the indexes k,j for a GLL point on the inner part of the
-     ! face, we assign a unique number to it that depends on the
-     ! corner with the lowest id and its neighbour with the lowest
-     ! id. The id is assigned in this way to be consistent regardless
-     ! of how the faces are rotated or mirrored.
-     !
-     !   4 -------- 3
-     !     |      |      k
-     !     |----->|      ^
-     !     |----->|      |
-     !     |----->|      |
-     !   1 -------- 2    0--->j
-     
-
-     if(face_order%x(1) .eq. face%x(1)) then
+    type(tuple4_i4_t) :: face_order, face
+    integer(kind=i8) :: facet_idx, facet_id
+    integer :: k1, j1, lk1, lj1
+    integer :: k,j,lk,lj
+
+    k = k1 - 2
+    j = j1 - 2
+    lk = lk1 - 2
+    lj = lj1 - 2
+
+    ! Given the indexes k,j for a GLL point on the inner part of the
+    ! face, we assign a unique number to it that depends on the
+    ! corner with the lowest id and its neighbour with the lowest
+    ! id. The id is assigned in this way to be consistent regardless
+    ! of how the faces are rotated or mirrored.
+    !
+    !   4 -------- 3
+    !     |      |      k
+    !     |----->|      ^
+    !     |----->|      |
+    !     |----->|      |
+    !   1 -------- 2    0--->j
+
+
+    if(face_order%x(1) .eq. face%x(1)) then
        if(face_order%x(2) .lt. face_order%x(4)) then
-         facet_idx = facet_id + j + k*lj 
+          facet_idx = facet_id + j + k*lj
        else
-         facet_idx = facet_id + j*lk + k 
-      end if
-     else  if(face_order%x(2) .eq. face%x(1)) then
+          facet_idx = facet_id + j*lk + k
+       end if
+    else  if(face_order%x(2) .eq. face%x(1)) then
        if(face_order%x(3) .lt. face_order%x(1)) then
-         facet_idx = facet_id + lk*(lj-1-j) + k 
+          facet_idx = facet_id + lk*(lj-1-j) + k
        else
-         facet_idx = facet_id + (lj-1-j) + k*lj
-      end if
-     else if(face_order%x(3) .eq. face%x(1)) then
+          facet_idx = facet_id + (lj-1-j) + k*lj
+       end if
+    else if(face_order%x(3) .eq. face%x(1)) then
        if(face_order%x(4) .lt. face_order%x(2)) then
-         facet_idx = facet_id + (lj-1-j) + lj*(lk-1-k) 
+          facet_idx = facet_id + (lj-1-j) + lj*(lk-1-k)
        else
-         facet_idx = facet_id + lk*(lj-1-j) + (lk-1-k)
-      end if
-     else if(face_order%x(4) .eq. face%x(1)) then
+          facet_idx = facet_id + lk*(lj-1-j) + (lk-1-k)
+       end if
+    else if(face_order%x(4) .eq. face%x(1)) then
        if(face_order%x(1) .lt. face_order%x(3)) then
-         facet_idx = facet_id + lk*j + (lk-1-k) 
+          facet_idx = facet_id + lk*j + (lk-1-k)
        else
-         facet_idx = facet_id + j + lj*(lk-1-k)
-      end if
-   end if
+          facet_idx = facet_id + j + lj*(lk-1-k)
+       end if
+    end if
 
   end function dofmap_facetidx
 
@@ -641,18 +644,18 @@ subroutine dofmap_generate_xyz(this)
 
     msh => this%msh
     Xh => this%Xh
-    
+
     if (msh%gdim .eq. 3) then
        n_edge = 12
     else
        n_edge = 4
     end if
 
-    do i = 1, msh%nelv       
+    do i = 1, msh%nelv
        call dofmap_xyzlin(Xh, msh, msh%elements(i)%e, this%x(1,1,1,i), &
-                          this%y(1,1,1,i), this%z(1,1,1,i)) 
+                          this%y(1,1,1,i), this%z(1,1,1,i))
     end do
-    do i =1, msh%curve%size 
+    do i =1, msh%curve%size
        midpoint = .false.
        el_idx = msh%curve%curve_el(i)%el_idx
        curve_type = msh%curve%curve_el(i)%curve_type
@@ -668,7 +671,7 @@ subroutine dofmap_generate_xyz(this)
                               this%z(1,1,1,el_idx),curve_type, curve_data_tot)
        end if
     end do
-    do i =1, msh%curve%size 
+    do i =1, msh%curve%size
        el_idx = msh%curve%curve_el(i)%el_idx
        do j = 1, 8
           if (msh%curve%curve_el(i)%curve_type(j) .eq. 3) then
@@ -677,7 +680,7 @@ subroutine dofmap_generate_xyz(this)
                               this%x(1,1,1,el_idx), &
                               this%y(1,1,1,el_idx), &
                               this%z(1,1,1, el_idx), &
-                              Xh, msh%elements(el_idx)%e, msh%gdim) 
+                              Xh, msh%elements(el_idx)%e, msh%gdim)
           end if
        end do
     end do
@@ -706,19 +709,19 @@ subroutine dofmap_xyzlin(Xh, msh, element, x, y, z)
     real(kind=rp) :: jxt(Xh%lx*2), jyt(Xh%lx*2), jzt(Xh%lx*2)
     real(kind=rp) :: w(4*Xh%lx**3), tmp(Xh%lx, Xh%lx, Xh%lx)
     real(kind=rp), dimension(2), parameter :: zlin = (/-1d0, 1d0/)
-    
+
     integer :: j, k
 
     zgml = 0d0
     xyzb = 0d0
- 
+
     w = 0d0
-    call copy(zgml(1,1), Xh%zg(1,1), Xh%lx)                               
+    call copy(zgml(1,1), Xh%zg(1,1), Xh%lx)
     call copy(zgml(1,2), Xh%zg(1,2), Xh%ly)
     if (msh%gdim .gt. 2) then
        call copy(zgml(1,3), Xh%zg(1,3), Xh%lz)
     end if
-    
+
     k = 1
     do j = 1, Xh%lx
        call fd_weights_full(zgml(j,1), zlin, 1, 0, jxt(k))
@@ -797,9 +800,9 @@ subroutine dofmap_xyzquad(Xh, msh, element, x, y, z, curve_type, curve_data)
           y3(eindx(k),1,1) = curve_data(2,k)
           z3(eindx(k),1,1) = curve_data(3,k)
        end if
-    end do 
+    end do
     zg(1) = -1
-    zg(2) =  0 
+    zg(2) =  0
     zg(3) =  1
     if (msh%gdim .eq. 3) then
        call gh_face_extend_3d(x3,zg,3,2,w(1,1),w(1,2)) ! 2 --> edge extend
@@ -810,7 +813,7 @@ subroutine dofmap_xyzquad(Xh, msh, element, x, y, z, curve_type, curve_data)
        call gh_face_extend_2d(x3,zg,3,2,w(1,1),w(1,2)) ! 2 --> edge extend
        call gh_face_extend_2d(y3,zg,3,2,w(1,1),w(1,2))
     end if
-    k =1 
+    k =1
     do j = 1, Xh%lx
        call fd_weights_full(Xh%zg(j,1),zquad,2,0,jxt(k))
        call fd_weights_full(Xh%zg(j,2),zquad,2,0,jyt(k))
@@ -837,151 +840,151 @@ subroutine dofmap_xyzquad(Xh, msh, element, x, y, z, curve_type, curve_data)
     call Xh3%free()
   end subroutine dofmap_xyzquad
 
- !> Extend faces into interior via gordon hall
- !! gh_type:  1 - vertex only                   
- !!           2 - vertex and edges
- !!           3 - vertex, edges, and faces      
- !! Original in Nek5000/core/navier5.f
- subroutine gh_face_extend_3d(x, zg, n, gh_type, e, v)
-   integer, intent(in) :: n
-   real(kind=rp), intent(inout) ::  x(n, n, n)
-   real(kind=rp), intent(in) ::  zg(n)
-   real(kind=rp), intent(inout) ::  e(n, n, n)
-   real(kind=rp), intent(inout) ::  v(n, n, n)
-   integer :: gh_type, ntot, kk, jj, ii, k, j, i
-   real(kind=rp) :: si, sj, sk, hi, hj, hk
-   
+  !> Extend faces into interior via gordon hall
+  !! gh_type:  1 - vertex only
+  !!           2 - vertex and edges
+  !!           3 - vertex, edges, and faces
+  !! Original in Nek5000/core/navier5.f
+  subroutine gh_face_extend_3d(x, zg, n, gh_type, e, v)
+    integer, intent(in) :: n
+    real(kind=rp), intent(inout) ::  x(n, n, n)
+    real(kind=rp), intent(in) ::  zg(n)
+    real(kind=rp), intent(inout) ::  e(n, n, n)
+    real(kind=rp), intent(inout) ::  v(n, n, n)
+    integer :: gh_type, ntot, kk, jj, ii, k, j, i
+    real(kind=rp) :: si, sj, sk, hi, hj, hk
+
 !
 !  Build vertex interpolant
 !
-   ntot = n**3
-   call rzero(v, ntot)
-   do kk = 1, n, n-1
-      do jj = 1, n, n-1
-         do ii = 1, n, n-1
-            do k = 1, n
-               do j = 1, n
-                  do i = 1, n
-                     si       = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1)
-                     sj       = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1)
-                     sk       = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1)
-                     v(i,j,k) = v(i,j,k) + si*sj*sk*x(ii,jj,kk)
-                  end do
-               end do
-            end do
-         end do
-      end do
-   end do
-
-   if (gh_type .eq. 1) then
-      call copy(x, v, ntot)
-      return
-   end if
+    ntot = n**3
+    call rzero(v, ntot)
+    do kk = 1, n, n-1
+       do jj = 1, n, n-1
+          do ii = 1, n, n-1
+             do k = 1, n
+                do j = 1, n
+                   do i = 1, n
+                      si       = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1)
+                      sj       = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1)
+                      sk       = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1)
+                      v(i,j,k) = v(i,j,k) + si*sj*sk*x(ii,jj,kk)
+                   end do
+                end do
+             end do
+          end do
+       end do
+    end do
+
+    if (gh_type .eq. 1) then
+       call copy(x, v, ntot)
+       return
+    end if
 !
 !
 !  Extend 12 edges
-   call rzero(e, ntot)
+    call rzero(e, ntot)
 !
 !  x-edges
 !
-   do kk = 1, n, n-1
-      do jj = 1, n, n-1
-         do k = 1, n
-            do j = 1, n
-               do i = 1, n
-                  hj       = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1)
-                  hk       = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1)
-                  e(i,j,k) = e(i,j,k) + hj*hk*(x(i,jj,kk)-v(i,jj,kk))
-               end do
-            end do
-         end do
-      end do
-   end do
+    do kk = 1, n, n-1
+       do jj = 1, n, n-1
+          do k = 1, n
+             do j = 1, n
+                do i = 1, n
+                   hj       = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1)
+                   hk       = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1)
+                   e(i,j,k) = e(i,j,k) + hj*hk*(x(i,jj,kk)-v(i,jj,kk))
+                end do
+             end do
+          end do
+       end do
+    end do
 !
 !  y-edges
 !
-   do kk = 1, n, n-1
-      do ii = 1, n, n-1
-         do k = 1, n
-            do j = 1, n
-               do i = 1, n
-                  hi       = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1)
-                  hk       = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1)
-                  e(i,j,k) = e(i,j,k) + hi*hk*(x(ii,j,kk)-v(ii,j,kk))
-               end do
-            end do
-         end do
-      end do
-   end do
+    do kk = 1, n, n-1
+       do ii = 1, n, n-1
+          do k = 1, n
+             do j = 1, n
+                do i = 1, n
+                   hi       = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1)
+                   hk       = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1)
+                   e(i,j,k) = e(i,j,k) + hi*hk*(x(ii,j,kk)-v(ii,j,kk))
+                end do
+             end do
+          end do
+       end do
+    end do
 !
 !  z-edges
 !
-   do jj = 1, n, n-1
-      do ii = 1, n, n-1
-         do k = 1, n
-            do j = 1, n
-               do i = 1, n
-                  hi       = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1)
-                  hj       = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1)
-                  e(i,j,k) = e(i,j,k) + hi*hj*(x(ii,jj,k)-v(ii,jj,k))
-               end do
-            end do
-         end do
-      end do
-   end do
-   
-   call add2(e, v, ntot)
-   
-   if (gh_type .eq. 2) then
-      call copy(x, e, ntot)
-      return
-   end if
+    do jj = 1, n, n-1
+       do ii = 1, n, n-1
+          do k = 1, n
+             do j = 1, n
+                do i = 1, n
+                   hi       = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1)
+                   hj       = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1)
+                   e(i,j,k) = e(i,j,k) + hi*hj*(x(ii,jj,k)-v(ii,jj,k))
+                end do
+             end do
+          end do
+       end do
+    end do
+
+    call add2(e, v, ntot)
+
+    if (gh_type .eq. 2) then
+       call copy(x, e, ntot)
+       return
+    end if
 !
 !  Extend faces
 !
-   call rzero(v, ntot)
+    call rzero(v, ntot)
 !
 !  x-edges
 !
-   do ii = 1, n, n-1
-      do k = 1, n
-         do j = 1, n
-            do i = 1, n
-               hi       = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1)
-               v(i,j,k) = v(i,j,k) + hi*(x(ii,j,k)-e(ii,j,k))
-            end do
-         end do
-      end do
-   end do
+    do ii = 1, n, n-1
+       do k = 1, n
+          do j = 1, n
+             do i = 1, n
+                hi       = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1)
+                v(i,j,k) = v(i,j,k) + hi*(x(ii,j,k)-e(ii,j,k))
+             end do
+          end do
+       end do
+    end do
 !
 ! y-edges
 !
-   do jj = 1, n, n-1
-      do k = 1, n
-         do j = 1, n
-            do i = 1, n
-               hj       = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1)
-               v(i,j,k) = v(i,j,k) + hj*(x(i,jj,k)-e(i,jj,k))
-            end do
-         end do
-      end do
-   end do
-! 
+    do jj = 1, n, n-1
+       do k = 1, n
+          do j = 1, n
+             do i = 1, n
+                hj       = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1)
+                v(i,j,k) = v(i,j,k) + hj*(x(i,jj,k)-e(i,jj,k))
+             end do
+          end do
+       end do
+    end do
+!
 !  z-edges
-! 
-   do kk= 1 , n, n-1
-      do k = 1, n
-         do j = 1, n
-            do i = 1, n
-               hk       = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1)
-               v(i,j,k) = v(i,j,k) + hk*(x(i,j,kk)-e(i,j,kk))
-            end do
-         end do
-      end do
-   end do
-   
-   call add2(v, e, ntot)
-   call copy(x, v ,ntot)
+!
+    do kk= 1 , n, n-1
+       do k = 1, n
+          do j = 1, n
+             do i = 1, n
+                hk       = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1)
+                v(i,j,k) = v(i,j,k) + hk*(x(i,j,kk)-e(i,j,kk))
+             end do
+          end do
+       end do
+    end do
+
+    call add2(v, e, ntot)
+    call copy(x, v ,ntot)
 
   end subroutine gh_face_extend_3d
 
@@ -996,7 +999,7 @@ subroutine gh_face_extend_2d(x, zg, n, gh_type, e, v)
     real(kind=rp), intent(inout) :: v(n, n)
     integer, intent(in) :: gh_type
     integer :: i,j , jj, ii, ntot
-    real(kind=rp) :: si, sj, hi, hj 
+    real(kind=rp) :: si, sj, hi, hj
 
     !Build vertex interpolant
 
@@ -1054,18 +1057,18 @@ subroutine arc_surface(isid, curve_data, x, y, z, Xh, element, gdim)
     type(space_t), intent(in) :: Xh
     class(element_t) :: element
     real(kind=rp), dimension(5), intent(in) :: curve_data
-    real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz), intent(inout) :: x, y, z 
+    real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz), intent(inout) :: x, y, z
     real(kind=rp) :: pt1x, pt1y, pt2x, pt2y, pt12x, pt12y
-    real(kind=rp) :: radius, gap, dtheta, r, xys 
+    real(kind=rp) :: radius, dtheta, r, xys
     real(kind=rp) :: theta0, xcenn, ycenn, h(Xh%lx, 3, 2)
     real(kind=rp) :: xcrved(Xh%lx), ycrved(Xh%lx), xs, ys
     integer :: isid1, ixt, iyt, izt, ix, itmp
     integer(i4),  dimension(6), parameter :: fcyc_to_sym = (/3, 2, 4, 1, 5, 6/) ! cyclic to symmetric face mapping
     integer(i4),  dimension(12), parameter :: ecyc_to_sym = (/1, 6, 2, 5, 3, 8, &
-         & 4, 7, 9, 10, 12, 11/) ! cyclic to symmetric edge mapping
+    & 4, 7, 9, 10, 12, 11/) ! cyclic to symmetric edge mapping
     integer, parameter, dimension(2, 12) :: edge_nodes = reshape((/1, 2, 3, 4, 5, 6, &
-         & 7, 8, 1, 3, 2, 4, 5, 7, 6, 8, 1, 5, 2, 6, 3, 7, 4, 8/), (/2,12/)) ! symmetric edge to vertex mapping
-                                                                           ! copy from hex as this has private attribute there
+    & 7, 8, 1, 3, 2, 4, 5, 7, 6, 8, 1, 5, 2, 6, 3, 7, 4, 8/), (/2,12/)) ! symmetric edge to vertex mapping
+    ! copy from hex as this has private attribute there
 
     ! this subroutine is a mess of symmetric and cyclic edge/face numberring and
     ! cannot be cleaned without changing an input format (isid seems to be
@@ -1092,7 +1095,7 @@ subroutine arc_surface(isid, curve_data, x, y, z, Xh, element, gdim)
     xys = sqrt(xs**2 + ys**2)
     ! sanity check
     if (abs(2.0 * radius) <= xys * 1.00001) &
-         & call neko_error('Radius to small for arced element surface')
+    & call neko_error('Radius to small for arced element surface')
     ! find center
     dtheta = abs(asin(0.5*xys/radius))
     pt12x  = (pt1x + pt2x)/2.0
@@ -1137,18 +1140,18 @@ subroutine compute_h(h, zgml, gdim, lx)
     integer, intent(in) :: lx, gdim
     real(kind=rp), intent(inout) ::  h(lx, 3, 2)
     real(kind=rp), intent(in) :: zgml(lx, 3)
-    integer :: ix, iy, iz 
+    integer :: ix, iy, iz
 
     do ix = 1, lx
        h(ix,1,1) = (1.0_rp - zgml(ix, 1)) * 0.5_rp
        h(ix,1,2) = (1.0_rp + zgml(ix, 1)) * 0.5_rp
     end do
-    
+
     do iy = 1, lx
        h(iy,2,1) = (1.0_rp - zgml(iy, 2)) * 0.5_rp
        h(iy,2,2) = (1.0_rp + zgml(iy, 2)) * 0.5_rp
     end do
-    
+
     if (gdim .eq. 3) then
        do iz = 1, lx
           h(iz,3,1) = (1.0_rp - zgml(iz, 3)) * 0.5_rp
@@ -1158,7 +1161,7 @@ subroutine compute_h(h, zgml, gdim, lx)
        call rone(h(1,3,1), lx)
        call rone(h(1,3,2), lx)
     end if
-    
+
   end subroutine compute_h
-  
+
 end module dofmap
diff --git a/src/sem/interpolation.f90 b/src/sem/interpolation.f90
index 66be00d23dc..ec3ae35cdae 100644
--- a/src/sem/interpolation.f90
+++ b/src/sem/interpolation.f90
@@ -36,13 +36,9 @@ module interpolation
   use num_types, only : rp
   use device
   use fast3d
-  use field, only : field_t
   use tensor, only : tnsr3d
   use tensor_cpu, only : tnsr3d_cpu
   use space, only : space_t, operator(.eq.), GL, GLL
-  use mxm_wrapper, only : mxm
-  use coefs, only : coef_t
-  use point, only : point_t
   implicit none
   private
 
@@ -119,10 +115,14 @@ subroutine interpolator_init(this, Xh, Yh)
        call device_map(this%Xh_to_YhT, this%Xh_YhT_d, Yh%lx*Xh%lx)
        call device_map(this%Yh_to_Xh, this%Yh_Xh_d, Yh%lx*Xh%lx)
        call device_map(this%Yh_to_XhT, this%Yh_XhT_d, Yh%lx*Xh%lx)
-       call device_memcpy(this%Xh_to_Yh, this%Xh_Yh_d, Yh%lx*Xh%lx, HOST_TO_DEVICE)
-       call device_memcpy(this%Xh_to_YhT, this%Xh_YhT_d, Yh%lx*Xh%lx, HOST_TO_DEVICE)
-       call device_memcpy(this%Yh_to_Xh, this%Yh_Xh_d, Yh%lx*Xh%lx, HOST_TO_DEVICE)
-       call device_memcpy(this%Yh_to_XhT, this%Yh_XhT_d, Yh%lx*Xh%lx, HOST_TO_DEVICE)
+       call device_memcpy(this%Xh_to_Yh, this%Xh_Yh_d, Yh%lx*Xh%lx, &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(this%Xh_to_YhT, this%Xh_YhT_d, Yh%lx*Xh%lx, &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(this%Yh_to_Xh, this%Yh_Xh_d, Yh%lx*Xh%lx, &
+                          HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(this%Yh_to_XhT, this%Yh_XhT_d, Yh%lx*Xh%lx, &
+                          HOST_TO_DEVICE, sync=.false.)
     end if
 
   end subroutine interpolator_init
diff --git a/src/sem/local_interpolation.f90 b/src/sem/local_interpolation.f90
index c88865d3f02..07fac665c48 100644
--- a/src/sem/local_interpolation.f90
+++ b/src/sem/local_interpolation.f90
@@ -30,7 +30,7 @@
 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 ! POSSIBILITY OF SUCH DAMAGE.
 !
-!> Routines to obtain interpolated values on a set of points with known 
+!> Routines to obtain interpolated values on a set of points with known
 !! rst coordinates in elements local to this process.
 module local_interpolation
   use tensor, only: triple_tensor_product, tnsr3d_el_list
@@ -84,7 +84,7 @@ subroutine local_interpolator_init(this, Xh, r, s, t, n_points)
     class(local_interpolator_t), intent(inout), target :: this
     type(space_t), intent(in), target :: Xh
     integer, intent(in) :: n_points
-    real(kind=rp) :: r(n_points), s(n_points), t(n_points) 
+    real(kind=rp) :: r(n_points), s(n_points), t(n_points)
     integer :: size_weights
     call this%free()
     if ((Xh%t .eq. GL) .or. (Xh%t .eq. GLL)) then
diff --git a/src/sem/map_1d.f90 b/src/sem/map_1d.f90
new file mode 100644
index 00000000000..28481d6f23c
--- /dev/null
+++ b/src/sem/map_1d.f90
@@ -0,0 +1,220 @@
+
+!> Creates a 1d GLL point map along a specified direction based on the connectivity in the mesh.
+module map_1d
+  use num_types, only: rp
+  use space, only: space_t
+  use dofmap, only: dofmap_t
+  use gather_scatter
+  use mesh, only: mesh_t
+  use device
+  use comm
+  use logger, only: neko_log, LOG_SIZE
+  use utils, only: neko_error, neko_warning
+  use math, only: glmax, glmin, glimax, glimin, relcmp
+  use neko_mpi_types
+  use, intrinsic :: iso_c_binding
+  implicit none
+  private
+  !> Type that encapsulates a mapping from each gll point in the mesh 
+  !! to its corresponding (global) GLL point index in one direction.
+  !! @remark Could also be rather easily extended to say polar coordinates as well.
+  type, public :: map_1d_t
+     !> Checks whether the specified direction is in the r,s, or t direction for each element.
+     integer, allocatable :: dir_el(:)
+     !> Checks which level an element belongs to.
+     integer, allocatable :: el_lvl(:)
+     !> Checks which level or id in the 1D GLL mapping each point in the dofmap is.
+     integer, allocatable :: pt_lvl(:,:,:,:) 
+     !> Number of elements stacked on top of eachother in the specified direction
+     integer :: n_el_lvls
+     !> Dofmap
+     type(dofmap_t), pointer :: dof => null()
+     !> Mesh
+     type(mesh_t), pointer :: msh => null()
+     !> The specified direction in which we create the 1D mapping
+     integer :: dir
+     !> Tolerance for the mesh
+     real(kind=rp) :: tol = 1e-7
+   contains
+     !> Constructor
+     procedure, pass(this) :: init => map_1d_init
+     !> Destructor
+     procedure, pass(this) :: free => map_1d_free
+  end type map_1d_t
+
+
+contains
+  
+  subroutine map_1d_init(this, dof, gs, dir, tol)
+    class(map_1d_t) :: this
+    type(dofmap_t), target, intent(in) :: dof
+    type(gs_t), intent(inout) :: gs
+    integer, intent(in) :: dir
+    real(kind=rp), intent(in) :: tol
+    integer :: nelv, lx, n, i, e, lvl
+    real(kind=rp), contiguous, pointer :: line(:,:,:,:)
+    real(kind=rp), allocatable :: min_vals(:,:,:,:)
+    type(c_ptr) :: min_vals_d = c_null_ptr
+    real(kind=rp) :: el_dim(3,3), glb_min, glb_max, el_min
+    call this%free()
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+        if (pe_rank .eq. 0) then
+           call neko_warning('map_1d does not copy indices to device, but ok if used on cpu')
+        end if
+    end if
+    
+    this%dir = dir
+    this%dof => dof
+    this%msh => dof%msh
+    nelv = this%msh%nelv
+    lx = this%dof%Xh%lx
+    n = dof%size()
+  
+    if (dir .eq. 1) then
+        line => dof%x
+    else if (dir .eq. 2) then
+        line => dof%y
+    else if(dir .eq. 3) then
+        line => dof%z
+    else
+        call neko_error('Invalid dir for geopmetric comm')
+    end if
+    
+    allocate(this%dir_el(nelv))
+    allocate(this%el_lvl(nelv))
+    allocate(min_vals(lx, lx, lx, nelv))
+    allocate(this%pt_lvl(lx, lx, lx, nelv))
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_map(min_vals,min_vals_d,n)
+    end if
+
+    do i = 1, nelv
+       !store which direction r,s,t corresponds to speciifed direction, x,y,z
+       !we assume elements are stacked on each other...
+       ! Check which one of the normalized vectors are closest to dir
+       ! If we want to incorporate other directions, we should look here
+       el_dim(1,:) = abs(this%msh%elements(i)%e%pts(1)%p%x - this%msh%elements(i)%e%pts(2)%p%x)
+       el_dim(1,:) = el_dim(1,:)/norm2(el_dim(1,:))
+       el_dim(2,:) = abs(this%msh%elements(i)%e%pts(1)%p%x - this%msh%elements(i)%e%pts(3)%p%x)
+       el_dim(2,:) = el_dim(2,:)/norm2(el_dim(2,:))
+       el_dim(3,:) = abs(this%msh%elements(i)%e%pts(1)%p%x - this%msh%elements(i)%e%pts(5)%p%x)
+       el_dim(3,:) = el_dim(3,:)/norm2(el_dim(3,:))
+       ! Checks which directions in rst the xyz corresponds to
+       ! 1 corresponds to r, 2 to s, 3 to t and are stored in dir_el
+       this%dir_el(i) = maxloc(el_dim(:,this%dir),dim=1)
+    end do
+    glb_min =  glmin(line,n)
+    glb_max =  glmax(line,n)
+
+    i = 1
+    this%el_lvl = -1
+    ! Check what the mimum value in each element and put in min_vals
+    do e = 1, nelv
+       el_min = minval(line(:,:,:,e))
+       min_vals(:,:,:,e) = el_min
+       ! Check if this element is on the bottom, in this case assign el_lvl = i = 1 
+       if (relcmp(el_min, glb_min, this%tol)) then
+          if(this%el_lvl(e) .eq. -1) this%el_lvl(e) = i
+       end if
+    end do
+    ! While loop where at each iteation the global maximum value propagates down one level.
+    ! When the minumum value has propagated to the highest level this stops.
+    ! Only works when the bottom plate of the domain is flat.
+    do while (.not. relcmp(glmax(min_vals,n), glb_min, this%tol))
+      i = i + 1
+      do e = 1, nelv
+         !Sets the value at the bottom of each element to glb_max
+         if (this%dir_el(e) .eq. 1) then
+            if (line(1,1,1,e) .gt. line(lx,1,1,e)) then
+               min_vals(lx,:,:,e) = glb_max
+            else
+               min_vals(1,:,:,e) = glb_max
+            end if
+         end if
+         if (this%dir_el(e) .eq. 2) then
+            if (line(1,1,1,e) .gt. line(1,lx,1,e)) then
+               min_vals(:,lx,:,e) = glb_max
+            else
+               min_vals(:,1,:,e) = glb_max
+            end if
+         end if
+         if (this%dir_el(e) .eq. 3) then
+            if (line(1,1,1,e) .gt. line(1,1,lx,e)) then
+               min_vals(:,:,lx,e) = glb_max
+            else
+               min_vals(:,:,1,e) = glb_max
+            end if
+         end if
+      end do
+      if (NEKO_BCKND_DEVICE .eq. 1) &
+         call device_memcpy(min_vals, min_vals_d, n,&
+                            HOST_TO_DEVICE, sync=.false.)
+      !Propagates the minumum value along the element boundary.
+      call gs%op(min_vals,n,GS_OP_MIN)
+      if (NEKO_BCKND_DEVICE .eq. 1) &
+          call device_memcpy(min_vals, min_vals_d, n,&
+                             DEVICE_TO_HOST, sync=.true.)
+      !Checks the new minimum value on each element
+      !Assign this value to all points in this element in min_val
+      !If the element has not already been assinged a level, 
+      !and it has obtained the minval, set el_lvl = i
+      do e = 1, nelv
+         el_min = minval(min_vals(:,:,:,e))
+         min_vals(:,:,:,e) = el_min
+         if (relcmp(el_min, glb_min, this%tol)) then
+            if (this%el_lvl(e) .eq. -1) this%el_lvl(e) = i
+         end if
+      end do
+    end do
+    this%n_el_lvls = glimax(this%el_lvl,nelv)
+    if ( pe_rank .eq. 0) then
+       write(*,*) 'Number of element levels', this%n_el_lvls
+    end if
+    !Numbers the points in each element based on the element level
+    !and its orientation
+    do e = 1, nelv
+       do i = 1, lx
+         lvl = lx * (this%el_lvl(e) - 1) + i
+         if (this%dir_el(e) .eq. 1) then
+            if (line(1,1,1,e) .gt. line(lx,1,1,e)) then
+               this%pt_lvl(lx-i+1,:,:,e) = lvl
+            else
+               this%pt_lvl(i,:,:,e) = lvl
+            end if
+         end if
+         if (this%dir_el(e) .eq. 2) then
+            if (line(1,1,1,e) .gt. line(1,lx,1,e)) then
+               this%pt_lvl(:,lx-i+1,:,e) = lvl
+            else
+               this%pt_lvl(:,i,:,e) = lvl
+            end if
+         end if
+         if (this%dir_el(e) .eq. 3) then
+            if (line(1,1,1,e) .gt. line(1,1,lx,e)) then
+               this%pt_lvl(:,:,lx-i+1,e) = lvl
+            else
+               this%pt_lvl(:,:,i,e) = lvl
+            end if
+         end if
+       end do
+    end do
+    call device_deassociate(min_vals)
+    call device_free(min_vals_d)
+    deallocate(min_vals)
+  end subroutine map_1d_init
+
+  subroutine map_1d_free(this)
+    class(map_1d_t) :: this
+
+    if(allocated(this%dir_el)) deallocate(this%dir_el)
+    if(allocated(this%el_lvl)) deallocate(this%el_lvl)
+    if(allocated(this%pt_lvl)) deallocate(this%pt_lvl)
+    if(associated(this%dof)) nullify(this%dof)
+    if(associated(this%msh)) nullify(this%msh)
+    this%dir = 0
+    this%n_el_lvls = 0
+
+  end subroutine map_1d_free
+
+end module map_1d
diff --git a/src/sem/point_interpolator.f90 b/src/sem/point_interpolator.f90
index 2411b120a71..0017d5cdbe3 100644
--- a/src/sem/point_interpolator.f90
+++ b/src/sem/point_interpolator.f90
@@ -30,24 +30,22 @@
 ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 ! POSSIBILITY OF SUCH DAMAGE.
 !
-!> Routines to interpolate fields on a given element 
+!> Routines to interpolate fields on a given element
 !! on a point in that element with given r,s,t coordinates
 module point_interpolator
-  use tensor, only: triple_tensor_product, tnsr3d_el_list
+  use tensor, only: triple_tensor_product
   use space, only: space_t, GL, GLL
   use num_types, only: rp
   use point, only: point_t
   use math, only: abscmp
-  use fast3d, only: fd_weights_full, setup_intp
+  use fast3d, only: fd_weights_full
   use utils, only: neko_error
-  use field, only: field_t
-  use field_list, only: field_list_t
   use device
   use device_math, only: device_rzero
   use neko_config, only: NEKO_BCKND_DEVICE
   implicit none
   private
-  
+
   !> Field interpolator to arbitrary points within an element.
   !! Tailored for experimentation, and convenience, not performance
   !! Does all interpolation on the CPU.
diff --git a/src/sem/space.f90 b/src/sem/space.f90
index c6b4fa259b0..29e7801997f 100644
--- a/src/sem/space.f90
+++ b/src/sem/space.f90
@@ -36,7 +36,7 @@ module space
   use num_types, only : rp
   use speclib
   use device
-  use utils, only : neko_error    
+  use utils, only : neko_error
   use fast3d, only : setup_intp
   use math
   use tensor, only : trsp1
@@ -44,7 +44,7 @@ module space
   use, intrinsic :: iso_c_binding
   implicit none
   private
-  
+
   integer, public, parameter :: GL = 0, GLL = 1, GJ = 2
 
   !> The function space for the SEM solution fields
@@ -52,7 +52,7 @@ module space
   !! In SEM, the solution fields are represented as a linear combination of
   !! basis functions for a particular function space. Thus, the properties of
   !! the space define that of the solution. The global SEM basis is never build,
-  !! but is implictly defined by the local basis for each element. 
+  !! but is implictly defined by the local basis for each element.
   !! The latter is polynomial, and is therefore defined by the order of the
   !! polys and the selected locations of the nodes for storing the solution.
   !! In SEM, the latter coincides with a Gaussian quadrature (GL, GLL, etc.)
@@ -68,9 +68,9 @@ module space
      integer :: lyz             !< Number of points in yz-plane
      integer :: lxz             !< Number of points in xz-plane
      integer :: lxyz            !< Number of points in xyz-block
-     
+
      real(kind=rp), allocatable :: zg(:,:) !< Quadrature points
-     
+
      real(kind=rp), allocatable :: dr_inv(:) !< 1/dist quadrature points
      real(kind=rp), allocatable :: ds_inv(:) !< 1/dist quadrature points
      real(kind=rp), allocatable :: dt_inv(:) !< 1/dist quadrature points
@@ -94,7 +94,7 @@ module space
      real(kind=rp), allocatable :: dyt(:,:)
      !> Transposed derivative operator \f$ D_3^T \f$
      real(kind=rp), allocatable :: dzt(:,:)
-     
+
      !> Legendre transformation matrices
      real(kind=rp), allocatable :: v(:,:)        !< legendre to physical
      real(kind=rp), allocatable :: vt(:,:)       !< legendre to physical t
@@ -140,7 +140,7 @@ module space
   end interface operator(.ne.)
 
   public :: operator(.eq.), operator(.ne.)
-  
+
 contains
 
   !> Initialize a function space @a s with given polynomial dimensions
@@ -151,7 +151,7 @@ subroutine space_init(s, t, lx, ly, lz)
     integer, intent(in) :: ly           !< Polynomial dimension in y-direction
     integer, optional, intent(in) :: lz !< Polynomial dimension in z-direction
     integer :: ix, iy, iz
- 
+
     call space_free(s)
 
     s%lx = lx
@@ -180,7 +180,7 @@ subroutine space_init(s, t, lx, ly, lz)
     allocate(s%wx(s%lx))
     allocate(s%wy(s%ly))
     allocate(s%wz(s%lz))
-    
+
     allocate(s%dr_inv(s%lx))
     allocate(s%ds_inv(s%ly))
     allocate(s%dt_inv(s%lz))
@@ -194,13 +194,13 @@ subroutine space_init(s, t, lx, ly, lz)
     allocate(s%dxt(s%lx, s%lx))
     allocate(s%dyt(s%ly, s%ly))
     allocate(s%dzt(s%lz, s%lz))
-    
+
     allocate(s%v(s%lx, s%lx))
     allocate(s%vt(s%lx, s%lx))
     allocate(s%vinv(s%lx, s%lx))
     allocate(s%vinvt(s%lx, s%lx))
     allocate(s%w(s%lx, s%lx))
-    
+
     ! Call low-level routines to compute nodes and quadrature weights
     if (t .eq. GLL) then
        call zwgll(s%zg(1,1), s%wx, s%lx)
@@ -233,27 +233,27 @@ subroutine space_init(s, t, lx, ly, lz)
     end do
     !> Setup derivative matrices
     if (t .eq. GLL) then
-        call dgll(s%dx, s%dxt, s%zg(1,1), s%lx, s%lx)
-        call dgll(s%dy, s%dyt, s%zg(1,2), s%ly, s%ly)
-        if (s%lz .gt. 1) then
-           call dgll(s%dz, s%dzt, s%zg(1,3), s%lz, s%lz)
-        else
-           s%dz = 0d0
-           s%dzt = 0d0
-        end if
+       call dgll(s%dx, s%dxt, s%zg(1,1), s%lx, s%lx)
+       call dgll(s%dy, s%dyt, s%zg(1,2), s%ly, s%ly)
+       if (s%lz .gt. 1) then
+          call dgll(s%dz, s%dzt, s%zg(1,3), s%lz, s%lz)
+       else
+          s%dz = 0d0
+          s%dzt = 0d0
+       end if
     else if (t .eq. GL) then
        call setup_intp(s%dx, s%dxt, s%zg(1,1), s%zg(1,1), s%lx, s%lx,1)
        call setup_intp(s%dy, s%dyt, s%zg(1,2), s%zg(1,2), s%ly, s%ly,1)
-        if (s%lz .gt. 1) then
-           call setup_intp(s%dz, s%dzt, s%zg(1,3), s%zg(1,3), s%lz, s%lz, 1)
-        else
-           s%dz = 0d0
-           s%dzt = 0d0
-        end if
-     else
-        call neko_error("Invalid quadrature rule")
-     end if
-    
+       if (s%lz .gt. 1) then
+          call setup_intp(s%dz, s%dzt, s%zg(1,3), s%zg(1,3), s%lz, s%lz, 1)
+       else
+          s%dz = 0d0
+          s%dzt = 0d0
+       end if
+    else
+       call neko_error("Invalid quadrature rule")
+    end if
+
     call space_compute_dist(s%dr_inv, s%zg(1,1), s%lx)
     call space_compute_dist(s%ds_inv, s%zg(1,2), s%ly)
     if (s%lz .gt. 1) then
@@ -282,29 +282,29 @@ subroutine space_init(s, t, lx, ly, lz)
        call device_map(s%vinvt, s%vinvt_d, s%lxy)
        call device_map(s%w,     s%w_d,     s%lxy)
 
-       call device_memcpy(s%dr_inv, s%dr_inv_d, s%lx, HOST_TO_DEVICE)
-       call device_memcpy(s%ds_inv, s%ds_inv_d, s%lx, HOST_TO_DEVICE)
-       call device_memcpy(s%dt_inv, s%dt_inv_d, s%lx, HOST_TO_DEVICE)
-       call device_memcpy(s%wx, s%wx_d, s%lx, HOST_TO_DEVICE)
-       call device_memcpy(s%wy, s%wy_d, s%lx, HOST_TO_DEVICE)
-       call device_memcpy(s%wz, s%wz_d, s%lx, HOST_TO_DEVICE)
-       call device_memcpy(s%dx, s%dx_d, s%lxy, HOST_TO_DEVICE)
-       call device_memcpy(s%dy, s%dy_d, s%lxy, HOST_TO_DEVICE)
-       call device_memcpy(s%dz, s%dz_d, s%lxy, HOST_TO_DEVICE)
-       call device_memcpy(s%dxt, s%dxt_d, s%lxy, HOST_TO_DEVICE)
-       call device_memcpy(s%dyt, s%dyt_d, s%lxy, HOST_TO_DEVICE)
-       call device_memcpy(s%dzt, s%dzt_d, s%lxy, HOST_TO_DEVICE)
-       call device_memcpy(s%w3, s%w3_d, s%lxyz, HOST_TO_DEVICE)
+       call device_memcpy(s%dr_inv, s%dr_inv_d, s%lx, HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(s%ds_inv, s%ds_inv_d, s%lx, HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(s%dt_inv, s%dt_inv_d, s%lx, HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(s%wx, s%wx_d, s%lx, HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(s%wy, s%wy_d, s%lx, HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(s%wz, s%wz_d, s%lx, HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(s%dx, s%dx_d, s%lxy, HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(s%dy, s%dy_d, s%lxy, HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(s%dz, s%dz_d, s%lxy, HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(s%dxt, s%dxt_d, s%lxy, HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(s%dyt, s%dyt_d, s%lxy, HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(s%dzt, s%dzt_d, s%lxy, HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(s%w3, s%w3_d, s%lxyz, HOST_TO_DEVICE, sync=.false.)
 
        ix = s%lx * 3
        call device_map(s%zg, s%zg_d, ix)
-       call device_memcpy(s%zg, s%zg_d, ix, HOST_TO_DEVICE)
+       call device_memcpy(s%zg, s%zg_d, ix, HOST_TO_DEVICE, sync=.false.)
     end if
-    
+
     call space_generate_transformation_matrices(s)
 
   end subroutine space_init
-   
+
   !> Deallocate a space @a s
   subroutine space_free(s)
     class(space_t), intent(inout) :: s
@@ -352,19 +352,19 @@ subroutine space_free(s)
     if (allocated(s%dzt)) then
        deallocate(s%dzt)
     end if
-    
+
     if (allocated(s%dr_inv)) then
        deallocate(s%dr_inv)
     end if
-    
+
     if (allocated(s%ds_inv)) then
        deallocate(s%ds_inv)
     end if
-    
+
     if (allocated(s%dt_inv)) then
        deallocate(s%dt_inv)
     end if
-    
+
     if(allocated(s%v)) then
        deallocate(s%v)
     end if
@@ -388,7 +388,7 @@ subroutine space_free(s)
     !
     ! Cleanup the device (if present)
     !
-    
+
     if (c_associated(s%dr_inv_d)) then
        call device_free(s%dr_inv_d)
     end if
@@ -412,7 +412,7 @@ subroutine space_free(s)
     if (c_associated(s%dzt_d)) then
        call device_free(s%dzt_d)
     end if
-    
+
     if (c_associated(s%dx_d)) then
        call device_free(s%dx_d)
     end if
@@ -444,7 +444,7 @@ subroutine space_free(s)
     if (c_associated(s%zg_d)) then
        call device_free(s%zg_d)
     end if
-    
+
     if (c_associated(s%v_d)) then
        call device_free(s%v_d)
     end if
@@ -481,7 +481,7 @@ pure function space_eq(Xh, Yh) result(res)
     else
        res = .false.
     end if
-    
+
   end function space_eq
 
   !> Check if \f$ X_h \ne Y_H \f$
@@ -498,9 +498,9 @@ pure function space_ne(Xh, Yh) result(res)
     else
        res = .true.
     end if
-    
+
   end function space_ne
-  
+
   subroutine space_compute_dist(dx, x, lx)
     integer, intent(in) :: lx
     real(kind=rp), intent(inout) :: dx(lx), x(lx)
@@ -514,13 +514,13 @@ subroutine space_compute_dist(dx, x, lx)
        dx(i) = 1.0_rp / dx(i)
     end do
   end subroutine space_compute_dist
-  
-  
+
+
   !> Generate spectral tranform matrices
   !! @param Xh SEM function space.
   subroutine space_generate_transformation_matrices(Xh)
     type(space_t), intent(inout) :: Xh
-    
+
     real(kind=rp) :: L(0:Xh%lx-1)
     real(kind=rp) :: delta(Xh%lx)
     integer :: i, kj, j, j2, kk
@@ -535,7 +535,7 @@ subroutine space_generate_transformation_matrices(Xh)
          L(1) = Xh%zg(j,1)
          do j2 = 2, Xh%lx-1
             L(j2) = ( (2*j2-1) * Xh%zg(j,1) * L(j2-1) &
-                  - (j2-1) * L(j2-2) ) / j2 
+                  - (j2-1) * L(j2-2) ) / j2
          end do
          do kk = 1, Xh%lx
             kj = kj+1
@@ -550,14 +550,14 @@ subroutine space_generate_transformation_matrices(Xh)
       do i = 1, Xh%lx
          delta(i) = 2.0_rp / (2*(i-1)+1)
       end do
-      ! modify last entry  
+      ! modify last entry
       delta(Xh%lx) = 2.0_rp / (Xh%lx-1)
 
       ! calculate the inverse to multiply the matrix
       do i = 1, Xh%lx
          delta(i) = sqrt(1.0_rp / delta(i))
       end do
-      ! scale the matrix      
+      ! scale the matrix
       do i = 1, Xh%lx
          do j = 1, Xh%lx
             v(i,j) = v(i,j) * delta(j) ! orthogonal wrt weights
@@ -590,20 +590,20 @@ subroutine space_generate_transformation_matrices(Xh)
     end associate
 
     ! Copy the data to the GPU
-    ! Move all this to space.f90 to for next version 
+    ! Move all this to space.f90 to for next version
     if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. &
-    (NEKO_BCKND_OPENCL .eq. 1)) then 
+    (NEKO_BCKND_OPENCL .eq. 1)) then
 
        call device_memcpy(Xh%v,     Xh%v_d,     Xh%lxy, &
-                          HOST_TO_DEVICE)
+                          HOST_TO_DEVICE, sync=.false.)
        call device_memcpy(Xh%vt,    Xh%vt_d,    Xh%lxy, &
-                          HOST_TO_DEVICE)
+                          HOST_TO_DEVICE, sync=.false.)
        call device_memcpy(Xh%vinv,  Xh%vinv_d,  Xh%lxy, &
-                          HOST_TO_DEVICE)
+                          HOST_TO_DEVICE, sync=.false.)
        call device_memcpy(Xh%vinvt, Xh%vinvt_d, Xh%lxy, &
-                          HOST_TO_DEVICE)
+                          HOST_TO_DEVICE, sync=.false.)
        call device_memcpy(Xh%w,     Xh%w_d,     Xh%lxy, &
-                          HOST_TO_DEVICE)
+                          HOST_TO_DEVICE, sync=.false.)
 
     end if
 
diff --git a/src/sem/speclib.f90 b/src/sem/speclib.f90
index bcf054be8f4..6c0d136b082 100644
--- a/src/sem/speclib.f90
+++ b/src/sem/speclib.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 !==============================================================================
@@ -150,21 +150,21 @@ module speclib
 
 
 contains
-      !> Generate `NP` Gauss Legendre points `Z` and weights `W`
-      !! associated with Jacobi polynomial \f$ P(N)(\alpha=0, \beta=0) \f$.
-      !! The polynomial degree `N = NP-1`.
-      !! @param Z Quadrature points.
-      !! @param W Quadrature weights.
-      !! @param NP Number of quadrature points.
-      SUBROUTINE ZWGL (Z,W,NP)
-      REAL(KIND=RP) Z(1),W(1), ALPHA, BETA
-      ALPHA = 0.
-      BETA  = 0.
-      CALL ZWGJ (Z,W,NP,ALPHA,BETA)
-      RETURN
-      END SUBROUTINE
+  !> Generate `NP` Gauss Legendre points `Z` and weights `W`
+  !! associated with Jacobi polynomial \f$ P(N)(\alpha=0, \beta=0) \f$.
+  !! The polynomial degree `N = NP-1`.
+  !! @param Z Quadrature points.
+  !! @param W Quadrature weights.
+  !! @param NP Number of quadrature points.
+  SUBROUTINE ZWGL (Z,W,NP)
+    REAL(KIND=RP) Z(1),W(1), ALPHA, BETA
+    ALPHA = 0.
+    BETA  = 0.
+    CALL ZWGJ (Z,W,NP,ALPHA,BETA)
+    RETURN
+  end subroutine ZWGL
 
-      SUBROUTINE ZWGLL (Z,W,NP)
+  SUBROUTINE ZWGLL (Z,W,NP)
 !--------------------------------------------------------------------
 !
 !     Generate NP Gauss-Lobatto Legendre points (Z) and weights (W)
@@ -174,14 +174,14 @@ SUBROUTINE ZWGLL (Z,W,NP)
 !     operations are done in double precision.
 !
 !--------------------------------------------------------------------
-      REAL(KIND=RP) Z(1),W(1), ALPHA, BETA
-      ALPHA = 0.
-      BETA  = 0.
-      CALL ZWGLJ (Z,W,NP,ALPHA,BETA)
-      RETURN
-      END SUBROUTINE
+    REAL(KIND=RP) Z(1),W(1), ALPHA, BETA
+    ALPHA = 0.
+    BETA  = 0.
+    CALL ZWGLJ (Z,W,NP,ALPHA,BETA)
+    RETURN
+  end subroutine ZWGLL
 
-      SUBROUTINE ZWGJ (Z,W,NP,ALPHA,BETA)
+  SUBROUTINE ZWGJ (Z,W,NP,ALPHA,BETA)
 !--------------------------------------------------------------------
 !
 !     Generate NP GAUSS JACOBI points (Z) and weights (W)
@@ -190,29 +190,29 @@ SUBROUTINE ZWGJ (Z,W,NP,ALPHA,BETA)
 !     Single precision version.
 !
 !--------------------------------------------------------------------
-      PARAMETER (NMAX=84)
-      PARAMETER (NZD = NMAX)
-      REAL(KIND=RP)  ZD(NZD),WD(NZD),ALPHAD,BETAD
-      REAL(KIND=RP) Z(1),W(1),ALPHA,BETA
+    PARAMETER (NMAX=84)
+    PARAMETER (NZD = NMAX)
+    REAL(KIND=RP)  ZD(NZD),WD(NZD),ALPHAD,BETAD
+    REAL(KIND=RP) Z(1),W(1),ALPHA,BETA
 
-      NPMAX = NZD
-      IF (NP.GT.NPMAX) THEN
-         WRITE (6,*) 'Too large polynomial degree in ZWGJ'
-         WRITE (6,*) 'Maximum polynomial degree is',NMAX
-         WRITE (6,*) 'Here NP=',NP
-         call neko_error
-      ENDIF
-      ALPHAD = ALPHA
-      BETAD  = BETA
-      CALL ZWGJD (ZD,WD,NP,ALPHAD,BETAD)
-      DO 100 I=1,NP
-         Z(I) = ZD(I)
-         W(I) = WD(I)
- 100  CONTINUE
-      RETURN
-      END SUBROUTINE
+    NPMAX = NZD
+    IF (NP.GT.NPMAX) THEN
+       WRITE (6,*) 'Too large polynomial degree in ZWGJ'
+       WRITE (6,*) 'Maximum polynomial degree is',NMAX
+       WRITE (6,*) 'Here NP=',NP
+       call neko_error
+    ENDIF
+    ALPHAD = ALPHA
+    BETAD  = BETA
+    CALL ZWGJD (ZD,WD,NP,ALPHAD,BETAD)
+    DO 100 I=1,NP
+       Z(I) = ZD(I)
+       W(I) = WD(I)
+100 CONTINUE
+    RETURN
+  end subroutine ZWGJ
 
-      SUBROUTINE ZWGJD (Z,W,NP,ALPHA,BETA)
+  SUBROUTINE ZWGJD (Z,W,NP,ALPHA,BETA)
 !--------------------------------------------------------------------
 !
 !     Generate NP GAUSS JACOBI points (Z) and weights (W)
@@ -221,50 +221,50 @@ SUBROUTINE ZWGJD (Z,W,NP,ALPHA,BETA)
 !     Double precision version.
 !
 !--------------------------------------------------------------------
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP)  Z(1),W(1),ALPHA,BETA
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP)  Z(1),W(1),ALPHA,BETA
 
-      N     = NP-1
-      DN    = ((N))
-      ONE   = 1.
-      TWO   = 2.
-      APB   = ALPHA+BETA
+    N     = NP-1
+    DN    = ((N))
+    ONE   = 1.
+    TWO   = 2.
+    APB   = ALPHA+BETA
 
-      IF (NP.LE.0) THEN
-         WRITE (6,*) 'ZWGJD: Minimum number of Gauss points is 1',np
-         call neko_error
-      ENDIF
-      IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN
-         WRITE (6,*) 'ZWGJD: Alpha and Beta must be greater than -1'
-         call neko_error
-      ENDIF
+    IF (NP.LE.0) THEN
+       WRITE (6,*) 'ZWGJD: Minimum number of Gauss points is 1',np
+       call neko_error
+    ENDIF
+    IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN
+       WRITE (6,*) 'ZWGJD: Alpha and Beta must be greater than -1'
+       call neko_error
+    ENDIF
 
-      IF (NP.EQ.1) THEN
-         Z(1) = (BETA-ALPHA)/(APB+TWO)
-         W(1) = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE)/GAMMAF(APB+TWO) &
+    IF (NP.EQ.1) THEN
+       Z(1) = (BETA-ALPHA)/(APB+TWO)
+       W(1) = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE)/GAMMAF(APB+TWO) &
               * TWO**(APB+ONE)
-         RETURN
-      ENDIF
+       RETURN
+    ENDIF
 
-      CALL JACG (Z,NP,ALPHA,BETA)
+    CALL JACG (Z,NP,ALPHA,BETA)
 
-      NP1   = N+1
-      NP2   = N+2
-      DNP1  = ((NP1))
-      DNP2  = ((NP2))
-      FAC1  = DNP1+ALPHA+BETA+ONE
-      FAC2  = FAC1+DNP1
-      FAC3  = FAC2+ONE
-      FNORM = PNORMJ(NP1,ALPHA,BETA)
-      RCOEF = (FNORM*FAC2*FAC3)/(TWO*FAC1*DNP2)
-      DO 100 I=1,NP
-         CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP2,ALPHA,BETA,Z(I))
-         W(I) = -RCOEF/(P*PDM1)
- 100  CONTINUE
-      RETURN
-      END SUBROUTINE
+    NP1   = N+1
+    NP2   = N+2
+    DNP1  = ((NP1))
+    DNP2  = ((NP2))
+    FAC1  = DNP1+ALPHA+BETA+ONE
+    FAC2  = FAC1+DNP1
+    FAC3  = FAC2+ONE
+    FNORM = PNORMJ(NP1,ALPHA,BETA)
+    RCOEF = (FNORM*FAC2*FAC3)/(TWO*FAC1*DNP2)
+    DO 100 I=1,NP
+       CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP2,ALPHA,BETA,Z(I))
+       W(I) = -RCOEF/(P*PDM1)
+100 CONTINUE
+    RETURN
+  end subroutine ZWGJD
 
-      SUBROUTINE ZWGLJ (Z,W,NP,ALPHA,BETA)
+  SUBROUTINE ZWGLJ (Z,W,NP,ALPHA,BETA)
 !--------------------------------------------------------------------
 !
 !     Generate NP GAUSS LOBATTO JACOBI points (Z) and weights (W)
@@ -273,29 +273,29 @@ SUBROUTINE ZWGLJ (Z,W,NP,ALPHA,BETA)
 !     Single precision version.
 !
 !--------------------------------------------------------------------
-      PARAMETER (NMAX=84)
-      PARAMETER (NZD = NMAX)
-      REAL(KIND=RP)  ZD(NZD),WD(NZD),ALPHAD,BETAD
-      REAL(KIND=RP) Z(1),W(1),ALPHA,BETA
+    PARAMETER (NMAX=84)
+    PARAMETER (NZD = NMAX)
+    REAL(KIND=RP)  ZD(NZD),WD(NZD),ALPHAD,BETAD
+    REAL(KIND=RP) Z(1),W(1),ALPHA,BETA
 
-      NPMAX = NZD
-      IF (NP.GT.NPMAX) THEN
-         WRITE (6,*) 'Too large polynomial degree in ZWGLJ'
-         WRITE (6,*) 'Maximum polynomial degree is',NMAX
-         WRITE (6,*) 'Here NP=',NP
-         call neko_error
-      ENDIF
-      ALPHAD = ALPHA
-      BETAD  = BETA
-      CALL ZWGLJD (ZD,WD,NP,ALPHAD,BETAD)
-      DO 100 I=1,NP
-         Z(I) = ZD(I)
-         W(I) = WD(I)
- 100  CONTINUE
-      RETURN
-      END SUBROUTINE
+    NPMAX = NZD
+    IF (NP.GT.NPMAX) THEN
+       WRITE (6,*) 'Too large polynomial degree in ZWGLJ'
+       WRITE (6,*) 'Maximum polynomial degree is',NMAX
+       WRITE (6,*) 'Here NP=',NP
+       call neko_error
+    ENDIF
+    ALPHAD = ALPHA
+    BETAD  = BETA
+    CALL ZWGLJD (ZD,WD,NP,ALPHAD,BETAD)
+    DO 100 I=1,NP
+       Z(I) = ZD(I)
+       W(I) = WD(I)
+100 CONTINUE
+    RETURN
+  end subroutine ZWGLJ
 
-      SUBROUTINE ZWGLJD (Z,W,NP,ALPHA,BETA)
+  SUBROUTINE ZWGLJD (Z,W,NP,ALPHA,BETA)
 !--------------------------------------------------------------------
 !
 !     Generate NP GAUSS LOBATTO JACOBI points (Z) and weights (W)
@@ -304,181 +304,181 @@ SUBROUTINE ZWGLJD (Z,W,NP,ALPHA,BETA)
 !     Double precision version.
 !
 !--------------------------------------------------------------------
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP)  Z(NP),W(NP),ALPHA,BETA
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP)  Z(NP),W(NP),ALPHA,BETA
 
-      N     = NP-1
-      NM1   = N-1
-      ONE   = 1.
-      TWO   = 2.
+    N     = NP-1
+    NM1   = N-1
+    ONE   = 1.
+    TWO   = 2.
 
-      IF (NP.LE.1) THEN
+    IF (NP.LE.1) THEN
        WRITE (6,*) 'ZWGLJD: Minimum number of Gauss-Lobatto points is 2'
        WRITE (6,*) 'ZWGLJD: alpha,beta:',alpha,beta,np
        call neko_error
-      ENDIF
-      IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN
-         WRITE (6,*) 'ZWGLJD: Alpha and Beta must be greater than -1'
-         call neko_error
-      ENDIF
+    ENDIF
+    IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN
+       WRITE (6,*) 'ZWGLJD: Alpha and Beta must be greater than -1'
+       call neko_error
+    ENDIF
 
-      IF (NM1.GT.0) THEN
-         ALPG  = ALPHA+ONE
-         BETG  = BETA+ONE
-         CALL ZWGJD (Z(2),W(2),NM1,ALPG,BETG)
-      ENDIF
-      Z(1)  = -ONE
-      Z(NP) =  ONE
-      DO 100  I=2,NP-1
-         W(I) = W(I)/(ONE-Z(I)**2)
- 100  CONTINUE
-      CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(1))
-      W(1)  = ENDW1 (N,ALPHA,BETA)/(TWO*PD)
-      CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(NP))
-      W(NP) = ENDW2 (N,ALPHA,BETA)/(TWO*PD)
+    IF (NM1.GT.0) THEN
+       ALPG  = ALPHA+ONE
+       BETG  = BETA+ONE
+       CALL ZWGJD (Z(2),W(2),NM1,ALPG,BETG)
+    ENDIF
+    Z(1)  = -ONE
+    Z(NP) =  ONE
+    DO 100  I=2,NP-1
+       W(I) = W(I)/(ONE-Z(I)**2)
+100 CONTINUE
+    CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(1))
+    W(1)  = ENDW1 (N,ALPHA,BETA)/(TWO*PD)
+    CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(NP))
+    W(NP) = ENDW2 (N,ALPHA,BETA)/(TWO*PD)
 
 !      RETURN
-      END SUBROUTINE
+  end subroutine ZWGLJD
 
-      REAL(KIND=RP)  FUNCTION ENDW1 (N,ALPHA,BETA)
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP)  ALPHA,BETA
-      ZERO  = 0.
-      ONE   = 1.
-      TWO   = 2.
-      THREE = 3.
-      FOUR  = 4.
-      APB   = ALPHA+BETA
-      IF (N.EQ.0) THEN
-         ENDW1 = ZERO
-         RETURN
-      ENDIF
-      F1   = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE)
-      F1   = F1*(APB+TWO)*TWO**(APB+TWO)/TWO
-      IF (N.EQ.1) THEN
-         ENDW1 = F1
-         RETURN
-      ENDIF
-      FINT1 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE)
-      FINT1 = FINT1*TWO**(APB+TWO)
-      FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR)
-      FINT2 = FINT2*TWO**(APB+THREE)
-      F2    = (-TWO*(BETA+TWO)*FINT1 + (APB+FOUR)*FINT2) &
+  REAL(KIND=RP)  FUNCTION ENDW1 (N,ALPHA,BETA)
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP)  ALPHA,BETA
+    ZERO  = 0.
+    ONE   = 1.
+    TWO   = 2.
+    THREE = 3.
+    FOUR  = 4.
+    APB   = ALPHA+BETA
+    IF (N.EQ.0) THEN
+       ENDW1 = ZERO
+       RETURN
+    ENDIF
+    F1   = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE)
+    F1   = F1*(APB+TWO)*TWO**(APB+TWO)/TWO
+    IF (N.EQ.1) THEN
+       ENDW1 = F1
+       RETURN
+    ENDIF
+    FINT1 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE)
+    FINT1 = FINT1*TWO**(APB+TWO)
+    FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR)
+    FINT2 = FINT2*TWO**(APB+THREE)
+    F2    = (-TWO*(BETA+TWO)*FINT1 + (APB+FOUR)*FINT2) &
            * (APB+THREE)/FOUR
-      IF (N.EQ.2) THEN
-         ENDW1 = F2
-         RETURN
-      ENDIF
-      DO 100 I=3,N
-         DI   = ((I-1))
-         ABN  = ALPHA+BETA+DI
-         ABNN = ABN+DI
-         A1   = -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE))
-         A2   =  (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO))
-         A3   =  (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE))
-         F3   =  -(A2*F2+A1*F1)/A3
-         F1   = F2
-         F2   = F3
- 100  CONTINUE
-      ENDW1  = F3
-      RETURN
-      END FUNCTION
+    IF (N.EQ.2) THEN
+       ENDW1 = F2
+       RETURN
+    ENDIF
+    DO 100 I=3,N
+       DI   = ((I-1))
+       ABN  = ALPHA+BETA+DI
+       ABNN = ABN+DI
+       A1   = -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE))
+       A2   =  (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO))
+       A3   =  (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE))
+       F3   =  -(A2*F2+A1*F1)/A3
+       F1   = F2
+       F2   = F3
+100 CONTINUE
+    ENDW1  = F3
+    RETURN
+  end function ENDW1
 
-      REAL(KIND=RP)  FUNCTION ENDW2 (N,ALPHA,BETA)
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP)  ALPHA,BETA
-      ZERO  = 0.
-      ONE   = 1.
-      TWO   = 2.
-      THREE = 3.
-      FOUR  = 4.
-      APB   = ALPHA+BETA
-      IF (N.EQ.0) THEN
-         ENDW2 = ZERO
-         RETURN
-      ENDIF
-      F1   = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE)
-      F1   = F1*(APB+TWO)*TWO**(APB+TWO)/TWO
-      IF (N.EQ.1) THEN
-         ENDW2 = F1
-         RETURN
-      ENDIF
-      FINT1 = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE)
-      FINT1 = FINT1*TWO**(APB+TWO)
-      FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR)
-      FINT2 = FINT2*TWO**(APB+THREE)
-      F2    = (TWO*(ALPHA+TWO)*FINT1 - (APB+FOUR)*FINT2) &
+  REAL(KIND=RP)  FUNCTION ENDW2 (N,ALPHA,BETA)
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP)  ALPHA,BETA
+    ZERO  = 0.
+    ONE   = 1.
+    TWO   = 2.
+    THREE = 3.
+    FOUR  = 4.
+    APB   = ALPHA+BETA
+    IF (N.EQ.0) THEN
+       ENDW2 = ZERO
+       RETURN
+    ENDIF
+    F1   = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE)
+    F1   = F1*(APB+TWO)*TWO**(APB+TWO)/TWO
+    IF (N.EQ.1) THEN
+       ENDW2 = F1
+       RETURN
+    ENDIF
+    FINT1 = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE)
+    FINT1 = FINT1*TWO**(APB+TWO)
+    FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR)
+    FINT2 = FINT2*TWO**(APB+THREE)
+    F2    = (TWO*(ALPHA+TWO)*FINT1 - (APB+FOUR)*FINT2) &
            * (APB+THREE)/FOUR
-      IF (N.EQ.2) THEN
-         ENDW2 = F2
-         RETURN
-      ENDIF
-      DO 100 I=3,N
-         DI   = ((I-1))
-         ABN  = ALPHA+BETA+DI
-         ABNN = ABN+DI
-         A1   =  -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE))
-         A2   =  (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO))
-         A3   =  (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE))
-         F3   =  -(A2*F2+A1*F1)/A3
-         F1   = F2
-         F2   = F3
- 100  CONTINUE
-      ENDW2  = F3
-      RETURN
-      END FUNCTION
+    IF (N.EQ.2) THEN
+       ENDW2 = F2
+       RETURN
+    ENDIF
+    DO 100 I=3,N
+       DI   = ((I-1))
+       ABN  = ALPHA+BETA+DI
+       ABNN = ABN+DI
+       A1   =  -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE))
+       A2   =  (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO))
+       A3   =  (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE))
+       F3   =  -(A2*F2+A1*F1)/A3
+       F1   = F2
+       F2   = F3
+100 CONTINUE
+    ENDW2  = F3
+    RETURN
+  end function ENDW2
 
-      REAL(KIND=RP)  FUNCTION GAMMAF (X)
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP)  X
-      ZERO = 0.0
-      HALF = 0.5
-      ONE  = 1.0
-      TWO  = 2.0
-      FOUR = 4.0
-      PI   = FOUR*ATAN(ONE)
-      GAMMAF = ONE
-      IF (X.EQ.-HALF) GAMMAF = -TWO*SQRT(PI)
-      IF (X.EQ. HALF) GAMMAF =  SQRT(PI)
-      IF (X.EQ. ONE ) GAMMAF =  ONE
-      IF (X.EQ. TWO ) GAMMAF =  ONE
-      IF (X.EQ. 1.5  ) GAMMAF =  SQRT(PI)/2.
-      IF (X.EQ. 2.5) GAMMAF =  1.5*SQRT(PI)/2.
-      IF (X.EQ. 3.5) GAMMAF =  0.5*(2.5*(1.5*SQRT(PI)))
-      IF (X.EQ. 3. ) GAMMAF =  2.
-      IF (X.EQ. 4. ) GAMMAF = 6.
-      IF (X.EQ. 5. ) GAMMAF = 24.
-      IF (X.EQ. 6. ) GAMMAF = 120.
-      RETURN
-      END FUNCTION
+  REAL(KIND=RP)  FUNCTION GAMMAF (X)
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP)  X
+    ZERO = 0.0
+    HALF = 0.5
+    ONE  = 1.0
+    TWO  = 2.0
+    FOUR = 4.0
+    PI   = FOUR*ATAN(ONE)
+    GAMMAF = ONE
+    IF (X.EQ.-HALF) GAMMAF = -TWO*SQRT(PI)
+    IF (X.EQ. HALF) GAMMAF =  SQRT(PI)
+    IF (X.EQ. ONE ) GAMMAF =  ONE
+    IF (X.EQ. TWO ) GAMMAF =  ONE
+    IF (X.EQ. 1.5  ) GAMMAF =  SQRT(PI)/2.
+    IF (X.EQ. 2.5) GAMMAF =  1.5*SQRT(PI)/2.
+    IF (X.EQ. 3.5) GAMMAF =  0.5*(2.5*(1.5*SQRT(PI)))
+    IF (X.EQ. 3. ) GAMMAF =  2.
+    IF (X.EQ. 4. ) GAMMAF = 6.
+    IF (X.EQ. 5. ) GAMMAF = 24.
+    IF (X.EQ. 6. ) GAMMAF = 120.
+    RETURN
+  end function GAMMAF
 
-      REAL(KIND=RP)  FUNCTION PNORMJ (N,ALPHA,BETA)
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP)  ALPHA,BETA
-      ONE   = 1.
-      TWO   = 2.
-      DN    = ((N))
-      CONST = ALPHA+BETA+ONE
-      IF (N.LE.1) THEN
-         PROD   = GAMMAF(DN+ALPHA)*GAMMAF(DN+BETA)
-         PROD   = PROD/(GAMMAF(DN)*GAMMAF(DN+ALPHA+BETA))
-         PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST)
-         RETURN
-      ENDIF
-      PROD  = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE)
-      PROD  = PROD/(TWO*(ONE+CONST)*GAMMAF(CONST+ONE))
-      PROD  = PROD*(ONE+ALPHA)*(TWO+ALPHA)
-      PROD  = PROD*(ONE+BETA)*(TWO+BETA)
-      DO 100 I=3,N
-         DINDX = ((I))
-         FRAC  = (DINDX+ALPHA)*(DINDX+BETA)/(DINDX*(DINDX+ALPHA+BETA))
-         PROD  = PROD*FRAC
- 100  CONTINUE
-      PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST)
-      RETURN
-      END FUNCTION
+  REAL(KIND=RP)  FUNCTION PNORMJ (N,ALPHA,BETA)
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP)  ALPHA,BETA
+    ONE   = 1.
+    TWO   = 2.
+    DN    = ((N))
+    CONST = ALPHA+BETA+ONE
+    IF (N.LE.1) THEN
+       PROD   = GAMMAF(DN+ALPHA)*GAMMAF(DN+BETA)
+       PROD   = PROD/(GAMMAF(DN)*GAMMAF(DN+ALPHA+BETA))
+       PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST)
+       RETURN
+    ENDIF
+    PROD  = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE)
+    PROD  = PROD/(TWO*(ONE+CONST)*GAMMAF(CONST+ONE))
+    PROD  = PROD*(ONE+ALPHA)*(TWO+ALPHA)
+    PROD  = PROD*(ONE+BETA)*(TWO+BETA)
+    DO 100 I=3,N
+       DINDX = ((I))
+       FRAC  = (DINDX+ALPHA)*(DINDX+BETA)/(DINDX*(DINDX+ALPHA+BETA))
+       PROD  = PROD*FRAC
+100 CONTINUE
+    PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST)
+    RETURN
+  end function PNORMJ
 
-      SUBROUTINE JACG (XJAC,NP,ALPHA,BETA)
+  SUBROUTINE JACG (XJAC,NP,ALPHA,BETA)
 !--------------------------------------------------------------------
 !
 !     Compute NP Gauss points XJAC, which are the zeros of the
@@ -489,94 +489,94 @@ SUBROUTINE JACG (XJAC,NP,ALPHA,BETA)
 !     ALPHA = BETA = -0.5  ->  Chebyshev points
 !
 !--------------------------------------------------------------------
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP)  XJAC(1)
-      DATA KSTOP /10/
-      DATA EPS/1.0e-12_RP/
-      N   = NP-1
-      one = 1.
-      DTH = 4.*ATAN(one)/(2.*((N))+2.)
-      DO 40 J=1,NP
-         IF (J.EQ.1) THEN
-            X = COS((2.*(((J))-1.)+1.)*DTH)
-         ELSE
-            X1 = COS((2.*(((J))-1.)+1.)*DTH)
-            X2 = XLAST
-            X  = (X1+X2)/2.
-         ENDIF
-         DO 30 K=1,KSTOP
-            CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,X)
-            RECSUM = 0.
-            JM = J-1
-            DO 29 I=1,JM
-               RECSUM = RECSUM+1./(X-XJAC(NP-I+1))
- 29         CONTINUE
-            DELX = -P/(PD-RECSUM*P)
-            X    = X+DELX
-            IF (ABS(DELX) .LT. EPS) GOTO 31
- 30      CONTINUE
- 31      CONTINUE
-         XJAC(NP-J+1) = X
-         XLAST        = X
- 40   CONTINUE
-      DO 200 I=1,NP
-         XMIN = 2.
-         DO 100 J=I,NP
-            IF (XJAC(J).LT.XMIN) THEN
-               XMIN = XJAC(J)
-               JMIN = J
-            ENDIF
- 100     CONTINUE
-         IF (JMIN.NE.I) THEN
-            SWAP = XJAC(I)
-            XJAC(I) = XJAC(JMIN)
-            XJAC(JMIN) = SWAP
-         ENDIF
- 200  CONTINUE
-      RETURN
-      END SUBROUTINE
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP)  XJAC(1)
+    DATA KSTOP /10/
+    DATA EPS/1.0e-12_RP/
+    N   = NP-1
+    one = 1.
+    DTH = 4.*ATAN(one)/(2.*((N))+2.)
+    DO 40 J=1,NP
+       IF (J.EQ.1) THEN
+          X = COS((2.*(((J))-1.)+1.)*DTH)
+       ELSE
+          X1 = COS((2.*(((J))-1.)+1.)*DTH)
+          X2 = XLAST
+          X  = (X1+X2)/2.
+       ENDIF
+       DO 30 K=1,KSTOP
+          CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,X)
+          RECSUM = 0.
+          JM = J-1
+          DO 29 I=1,JM
+             RECSUM = RECSUM+1./(X-XJAC(NP-I+1))
+29        CONTINUE
+          DELX = -P/(PD-RECSUM*P)
+          X    = X+DELX
+          IF (ABS(DELX) .LT. EPS) GOTO 31
+30     CONTINUE
+31     CONTINUE
+       XJAC(NP-J+1) = X
+       XLAST        = X
+40  CONTINUE
+    DO 200 I=1,NP
+       XMIN = 2.
+       DO 100 J=I,NP
+          IF (XJAC(J).LT.XMIN) THEN
+             XMIN = XJAC(J)
+             JMIN = J
+          ENDIF
+100    CONTINUE
+       IF (JMIN.NE.I) THEN
+          SWAP = XJAC(I)
+          XJAC(I) = XJAC(JMIN)
+          XJAC(JMIN) = SWAP
+       ENDIF
+200 CONTINUE
+    RETURN
+  end subroutine JACG
 
-      SUBROUTINE JACOBF (POLY,PDER,POLYM1,PDERM1,POLYM2,PDERM2,N,ALP,BET,X)
+  SUBROUTINE JACOBF (POLY,PDER,POLYM1,PDERM1,POLYM2,PDERM2,N,ALP,BET,X)
 !--------------------------------------------------------------------
 !
 !     Computes the Jacobi polynomial (POLY) and its derivative (PDER)
 !     of degree N at X.
 !
 !--------------------------------------------------------------------
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      APB  = ALP+BET
-      POLY = 1.
-      PDER = 0.
-      IF (N .EQ. 0) RETURN
-      POLYL = POLY
-      PDERL = PDER
-      POLY  = (ALP-BET+(APB+2.)*X)/2.
-      PDER  = (APB+2.)/2.
-      IF (N .EQ. 1) RETURN
-      DO 20 K=2,N
-         DK = ((K))
-         A1 = 2.*DK*(DK+APB)*(2.*DK+APB-2.)
-         A2 = (2.*DK+APB-1.)*(ALP**2-BET**2)
-         B3 = (2.*DK+APB-2.)
-         A3 = B3*(B3+1.)*(B3+2.)
-         A4 = 2.*(DK+ALP-1.)*(DK+BET-1.)*(2.*DK+APB)
-         POLYN  = ((A2+A3*X)*POLY-A4*POLYL)/A1
-         PDERN  = ((A2+A3*X)*PDER-A4*PDERL+A3*POLY)/A1
-         PSAVE  = POLYL
-         PDSAVE = PDERL
-         POLYL  = POLY
-         POLY   = POLYN
-         PDERL  = PDER
-         PDER   = PDERN
- 20   CONTINUE
-      POLYM1 = POLYL
-      PDERM1 = PDERL
-      POLYM2 = PSAVE
-      PDERM2 = PDSAVE
-      RETURN
-      END SUBROUTINE
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    APB  = ALP+BET
+    POLY = 1.
+    PDER = 0.
+    IF (N .EQ. 0) RETURN
+    POLYL = POLY
+    PDERL = PDER
+    POLY  = (ALP-BET+(APB+2.)*X)/2.
+    PDER  = (APB+2.)/2.
+    IF (N .EQ. 1) RETURN
+    DO 20 K=2,N
+       DK = ((K))
+       A1 = 2.*DK*(DK+APB)*(2.*DK+APB-2.)
+       A2 = (2.*DK+APB-1.)*(ALP**2-BET**2)
+       B3 = (2.*DK+APB-2.)
+       A3 = B3*(B3+1.)*(B3+2.)
+       A4 = 2.*(DK+ALP-1.)*(DK+BET-1.)*(2.*DK+APB)
+       POLYN  = ((A2+A3*X)*POLY-A4*POLYL)/A1
+       PDERN  = ((A2+A3*X)*PDER-A4*PDERL+A3*POLY)/A1
+       PSAVE  = POLYL
+       PDSAVE = PDERL
+       POLYL  = POLY
+       POLY   = POLYN
+       PDERL  = PDER
+       PDER   = PDERN
+20  CONTINUE
+    POLYM1 = POLYL
+    PDERM1 = PDERL
+    POLYM2 = PSAVE
+    PDERM2 = PDSAVE
+    RETURN
+  end subroutine JACOBF
 
-      REAL(KIND=RP) FUNCTION HGJ (II,Z,ZGJ,NP,ALPHA,BETA)
+  REAL(KIND=RP) FUNCTION HGJ (II,Z,ZGJ,NP,ALPHA,BETA)
 !---------------------------------------------------------------------
 !
 !     Compute the value of the Lagrangian interpolant HGJ through
@@ -584,28 +584,28 @@ REAL(KIND=RP) FUNCTION HGJ (II,Z,ZGJ,NP,ALPHA,BETA)
 !     Single precision version.
 !
 !---------------------------------------------------------------------
-      PARAMETER (NMAX=84)
-      PARAMETER (NZD = NMAX)
-      REAL(KIND=RP)  ZD,ZGJD(NZD),ALPHAD,BETAD
-      REAL(KIND=RP)  Z,ZGJ(1),ALPHA,BETA
-      NPMAX = NZD
-      IF (NP.GT.NPMAX) THEN
-         WRITE (6,*) 'Too large polynomial degree in HGJ'
-         WRITE (6,*) 'Maximum polynomial degree is',NMAX
-         WRITE (6,*) 'Here NP=',NP
-         call neko_error
-      ENDIF
-      ZD = Z
-      DO 100 I=1,NP
-         ZGJD(I) = ZGJ(I)
- 100  CONTINUE
-      ALPHAD = ALPHA
-      BETAD  = BETA
-      HGJ    = HGJD (II,ZD,ZGJD,NP,ALPHAD,BETAD)
-      RETURN
-      END FUNCTION
+    PARAMETER (NMAX=84)
+    PARAMETER (NZD = NMAX)
+    REAL(KIND=RP)  ZD,ZGJD(NZD),ALPHAD,BETAD
+    REAL(KIND=RP)  Z,ZGJ(1),ALPHA,BETA
+    NPMAX = NZD
+    IF (NP.GT.NPMAX) THEN
+       WRITE (6,*) 'Too large polynomial degree in HGJ'
+       WRITE (6,*) 'Maximum polynomial degree is',NMAX
+       WRITE (6,*) 'Here NP=',NP
+       call neko_error
+    ENDIF
+    ZD = Z
+    DO 100 I=1,NP
+       ZGJD(I) = ZGJ(I)
+100 CONTINUE
+    ALPHAD = ALPHA
+    BETAD  = BETA
+    HGJ    = HGJD (II,ZD,ZGJD,NP,ALPHAD,BETAD)
+    RETURN
+  end function HGJ
 
-      REAL(KIND=RP)  FUNCTION HGJD (II,Z,ZGJ,NP,ALPHA,BETA)
+  REAL(KIND=RP)  FUNCTION HGJD (II,Z,ZGJ,NP,ALPHA,BETA)
 !---------------------------------------------------------------------
 !
 !     Compute the value of the Lagrangian interpolant HGJD through
@@ -613,23 +613,23 @@ REAL(KIND=RP)  FUNCTION HGJD (II,Z,ZGJ,NP,ALPHA,BETA)
 !     Double precision version.
 !
 !---------------------------------------------------------------------
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP)  Z,ZGJ(1),ALPHA,BETA
-      EPS = 1.e-5
-      ONE = 1.
-      ZI  = ZGJ(II)
-      DZ  = Z-ZI
-      IF (ABS(DZ).LT.EPS) THEN
-         HGJD = ONE
-         RETURN
-      ENDIF
-      CALL JACOBF (PZI,PDZI,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,ZI)
-      CALL JACOBF (PZ,PDZ,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,Z)
-      HGJD  = PZ/(PDZI*(Z-ZI))
-      RETURN
-      END FUNCTION
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP)  Z,ZGJ(1),ALPHA,BETA
+    EPS = 1.e-5
+    ONE = 1.
+    ZI  = ZGJ(II)
+    DZ  = Z-ZI
+    IF (ABS(DZ).LT.EPS) THEN
+       HGJD = ONE
+       RETURN
+    ENDIF
+    CALL JACOBF (PZI,PDZI,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,ZI)
+    CALL JACOBF (PZ,PDZ,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,Z)
+    HGJD  = PZ/(PDZI*(Z-ZI))
+    RETURN
+  end function HGJD
 
-      REAL(KIND=RP) FUNCTION HGLJ (II,Z,ZGLJ,NP,ALPHA,BETA)
+  REAL(KIND=RP) FUNCTION HGLJ (II,Z,ZGLJ,NP,ALPHA,BETA)
 !---------------------------------------------------------------------
 !
 !     Compute the value of the Lagrangian interpolant HGLJ through
@@ -637,28 +637,28 @@ REAL(KIND=RP) FUNCTION HGLJ (II,Z,ZGLJ,NP,ALPHA,BETA)
 !     Single precision version.
 !
 !---------------------------------------------------------------------
-      PARAMETER (NMAX=84)
-      PARAMETER (NZD = NMAX)
-      REAL(KIND=RP)  ZD,ZGLJD(NZD),ALPHAD,BETAD
-      REAL(KIND=RP)  Z,ZGLJ(1),ALPHA,BETA
-      NPMAX = NZD
-      IF (NP.GT.NPMAX) THEN
-         WRITE (6,*) 'Too large polynomial degree in HGLJ'
-         WRITE (6,*) 'Maximum polynomial degree is',NMAX
-         WRITE (6,*) 'Here NP=',NP
-         call neko_error
-      ENDIF
-      ZD = Z
-      DO 100 I=1,NP
-         ZGLJD(I) = ZGLJ(I)
- 100  CONTINUE
-      ALPHAD = ALPHA
-      BETAD  = BETA
-      HGLJ   = HGLJD (II,ZD,ZGLJD,NP,ALPHAD,BETAD)
-      RETURN
-      END FUNCTION
+    PARAMETER (NMAX=84)
+    PARAMETER (NZD = NMAX)
+    REAL(KIND=RP)  ZD,ZGLJD(NZD),ALPHAD,BETAD
+    REAL(KIND=RP)  Z,ZGLJ(1),ALPHA,BETA
+    NPMAX = NZD
+    IF (NP.GT.NPMAX) THEN
+       WRITE (6,*) 'Too large polynomial degree in HGLJ'
+       WRITE (6,*) 'Maximum polynomial degree is',NMAX
+       WRITE (6,*) 'Here NP=',NP
+       call neko_error
+    ENDIF
+    ZD = Z
+    DO 100 I=1,NP
+       ZGLJD(I) = ZGLJ(I)
+100 CONTINUE
+    ALPHAD = ALPHA
+    BETAD  = BETA
+    HGLJ   = HGLJD (II,ZD,ZGLJD,NP,ALPHAD,BETAD)
+    RETURN
+  end function HGLJ
 
-      REAL(KIND=RP)  FUNCTION HGLJD (I,Z,ZGLJ,NP,ALPHA,BETA)
+  REAL(KIND=RP)  FUNCTION HGLJD (I,Z,ZGLJ,NP,ALPHA,BETA)
 !---------------------------------------------------------------------
 !
 !     Compute the value of the Lagrangian interpolant HGLJD through
@@ -666,27 +666,27 @@ REAL(KIND=RP)  FUNCTION HGLJD (I,Z,ZGLJ,NP,ALPHA,BETA)
 !     Double precision version.
 !
 !---------------------------------------------------------------------
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP)  Z,ZGLJ(1),ALPHA,BETA
-      EPS = 1.e-5
-      ONE = 1.
-      ZI  = ZGLJ(I)
-      DZ  = Z-ZI
-      IF (ABS(DZ).LT.EPS) THEN
-         HGLJD = ONE
-         RETURN
-      ENDIF
-      N      = NP-1
-      DN     = ((N))
-      EIGVAL = -DN*(DN+ALPHA+BETA+ONE)
-      CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,ZI)
-      CONST  = EIGVAL*PI+ALPHA*(ONE+ZI)*PDI-BETA*(ONE-ZI)*PDI
-      CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z)
-      HGLJD  = (ONE-Z**2)*PD/(CONST*(Z-ZI))
-      RETURN
-      END FUNCTION
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP)  Z,ZGLJ(1),ALPHA,BETA
+    EPS = 1.e-5
+    ONE = 1.
+    ZI  = ZGLJ(I)
+    DZ  = Z-ZI
+    IF (ABS(DZ).LT.EPS) THEN
+       HGLJD = ONE
+       RETURN
+    ENDIF
+    N      = NP-1
+    DN     = ((N))
+    EIGVAL = -DN*(DN+ALPHA+BETA+ONE)
+    CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,ZI)
+    CONST  = EIGVAL*PI+ALPHA*(ONE+ZI)*PDI-BETA*(ONE-ZI)*PDI
+    CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z)
+    HGLJD  = (ONE-Z**2)*PD/(CONST*(Z-ZI))
+    RETURN
+  end function HGLJD
 
-      SUBROUTINE DGJ (D,DT,Z,NZ,NZD,ALPHA,BETA)
+  SUBROUTINE DGJ (D,DT,Z,NZ,NZD,ALPHA,BETA)
 !-----------------------------------------------------------------
 !
 !     Compute the derivative matrix D and its transpose DT
@@ -696,41 +696,41 @@ SUBROUTINE DGJ (D,DT,Z,NZ,NZD,ALPHA,BETA)
 !     Single precision version.
 !
 !-----------------------------------------------------------------
-      PARAMETER (NMAX=84)
-      PARAMETER (NZDD = NMAX)
-      REAL(KIND=RP)  DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD),ALPHAD,BETAD
-      REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA
+    PARAMETER (NMAX=84)
+    PARAMETER (NZDD = NMAX)
+    REAL(KIND=RP)  DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD),ALPHAD,BETAD
+    REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA
 
-      IF (NZ.LE.0) THEN
-         WRITE (6,*) 'DGJ: Minimum number of Gauss points is 1'
-         call neko_error
-      ENDIF
-      IF (NZ .GT. NMAX) THEN
-         WRITE (6,*) 'Too large polynomial degree in DGJ'
-         WRITE (6,*) 'Maximum polynomial degree is',NMAX
-         WRITE (6,*) 'Here Nz=',Nz
-         call neko_error
-      ENDIF
-      IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN
-         WRITE (6,*) 'DGJ: Alpha and Beta must be greater than -1'
-         call neko_error
-      ENDIF
-      ALPHAD = ALPHA
-      BETAD  = BETA
-      DO 100 I=1,NZ
-         ZD(I) = Z(I)
- 100  CONTINUE
-      CALL DGJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD)
-      DO I=1,NZ
-      DO J=1,NZ
-         D(I,J)  = DD(I,J)
-         DT(I,J) = DTD(I,J)
-      END DO
-      END DO
-      RETURN
-      END SUBROUTINE
+    IF (NZ.LE.0) THEN
+       WRITE (6,*) 'DGJ: Minimum number of Gauss points is 1'
+       call neko_error
+    ENDIF
+    IF (NZ .GT. NMAX) THEN
+       WRITE (6,*) 'Too large polynomial degree in DGJ'
+       WRITE (6,*) 'Maximum polynomial degree is',NMAX
+       WRITE (6,*) 'Here Nz=',Nz
+       call neko_error
+    ENDIF
+    IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN
+       WRITE (6,*) 'DGJ: Alpha and Beta must be greater than -1'
+       call neko_error
+    ENDIF
+    ALPHAD = ALPHA
+    BETAD  = BETA
+    DO 100 I=1,NZ
+       ZD(I) = Z(I)
+100 CONTINUE
+    CALL DGJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD)
+    DO I=1,NZ
+       DO J=1,NZ
+          D(I,J)  = DD(I,J)
+          DT(I,J) = DTD(I,J)
+       END DO
+    END DO
+    RETURN
+  end subroutine DGJ
 
-      SUBROUTINE DGJD (D,DT,Z,NZ,NZD,ALPHA,BETA)
+  SUBROUTINE DGJD (D,DT,Z,NZ,NZD,ALPHA,BETA)
 !-----------------------------------------------------------------
 !
 !     Compute the derivative matrix D and its transpose DT
@@ -740,36 +740,36 @@ SUBROUTINE DGJD (D,DT,Z,NZ,NZD,ALPHA,BETA)
 !     Double precision version.
 !
 !-----------------------------------------------------------------
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP)  D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA
-      N    = NZ-1
-      DN   = ((N))
-      ONE  = 1.
-      TWO  = 2.
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP)  D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA
+    N    = NZ-1
+    DN   = ((N))
+    ONE  = 1.
+    TWO  = 2.
 
-      IF (NZ.LE.1) THEN
+    IF (NZ.LE.1) THEN
        WRITE (6,*) 'DGJD: Minimum number of Gauss-Lobatto points is 2'
        call neko_error
-      ENDIF
-      IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN
-         WRITE (6,*) 'DGJD: Alpha and Beta must be greater than -1'
-         call neko_error
-      ENDIF
+    ENDIF
+    IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN
+       WRITE (6,*) 'DGJD: Alpha and Beta must be greater than -1'
+       call neko_error
+    ENDIF
 
-      DO I=1,NZ
-      DO J=1,NZ
-         CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(I))
-         CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(J))
-         IF (I.NE.J) D(I,J) = PDI/(PDJ*(Z(I)-Z(J)))
-         IF (I.EQ.J) D(I,J) = ((ALPHA+BETA+TWO)*Z(I)+ALPHA-BETA)/ &
+    DO I=1,NZ
+       DO J=1,NZ
+          CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(I))
+          CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(J))
+          IF (I.NE.J) D(I,J) = PDI/(PDJ*(Z(I)-Z(J)))
+          IF (I.EQ.J) D(I,J) = ((ALPHA+BETA+TWO)*Z(I)+ALPHA-BETA)/ &
               (TWO*(ONE-Z(I)**2))
-         DT(J,I) = D(I,J)
-      END DO
-      END DO
-      RETURN
-      END SUBROUTINE
+          DT(J,I) = D(I,J)
+       END DO
+    END DO
+    RETURN
+  end subroutine DGJD
 
-      SUBROUTINE DGLJ (D,DT,Z,NZ,NZD,ALPHA,BETA)
+  SUBROUTINE DGLJ (D,DT,Z,NZ,NZD,ALPHA,BETA)
 !-----------------------------------------------------------------
 !
 !     Compute the derivative matrix D and its transpose DT
@@ -779,41 +779,41 @@ SUBROUTINE DGLJ (D,DT,Z,NZ,NZD,ALPHA,BETA)
 !     Single precision version.
 !
 !-----------------------------------------------------------------
-      PARAMETER (NMAX=84)
-      PARAMETER (NZDD = NMAX)
-      REAL(KIND=RP)  DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD),ALPHAD,BETAD
-      REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA
+    PARAMETER (NMAX=84)
+    PARAMETER (NZDD = NMAX)
+    REAL(KIND=RP)  DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD),ALPHAD,BETAD
+    REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA
 
-      IF (NZ.LE.1) THEN
+    IF (NZ.LE.1) THEN
        WRITE (6,*) 'DGLJ: Minimum number of Gauss-Lobatto points is 2'
        call neko_error
-      ENDIF
-      IF (NZ .GT. NMAX) THEN
-         WRITE (6,*) 'Too large polynomial degree in DGLJ'
-         WRITE (6,*) 'Maximum polynomial degree is',NMAX
-         WRITE (6,*) 'Here NZ=',NZ
-         call neko_error
-      ENDIF
-      IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN
-         WRITE (6,*) 'DGLJ: Alpha and Beta must be greater than -1'
-         call neko_error
-      ENDIF
-      ALPHAD = ALPHA
-      BETAD  = BETA
-      DO 100 I=1,NZ
-         ZD(I) = Z(I)
- 100  CONTINUE
-      CALL DGLJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD)
-      DO I=1,NZ
-      DO J=1,NZ
-         D(I,J)  = DD(I,J)
-         DT(I,J) = DTD(I,J)
-      END DO
-      END DO
-      RETURN
-      END SUBROUTINE
+    ENDIF
+    IF (NZ .GT. NMAX) THEN
+       WRITE (6,*) 'Too large polynomial degree in DGLJ'
+       WRITE (6,*) 'Maximum polynomial degree is',NMAX
+       WRITE (6,*) 'Here NZ=',NZ
+       call neko_error
+    ENDIF
+    IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN
+       WRITE (6,*) 'DGLJ: Alpha and Beta must be greater than -1'
+       call neko_error
+    ENDIF
+    ALPHAD = ALPHA
+    BETAD  = BETA
+    DO 100 I=1,NZ
+       ZD(I) = Z(I)
+100 CONTINUE
+    CALL DGLJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD)
+    DO I=1,NZ
+       DO J=1,NZ
+          D(I,J)  = DD(I,J)
+          DT(I,J) = DTD(I,J)
+       END DO
+    END DO
+    RETURN
+  end subroutine DGLJ
 
-      SUBROUTINE DGLJD (D,DT,Z,NZ,NZD,ALPHA,BETA)
+  SUBROUTINE DGLJD (D,DT,Z,NZ,NZD,ALPHA,BETA)
 !-----------------------------------------------------------------
 !
 !     Compute the derivative matrix D and its transpose DT
@@ -823,44 +823,44 @@ SUBROUTINE DGLJD (D,DT,Z,NZ,NZD,ALPHA,BETA)
 !     Double precision version.
 !
 !-----------------------------------------------------------------
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP)  D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA
-      N    = NZ-1
-      DN   = ((N))
-      ONE  = 1.
-      TWO  = 2.
-      EIGVAL = -DN*(DN+ALPHA+BETA+ONE)
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP)  D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA
+    N    = NZ-1
+    DN   = ((N))
+    ONE  = 1.
+    TWO  = 2.
+    EIGVAL = -DN*(DN+ALPHA+BETA+ONE)
 
-      IF (NZ.LE.1) THEN
+    IF (NZ.LE.1) THEN
        WRITE (6,*) 'DGLJD: Minimum number of Gauss-Lobatto points is 2'
        call neko_error
-      ENDIF
-      IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN
-         WRITE (6,*) 'DGLJD: Alpha and Beta must be greater than -1'
-         call neko_error
-      ENDIF
+    ENDIF
+    IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN
+       WRITE (6,*) 'DGLJD: Alpha and Beta must be greater than -1'
+       call neko_error
+    ENDIF
 
-      DO I=1,NZ
-      DO J=1,NZ
-         CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(I))
-         CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(J))
-         CI = EIGVAL*PI-(BETA*(ONE-Z(I))-ALPHA*(ONE+Z(I)))*PDI
-         CJ = EIGVAL*PJ-(BETA*(ONE-Z(J))-ALPHA*(ONE+Z(J)))*PDJ
-         IF (I.NE.J) D(I,J) = CI/(CJ*(Z(I)-Z(J)))
-         IF ((I.EQ.J).AND.(I.NE.1).AND.(I.NE.NZ)) &
+    DO I=1,NZ
+       DO J=1,NZ
+          CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(I))
+          CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(J))
+          CI = EIGVAL*PI-(BETA*(ONE-Z(I))-ALPHA*(ONE+Z(I)))*PDI
+          CJ = EIGVAL*PJ-(BETA*(ONE-Z(J))-ALPHA*(ONE+Z(J)))*PDJ
+          IF (I.NE.J) D(I,J) = CI/(CJ*(Z(I)-Z(J)))
+          IF ((I.EQ.J).AND.(I.NE.1).AND.(I.NE.NZ)) &
               D(I,J) = (ALPHA*(ONE+Z(I))-BETA*(ONE-Z(I)))/ &
               (TWO*(ONE-Z(I)**2))
-         IF ((I.EQ.J).AND.(I.EQ.1)) &
-              D(I,J) =  (EIGVAL+ALPHA)/(TWO*(BETA+TWO)) 
-         IF ((I.EQ.J).AND.(I.EQ.NZ)) &
-              D(I,J) = -(EIGVAL+BETA)/(TWO*(ALPHA+TWO)) 
-         DT(J,I) = D(I,J)
-      END DO
-      END DO
-      RETURN
-      END SUBROUTINE
+          IF ((I.EQ.J).AND.(I.EQ.1)) &
+              D(I,J) =  (EIGVAL+ALPHA)/(TWO*(BETA+TWO))
+          IF ((I.EQ.J).AND.(I.EQ.NZ)) &
+              D(I,J) = -(EIGVAL+BETA)/(TWO*(ALPHA+TWO))
+          DT(J,I) = D(I,J)
+       END DO
+    END DO
+    RETURN
+  end subroutine DGLJD
 
-      SUBROUTINE DGLL (D,DT,Z,NZ,NZD)
+  SUBROUTINE DGLL (D,DT,Z,NZ,NZD)
 !-----------------------------------------------------------------
 !
 !     Compute the derivative matrix D and its transpose DT
@@ -869,76 +869,76 @@ SUBROUTINE DGLL (D,DT,Z,NZ,NZD)
 !     Note: D and DT are square matrices.
 !
 !-----------------------------------------------------------------
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      PARAMETER (NMAX=84)
-      REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1)
-      N  = NZ-1
-      IF (NZ .GT. NMAX) THEN
-         WRITE (6,*) 'Subroutine DGLL'
-         WRITE (6,*) 'Maximum polynomial degree =',NMAX
-         WRITE (6,*) 'Polynomial degree         =',NZ
-      ENDIF
-      IF (NZ .EQ. 1) THEN
-         D(1,1) = 0.
-         RETURN
-      ENDIF
-      FN = (N)
-      d0 = FN*(FN+1.)/4.
-      DO I=1,NZ
-      DO J=1,NZ
-         D(I,J) = 0.
-         IF  (I.NE.J) D(I,J) = PNLEG(Z(I),N)/ &
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    PARAMETER (NMAX=84)
+    REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1)
+    N  = NZ-1
+    IF (NZ .GT. NMAX) THEN
+       WRITE (6,*) 'Subroutine DGLL'
+       WRITE (6,*) 'Maximum polynomial degree =',NMAX
+       WRITE (6,*) 'Polynomial degree         =',NZ
+    ENDIF
+    IF (NZ .EQ. 1) THEN
+       D(1,1) = 0.
+       RETURN
+    ENDIF
+    FN = (N)
+    d0 = FN*(FN+1.)/4.
+    DO I=1,NZ
+       DO J=1,NZ
+          D(I,J) = 0.
+          IF  (I.NE.J) D(I,J) = PNLEG(Z(I),N)/ &
                              (PNLEG(Z(J),N)*(Z(I)-Z(J)))
-         IF ((I.EQ.J).AND.(I.EQ.1))  D(I,J) = -d0
-         IF ((I.EQ.J).AND.(I.EQ.NZ)) D(I,J) =  d0
-         DT(J,I) = D(I,J)
-      END DO
-      END DO
-      RETURN
-      END SUBROUTINE
+          IF ((I.EQ.J).AND.(I.EQ.1))  D(I,J) = -d0
+          IF ((I.EQ.J).AND.(I.EQ.NZ)) D(I,J) =  d0
+          DT(J,I) = D(I,J)
+       END DO
+    END DO
+    RETURN
+  end subroutine DGLL
 
-      REAL(KIND=RP) FUNCTION HGLL (I,Z,ZGLL,NZ)
+  REAL(KIND=RP) FUNCTION HGLL (I,Z,ZGLL,NZ)
 !---------------------------------------------------------------------
 !
 !     Compute the value of the Lagrangian interpolant L through
 !     the NZ Gauss-Lobatto Legendre points ZGLL at the point Z.
 !
 !---------------------------------------------------------------------
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP) ZGLL(1), EPS, DZ, Z
-      EPS = 1.E-5
-      DZ = Z - ZGLL(I)
-      IF (ABS(DZ) .LT. EPS) THEN
-         HGLL = 1.
-         RETURN
-      ENDIF
-      N = NZ - 1
-      ALFAN = (N)*((N)+1.)
-      HGLL = - (1.-Z*Z)*PNDLEG(Z,N)/ &
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP) ZGLL(1), EPS, DZ, Z
+    EPS = 1.E-5
+    DZ = Z - ZGLL(I)
+    IF (ABS(DZ) .LT. EPS) THEN
+       HGLL = 1.
+       RETURN
+    ENDIF
+    N = NZ - 1
+    ALFAN = (N)*((N)+1.)
+    HGLL = - (1.-Z*Z)*PNDLEG(Z,N)/ &
            (ALFAN*PNLEG(ZGLL(I),N)*(Z-ZGLL(I)))
-      RETURN
-      END FUNCTION
+    RETURN
+  end function HGLL
 
-      REAL(KIND=RP) FUNCTION HGL (I,Z,ZGL,NZ)
+  REAL(KIND=RP) FUNCTION HGL (I,Z,ZGL,NZ)
 !---------------------------------------------------------------------
 !
 !     Compute the value of the Lagrangian interpolant HGL through
 !     the NZ Gauss Legendre points ZGL at the point Z.
 !
 !---------------------------------------------------------------------
-      REAL(KIND=RP) ZGL(1), Z, EPS, DZ
-      EPS = 1.E-5
-      DZ = Z - ZGL(I)
-      IF (ABS(DZ) .LT. EPS) THEN
-         HGL = 1.
-         RETURN
-      ENDIF
-      N = NZ-1
-      HGL = PNLEG(Z,NZ)/(PNDLEG(ZGL(I),NZ)*(Z-ZGL(I)))
-      RETURN
-      END FUNCTION
+    REAL(KIND=RP) ZGL(1), Z, EPS, DZ
+    EPS = 1.E-5
+    DZ = Z - ZGL(I)
+    IF (ABS(DZ) .LT. EPS) THEN
+       HGL = 1.
+       RETURN
+    ENDIF
+    N = NZ-1
+    HGL = PNLEG(Z,NZ)/(PNDLEG(ZGL(I),NZ)*(Z-ZGL(I)))
+    RETURN
+  end function HGL
 
-      REAL(KIND=RP) FUNCTION PNLEG (Z,N)
+  REAL(KIND=RP) FUNCTION PNLEG (Z,N)
 !---------------------------------------------------------------------
 !
 !     Compute the value of the Nth order Legendre polynomial at Z.
@@ -947,33 +947,33 @@ REAL(KIND=RP) FUNCTION PNLEG (Z,N)
 !
 !---------------------------------------------------------------------
 !
-!     This next statement is to overcome the underflow bug in the i860.  
+!     This next statement is to overcome the underflow bug in the i860.
 !     It can be removed at a later date.  11 Aug 1990   pff.
 !
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP) Z, P1, P2, P3
-      IF(ABS(Z) .LT. 1.0E-25) Z = 0.0
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP) Z, P1, P2, P3
+    IF(ABS(Z) .LT. 1.0E-25) Z = 0.0
 
 
-      P1   = 1.
-      IF (N.EQ.0) THEN
-         PNLEG = P1
-         RETURN
-      ENDIF
-      P2   = Z
-      P3   = P2
-      DO 10 K = 1, N-1
-         FK  = (K)
-         P3  = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.)
-         P1  = P2
-         P2  = P3
- 10   CONTINUE
-      PNLEG = P3
-      if (n.eq.0) pnleg = 1.
-      RETURN
-      END FUNCTION
+    P1   = 1.
+    IF (N.EQ.0) THEN
+       PNLEG = P1
+       RETURN
+    ENDIF
+    P2   = Z
+    P3   = P2
+    DO 10 K = 1, N-1
+       FK  = (K)
+       P3  = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.)
+       P1  = P2
+       P2  = P3
+10  CONTINUE
+    PNLEG = P3
+    if (n.eq.0) pnleg = 1.
+    RETURN
+  end function PNLEG
 
-      REAL(KIND=RP) FUNCTION PNDLEG (Z,N)
+  REAL(KIND=RP) FUNCTION PNDLEG (Z,N)
 !----------------------------------------------------------------------
 !
 !     Compute the derivative of the Nth order Legendre polynomial at Z.
@@ -981,28 +981,28 @@ REAL(KIND=RP) FUNCTION PNDLEG (Z,N)
 !     Based on the recursion formula for the Legendre polynomials.
 !
 !----------------------------------------------------------------------
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP) P1, P2, P1D, P2D, P3D, Z
-      P1   = 1.
-      P2   = Z
-      P1D  = 0.
-      P2D  = 1.
-      P3D  = 1.
-      DO 10 K = 1, N-1
-         FK  = (K)
-         P3  = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.)
-         P3D = ((2.*FK+1.)*P2 + (2.*FK+1.)*Z*P2D - FK*P1D)/(FK+1.)
-         P1  = P2
-         P2  = P3
-         P1D = P2D
-         P2D = P3D
- 10   CONTINUE
-      PNDLEG = P3D
-      IF (N.eq.0) pndleg = 0.
-      RETURN
-      END FUNCTION
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP) P1, P2, P1D, P2D, P3D, Z
+    P1   = 1.
+    P2   = Z
+    P1D  = 0.
+    P2D  = 1.
+    P3D  = 1.
+    DO 10 K = 1, N-1
+       FK  = (K)
+       P3  = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.)
+       P3D = ((2.*FK+1.)*P2 + (2.*FK+1.)*Z*P2D - FK*P1D)/(FK+1.)
+       P1  = P2
+       P2  = P3
+       P1D = P2D
+       P2D = P3D
+10  CONTINUE
+    PNDLEG = P3D
+    IF (N.eq.0) pndleg = 0.
+    RETURN
+  end function PNDLEG
 
-      SUBROUTINE DGLLGL (D,DT,ZM1,ZM2,IM12,NZM1,NZM2,ND1,ND2)
+  SUBROUTINE DGLLGL (D,DT,ZM1,ZM2,IM12,NZM1,NZM2,ND1,ND2)
 !-----------------------------------------------------------------------
 !
 !     Compute the (one-dimensional) derivative matrix D and its
@@ -1014,32 +1014,32 @@ SUBROUTINE DGLLGL (D,DT,ZM1,ZM2,IM12,NZM1,NZM2,ND1,ND2)
 !     Note: D and DT are rectangular matrices.
 !
 !-----------------------------------------------------------------------
-      REAL(KIND=RP) D(ND2,ND1), DT(ND1,ND2), ZM1(ND1), ZM2(ND2), IM12(ND2,ND1)
-      REAL(KIND=RP) EPS, ZP, ZQ  
-      IF (NZM1.EQ.1) THEN
-        D (1,1) = 0.
-        DT(1,1) = 0.
-        RETURN
-      ENDIF
-      EPS = 1.E-6
-      NM1 = NZM1-1
-      DO IP = 1, NZM2
-         DO JQ = 1, NZM1
-            ZP = ZM2(IP)
-            ZQ = ZM1(JQ)
-            IF ((ABS(ZP) .LT. EPS).AND.(ABS(ZQ) .LT. EPS)) THEN
-                D(IP,JQ) = 0.
-            ELSE
-                D(IP,JQ) = (PNLEG(ZP,NM1)/PNLEG(ZQ,NM1) &
+    REAL(KIND=RP) D(ND2,ND1), DT(ND1,ND2), ZM1(ND1), ZM2(ND2), IM12(ND2,ND1)
+    REAL(KIND=RP) EPS, ZP, ZQ
+    IF (NZM1.EQ.1) THEN
+       D (1,1) = 0.
+       DT(1,1) = 0.
+       RETURN
+    ENDIF
+    EPS = 1.E-6
+    NM1 = NZM1-1
+    DO IP = 1, NZM2
+       DO JQ = 1, NZM1
+          ZP = ZM2(IP)
+          ZQ = ZM1(JQ)
+          IF ((ABS(ZP) .LT. EPS).AND.(ABS(ZQ) .LT. EPS)) THEN
+             D(IP,JQ) = 0.
+          ELSE
+             D(IP,JQ) = (PNLEG(ZP,NM1)/PNLEG(ZQ,NM1) &
                      -IM12(IP,JQ))/(ZP-ZQ)
-            ENDIF
-            DT(JQ,IP) = D(IP,JQ)
-         END DO
-      END DO
-      RETURN
-      END SUBROUTINE
+          ENDIF
+          DT(JQ,IP) = D(IP,JQ)
+       END DO
+    END DO
+    RETURN
+  end subroutine DGLLGL
 
-      SUBROUTINE DGLJGJ (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA)
+  SUBROUTINE DGLJGJ (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA)
 !-----------------------------------------------------------------------
 !
 !     Compute the (one-dimensional) derivative matrix D and its
@@ -1052,50 +1052,50 @@ SUBROUTINE DGLJGJ (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA)
 !     Single precision version.
 !
 !-----------------------------------------------------------------------
-      REAL(KIND=RP) D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2), IGLG(ND2,ND1)
-      PARAMETER (NMAX=84)
-      PARAMETER (NDD = NMAX)
-      REAL(KIND=RP)  DD(NDD,NDD), DTD(NDD,NDD)
-      REAL(KIND=RP)  ZGD(NDD), ZGLD(NDD), IGLGD(NDD,NDD)
-      REAL(KIND=RP)  ALPHAD, BETAD
+    REAL(KIND=RP) D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2), IGLG(ND2,ND1)
+    PARAMETER (NMAX=84)
+    PARAMETER (NDD = NMAX)
+    REAL(KIND=RP)  DD(NDD,NDD), DTD(NDD,NDD)
+    REAL(KIND=RP)  ZGD(NDD), ZGLD(NDD), IGLGD(NDD,NDD)
+    REAL(KIND=RP)  ALPHAD, BETAD
 
-      IF (NPGL.LE.1) THEN
+    IF (NPGL.LE.1) THEN
        WRITE(6,*) 'DGLJGJ: Minimum number of Gauss-Lobatto points is 2'
        call neko_error
-      ENDIF
-      IF (NPGL.GT.NMAX) THEN
-         WRITE(6,*) 'Polynomial degree too high in DGLJGJ'
-         WRITE(6,*) 'Maximum polynomial degree is',NMAX
-         WRITE(6,*) 'Here NPGL=',NPGL
-         call neko_error
-      ENDIF
-      IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN
-         WRITE(6,*) 'DGLJGJ: Alpha and Beta must be greater than -1'
-         call neko_error
-      ENDIF
+    ENDIF
+    IF (NPGL.GT.NMAX) THEN
+       WRITE(6,*) 'Polynomial degree too high in DGLJGJ'
+       WRITE(6,*) 'Maximum polynomial degree is',NMAX
+       WRITE(6,*) 'Here NPGL=',NPGL
+       call neko_error
+    ENDIF
+    IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN
+       WRITE(6,*) 'DGLJGJ: Alpha and Beta must be greater than -1'
+       call neko_error
+    ENDIF
 
-      ALPHAD = ALPHA
-      BETAD  = BETA
-      DO I=1,NPG
-         ZGD(I) = ZG(I)
-         DO J=1,NPGL
-            IGLGD(I,J) = IGLG(I,J)
-         END DO
-      END DO
-      DO 200 I=1,NPGL
-         ZGLD(I) = ZGL(I)
- 200  CONTINUE
-      CALL DGLJGJD (DD,DTD,ZGLD,ZGD,IGLGD,NPGL,NPG,NDD,NDD,ALPHAD,BETAD)
-      DO I=1,NPG
-      DO J=1,NPGL
-         D(I,J)  = DD(I,J)
-         DT(J,I) = DTD(J,I)
-      END DO
-      END DO
-      RETURN
-      END SUBROUTINE
+    ALPHAD = ALPHA
+    BETAD  = BETA
+    DO I=1,NPG
+       ZGD(I) = ZG(I)
+       DO J=1,NPGL
+          IGLGD(I,J) = IGLG(I,J)
+       END DO
+    END DO
+    DO 200 I=1,NPGL
+       ZGLD(I) = ZGL(I)
+200 CONTINUE
+    CALL DGLJGJD (DD,DTD,ZGLD,ZGD,IGLGD,NPGL,NPG,NDD,NDD,ALPHAD,BETAD)
+    DO I=1,NPG
+       DO J=1,NPGL
+          D(I,J)  = DD(I,J)
+          DT(J,I) = DTD(J,I)
+       END DO
+    END DO
+    RETURN
+  end subroutine DGLJGJ
 
-      SUBROUTINE DGLJGJD (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA)
+  SUBROUTINE DGLJGJD (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA)
 !-----------------------------------------------------------------------
 !
 !     Compute the (one-dimensional) derivative matrix D and its
@@ -1108,48 +1108,48 @@ SUBROUTINE DGLJGJD (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA)
 !     Double precision version.
 !
 !-----------------------------------------------------------------------
-      IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
-      REAL(KIND=RP)  D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2)
-      REAL(KIND=RP)  IGLG(ND2,ND1), ALPHA, BETA
+    IMPLICIT REAL(KIND=RP)  (A-H,O-Z)
+    REAL(KIND=RP)  D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2)
+    REAL(KIND=RP)  IGLG(ND2,ND1), ALPHA, BETA
 
-      IF (NPGL.LE.1) THEN
+    IF (NPGL.LE.1) THEN
        WRITE(6,*) 'DGLJGJD: Minimum number of Gauss-Lobatto points is 2'
        call neko_error
-      ENDIF
-      IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN
-         WRITE(6,*) 'DGLJGJD: Alpha and Beta must be greater than -1'
-         call neko_error
-      ENDIF
+    ENDIF
+    IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN
+       WRITE(6,*) 'DGLJGJD: Alpha and Beta must be greater than -1'
+       call neko_error
+    ENDIF
 
-      EPS    = 1.e-6
-      ONE    = 1.
-      TWO    = 2.
-      NGL    = NPGL-1
-      DN     = ((NGL))
-      EIGVAL = -DN*(DN+ALPHA+BETA+ONE)
+    EPS    = 1.e-6
+    ONE    = 1.
+    TWO    = 2.
+    NGL    = NPGL-1
+    DN     = ((NGL))
+    EIGVAL = -DN*(DN+ALPHA+BETA+ONE)
 
-      DO I=1,NPG
-      DO J=1,NPGL
-         DZ = ABS(ZG(I)-ZGL(J))
-         IF (DZ.LT.EPS) THEN
-            D(I,J) = (ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I)))/ &
+    DO I=1,NPG
+       DO J=1,NPGL
+          DZ = ABS(ZG(I)-ZGL(J))
+          IF (DZ.LT.EPS) THEN
+             D(I,J) = (ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I)))/ &
                  (TWO*(ONE-ZG(I)**2))
-         ELSE
-            CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZG(I))
-            CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZGL(J))
-            FACI   = ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I))
-            FACJ   = ALPHA*(ONE+ZGL(J))-BETA*(ONE-ZGL(J))
-            CONST  = EIGVAL*PJ+FACJ*PDJ
-            D(I,J) = ((EIGVAL*PI+FACI*PDI)*(ZG(I)-ZGL(J)) &
+          ELSE
+             CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZG(I))
+             CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZGL(J))
+             FACI   = ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I))
+             FACJ   = ALPHA*(ONE+ZGL(J))-BETA*(ONE-ZGL(J))
+             CONST  = EIGVAL*PJ+FACJ*PDJ
+             D(I,J) = ((EIGVAL*PI+FACI*PDI)*(ZG(I)-ZGL(J)) &
                  -(ONE-ZG(I)**2)*PDI)/(CONST*(ZG(I)-ZGL(J))**2)
-         ENDIF
-         DT(J,I) = D(I,J)
-      END DO
-      END DO
-      RETURN
-      END SUBROUTINE
+          ENDIF
+          DT(J,I) = D(I,J)
+       END DO
+    END DO
+    RETURN
+  end subroutine DGLJGJD
 
-      SUBROUTINE IGLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2)
+  SUBROUTINE IGLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2)
 !----------------------------------------------------------------------
 !
 !     Compute the one-dimensional interpolation operator (matrix) I12
@@ -1159,23 +1159,23 @@ SUBROUTINE IGLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2)
 !     Z2 : NZ2 points on mesh M.
 !
 !--------------------------------------------------------------------
-      REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2), ZI
-      IF (NZ1 .EQ. 1) THEN
-         I12 (1,1) = 1.
-         IT12(1,1) = 1.
-         RETURN
-      ENDIF
-      DO I=1,NZ2
-         ZI = Z2(I)
-         DO J=1,NZ1
-            I12 (I,J) = HGL(J,ZI,Z1,NZ1)
-            IT12(J,I) = I12(I,J)
-         END DO
-      END DO
-      RETURN
-      END SUBROUTINE
+    REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2), ZI
+    IF (NZ1 .EQ. 1) THEN
+       I12 (1,1) = 1.
+       IT12(1,1) = 1.
+       RETURN
+    ENDIF
+    DO I=1,NZ2
+       ZI = Z2(I)
+       DO J=1,NZ1
+          I12 (I,J) = HGL(J,ZI,Z1,NZ1)
+          IT12(J,I) = I12(I,J)
+       END DO
+    END DO
+    RETURN
+  end subroutine IGLM
 
-      SUBROUTINE IGLLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2)
+  SUBROUTINE IGLLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2)
 !----------------------------------------------------------------------
 !
 !     Compute the one-dimensional interpolation operator (matrix) I12
@@ -1185,23 +1185,23 @@ SUBROUTINE IGLLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2)
 !     Z2 : NZ2 points on mesh M.
 !
 !--------------------------------------------------------------------
-      REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2),ZI
-      IF (NZ1 .EQ. 1) THEN
-         I12 (1,1) = 1.
-         IT12(1,1) = 1.
-         RETURN
-      ENDIF
-      DO I=1,NZ2
-         ZI = Z2(I)
-         DO J=1,NZ1
-            I12 (I,J) = HGLL(J,ZI,Z1,NZ1)
-            IT12(J,I) = I12(I,J)
-         END DO
-      END DO
-      RETURN
-      END SUBROUTINE
+    REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2),ZI
+    IF (NZ1 .EQ. 1) THEN
+       I12 (1,1) = 1.
+       IT12(1,1) = 1.
+       RETURN
+    ENDIF
+    DO I=1,NZ2
+       ZI = Z2(I)
+       DO J=1,NZ1
+          I12 (I,J) = HGLL(J,ZI,Z1,NZ1)
+          IT12(J,I) = I12(I,J)
+       END DO
+    END DO
+    RETURN
+  end subroutine IGLLM
 
-      SUBROUTINE IGJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA)
+  SUBROUTINE IGJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA)
 !----------------------------------------------------------------------
 !
 !     Compute the one-dimensional interpolation operator (matrix) I12
@@ -1212,23 +1212,23 @@ SUBROUTINE IGJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA)
 !     Single precision version.
 !
 !--------------------------------------------------------------------
-      REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2),ZI,ALPHA,BETA
-      IF (NZ1 .EQ. 1) THEN
-         I12 (1,1) = 1.
-         IT12(1,1) = 1.
-         RETURN
-      ENDIF
-      DO I=1,NZ2
-         ZI = Z2(I)
-         DO J=1,NZ1
-            I12 (I,J) = HGJ(J,ZI,Z1,NZ1,ALPHA,BETA)
-            IT12(J,I) = I12(I,J)
-         END DO
-      END DO
-      RETURN
-      END SUBROUTINE
+    REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2),ZI,ALPHA,BETA
+    IF (NZ1 .EQ. 1) THEN
+       I12 (1,1) = 1.
+       IT12(1,1) = 1.
+       RETURN
+    ENDIF
+    DO I=1,NZ2
+       ZI = Z2(I)
+       DO J=1,NZ1
+          I12 (I,J) = HGJ(J,ZI,Z1,NZ1,ALPHA,BETA)
+          IT12(J,I) = I12(I,J)
+       END DO
+    END DO
+    RETURN
+  end subroutine IGJM
 
-      SUBROUTINE IGLJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA)
+  SUBROUTINE IGLJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA)
 !----------------------------------------------------------------------
 !
 !     Compute the one-dimensional interpolation operator (matrix) I12
@@ -1239,19 +1239,19 @@ SUBROUTINE IGLJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA)
 !     Single precision version.
 !
 !--------------------------------------------------------------------
-      REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2),ZI,ALPHA,BETA
-      IF (NZ1 .EQ. 1) THEN
-         I12 (1,1) = 1.
-         IT12(1,1) = 1.
-         RETURN
-      ENDIF
-      DO I=1,NZ2
-         ZI = Z2(I)
-         DO J=1,NZ1
-            I12 (I,J) = HGLJ(J,ZI,Z1,NZ1,ALPHA,BETA)
-            IT12(J,I) = I12(I,J)
-         END DO
-      END DO
-      RETURN
-      END SUBROUTINE
-end module
+    REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2),ZI,ALPHA,BETA
+    IF (NZ1 .EQ. 1) THEN
+       I12 (1,1) = 1.
+       IT12(1,1) = 1.
+       RETURN
+    ENDIF
+    DO I=1,NZ2
+       ZI = Z2(I)
+       DO J=1,NZ1
+          I12 (I,J) = HGLJ(J,ZI,Z1,NZ1,ALPHA,BETA)
+          IT12(J,I) = I12(I,J)
+       END DO
+    END DO
+    RETURN
+  end subroutine IGLJM
+end module speclib
diff --git a/src/sem/spectral_error_indicator.f90 b/src/sem/spectral_error_indicator.f90
index 637e11e270e..f86a3dafedd 100644
--- a/src/sem/spectral_error_indicator.f90
+++ b/src/sem/spectral_error_indicator.f90
@@ -33,7 +33,6 @@
 !> Implements type spectral_error_indicator_t.
 module spectral_error_indicator
   use num_types, only: rp
-  use logger, only: neko_log, LOG_SIZE
   use field, only: field_t
   use coefs, only: coef_t
   use field_list, only: field_list_t
@@ -52,11 +51,11 @@ module spectral_error_indicator
 
   !> Provides tools to calculate the spectral error indicator
   !! @details
-  !! This is a posteriori error measure, based on the local properties of 
+  !! This is a posteriori error measure, based on the local properties of
   !! the spectral solution, which was developed by Mavriplis. This method
-  !! formally only gives an indication of the error. 
+  !! formally only gives an indication of the error.
   type, public :: spectral_error_indicator_t
-     !> Pointers to main fields 
+     !> Pointers to main fields
      type(field_t), pointer :: u  => null()
      type(field_t), pointer :: v  => null()
      type(field_t), pointer :: w  => null()
@@ -140,22 +139,22 @@ subroutine spec_err_ind_init(this, u,v,w,coef)
       LZ1 => coef%Xh%lz, &
       SERI_SMALL  => this%SERI_SMALL,  &
       SERI_SMALLR => this%SERI_SMALLR, &
-      SERI_SMALLG => this%SERI_SMALLG, & 
-      SERI_SMALLS => this%SERI_SMALLS, & 
+      SERI_SMALLG => this%SERI_SMALLG, &
+      SERI_SMALLS => this%SERI_SMALLS, &
       SERI_NP     => this%SERI_NP,     &
       SERI_NP_MAX => this%SERI_NP_MAX, &
       SERI_ELR    => this%SERI_ELR     &
-      )   
+      )
       ! correctness check
       if (SERI_NP.gt.SERI_NP_MAX) then
-        if (pe_rank.eq.0) write(*,*) 'SETI_NP greater than SERI_NP_MAX' 
-        endif
-        il = SERI_NP+SERI_ELR
-        jl = min(LX1,LY1)
-        jl = min(jl,LZ1)
-        if (il.gt.jl) then
-          if (pe_rank.eq.0) write(*,*) 'SERI_NP+SERI_ELR greater than L?1'
-        endif
+         if (pe_rank.eq.0) write(*,*) 'SETI_NP greater than SERI_NP_MAX'
+      endif
+      il = SERI_NP+SERI_ELR
+      jl = min(LX1,LY1)
+      jl = min(jl,LZ1)
+      if (il.gt.jl) then
+         if (pe_rank.eq.0) write(*,*) 'SERI_NP+SERI_ELR greater than L?1'
+      endif
     end associate
 
     !> Initialize the list that holds the fields to write
@@ -173,27 +172,27 @@ subroutine spec_err_ind_free(this)
     class(spectral_error_indicator_t), intent(inout) :: this
 
     if(allocated(this%eind_u)) then
-      deallocate(this%eind_u)
+       deallocate(this%eind_u)
     end if
 
     if(allocated(this%eind_v)) then
-      deallocate(this%eind_v)
+       deallocate(this%eind_v)
     end if
 
     if(allocated(this%eind_w)) then
-      deallocate(this%eind_w)
+       deallocate(this%eind_w)
     end if
 
     if(allocated(this%sig_u)) then
-      deallocate(this%sig_u)
+       deallocate(this%sig_u)
     end if
 
     if(allocated(this%sig_w)) then
-      deallocate(this%sig_w)
+       deallocate(this%sig_w)
     end if
 
     if(allocated(this%sig_w)) then
-      deallocate(this%sig_w)
+       deallocate(this%sig_w)
     end if
 
     call this%u_hat%free()
@@ -204,11 +203,11 @@ subroutine spec_err_ind_free(this)
     nullify(this%u)
     nullify(this%v)
     nullify(this%w)
-   
-    !> finalize data related to writing 
+
+    !> finalize data related to writing
     call list_final3(this%speri_l)
     call file_free(this%mf_speri)
-                
+
   end subroutine spec_err_ind_free
 
   !> Transform a field u > u_hat into physical or spectral space
@@ -223,9 +222,8 @@ subroutine transform_to_spec_or_phys(u_hat, u, wk, coef, space)
     type(field_t), intent(inout) :: u
     type(field_t), intent(inout) :: wk
     type(coef_t), intent(inout) :: coef
-    character(len=4), intent(in) :: space             
+    character(len=4), intent(in) :: space
     integer :: i, j, k, e, nxyz, nelv, n
-    character(len=LOG_SIZE) :: log_buf 
 
     !> Define some constants
     nxyz = coef%Xh%lx*coef%Xh%lx*coef%Xh%lx
@@ -234,28 +232,28 @@ subroutine transform_to_spec_or_phys(u_hat, u, wk, coef, space)
 
     !> Copy field to working array
     if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. &
-      (NEKO_BCKND_OPENCL .eq. 1)) then 
-      call device_copy(wk%x_d, u%x_d, n)
+      (NEKO_BCKND_OPENCL .eq. 1)) then
+       call device_copy(wk%x_d, u%x_d, n)
     else
-      call copy(wk%x,u%x,n)
+       call copy(wk%x,u%x,n)
     end if
 
     select case(space)
-      case('spec')
-        call tnsr3d(u_hat%x, coef%Xh%lx, wk%x, &
+    case('spec')
+       call tnsr3d(u_hat%x, coef%Xh%lx, wk%x, &
                     coef%Xh%lx,coef%Xh%vinv, &
                     coef%Xh%vinvt, coef%Xh%vinvt, nelv)
-      case('phys') 
-        call tnsr3d(u_hat%x, coef%Xh%lx, wk%x, &
+    case('phys')
+       call tnsr3d(u_hat%x, coef%Xh%lx, wk%x, &
                     coef%Xh%lx,coef%Xh%v, &
                     coef%Xh%vt, coef%Xh%vt, nelv)
     end select
 
     ! Synchronize
     if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. &
-       (NEKO_BCKND_OPENCL .eq. 1)) then 
+       (NEKO_BCKND_OPENCL .eq. 1)) then
 
-      call device_memcpy(u_hat%x,u_hat%x_d, n, &
+       call device_memcpy(u_hat%x,u_hat%x_d, n, &
                          DEVICE_TO_HOST, sync=.true.)
     end if
 
@@ -272,8 +270,8 @@ subroutine spec_err_ind_get(this,coef)
     call transform_to_spec_or_phys(this%u_hat, this%u, this%wk, coef, 'spec')
     call transform_to_spec_or_phys(this%v_hat, this%v, this%wk, coef, 'spec')
     call transform_to_spec_or_phys(this%w_hat, this%w, this%wk, coef, 'spec')
-   
-    
+
+
     ! Get the spectral error indicator
     call calculate_indicators(this, coef, this%eind_u, this%sig_u, coef%msh%nelv, &
                               coef%Xh%lx,  coef%Xh%ly,  coef%Xh%lz, &
@@ -284,7 +282,7 @@ subroutine spec_err_ind_get(this,coef)
     call calculate_indicators(this, coef, this%eind_w, this%sig_w, coef%msh%nelv, &
                               coef%Xh%lx,  coef%Xh%ly,  coef%Xh%lz, &
                               this%w_hat%x)
-                             
+
   end subroutine spec_err_ind_get
 
   !> Write error indicators in a field file.
@@ -292,15 +290,15 @@ end subroutine spec_err_ind_get
   subroutine spec_err_ind_write(this, t)
     class(spectral_error_indicator_t), intent(inout) :: this
     real(kind=rp), intent(in) :: t
-    
+
     integer i, e
     integer lx, ly, lz, nelv
-    
+
     lx = this%u_hat%Xh%lx
     ly = this%u_hat%Xh%ly
     lz = this%u_hat%Xh%lz
     nelv = this%u_hat%msh%nelv
-    
+
     !> Copy the element indicator into all points of the field
     do e = 1,nelv
        do i = 1,lx*ly*lx
@@ -313,7 +311,7 @@ subroutine spec_err_ind_write(this, t)
     !> Write the file
     !! Remember that the list is already ponting to the fields
     !! that were just modified.
-    call this%mf_speri%write(this%speri_l,t)      
+    call this%mf_speri%write(this%speri_l,t)
 
   end subroutine spec_err_ind_write
 
@@ -336,19 +334,19 @@ subroutine calculate_indicators(this, coef, eind, sig, lnelt, LX1, LY1, LZ1, var
     real(kind=rp) :: eind(lnelt)
     real(kind=rp) :: sig(lnelt)
     real(kind=rp) :: var(LX1,LY1,LZ1,lnelt)
-           
+
     real(kind=rp) :: xa(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz)
     real(kind=rp) :: xb(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz)
     integer :: i, e
-      
+
     ! zero arrays
     call rzero(eind,lnelt)
     call rzero(sig,lnelt)
-       
+
     ! Get the indicator
     call speri_var(this, eind,sig,var,lnelt,xa,xb, LX1, LY1, LZ1)
 
-  end subroutine calculate_indicators 
+  end subroutine calculate_indicators
 
 
   !> Calculate the indicator in a specified variable
@@ -372,7 +370,7 @@ subroutine speri_var(this, est,sig,var,nell,xa,xb,LX1,LY1,LZ1)
     real(kind=rp) :: var(LX1,LY1,LZ1,nell)
     real(kind=rp) :: xa(LX1,LY1,LZ1)
     real(kind=rp) :: xb(LX1,LY1,LZ1)
-    
+
     !> local variables
     integer :: il, jl, kl, ll, j_st, j_en, ii
     !> polynomial coefficients
@@ -380,7 +378,7 @@ subroutine speri_var(this, est,sig,var,nell,xa,xb,LX1,LY1,LZ1)
     !> Legendre coefficients; first value coeff(1,1,1)
     real(kind=rp) ::  coef11
     !> copy of last SERI_NP columns of coefficients
-    real(kind=rp) ::  coefx(this%SERI_NP_MAX,LY1,LZ1), & 
+    real(kind=rp) ::  coefx(this%SERI_NP_MAX,LY1,LZ1), &
                       coefy(this%SERI_NP_MAX,LX1,LZ1), &
                       coefz(this%SERI_NP_MAX,LX1,LY1)
     !> estimated error
@@ -392,83 +390,83 @@ subroutine speri_var(this, est,sig,var,nell,xa,xb,LX1,LY1,LZ1)
 
     !> loop over elements
     do il = 1,nell
-      !> go to Legendre space (done in two operations)
-      !! and square the coefficient
-      do ii = 1, LX1*LY1*LZ1
-        coeff(ii,1,1) = var(ii,1,1,il) * var(ii,1,1,il) 
-      end do
-
-      !> lower left corner
-      coef11 = coeff(1,1,1)
-
-      !> small value; nothing to od
-      if (coef11.ge.this%SERI_SMALL) then
-        !> extrapolate coefficients
-        !> X - direction
-        !> copy last SERI_NP collumns (or less if NX1 is smaller)
-        !> SERI_ELR allows to exclude last row
-        j_st = max(1,LX1-this%SERI_NP+1-this%SERI_ELR)
-        j_en = max(1,LX1-this%SERI_ELR)
-        do ll = 1,LZ1
-          do kl = 1,LY1
-            do jl = j_st,j_en
-              coefx(j_en-jl+1,kl,ll) = coeff(jl,kl,ll)
-            enddo
+       !> go to Legendre space (done in two operations)
+       !! and square the coefficient
+       do ii = 1, LX1*LY1*LZ1
+          coeff(ii,1,1) = var(ii,1,1,il) * var(ii,1,1,il)
+       end do
+
+       !> lower left corner
+       coef11 = coeff(1,1,1)
+
+       !> small value; nothing to od
+       if (coef11.ge.this%SERI_SMALL) then
+          !> extrapolate coefficients
+          !> X - direction
+          !> copy last SERI_NP collumns (or less if NX1 is smaller)
+          !> SERI_ELR allows to exclude last row
+          j_st = max(1,LX1-this%SERI_NP+1-this%SERI_ELR)
+          j_en = max(1,LX1-this%SERI_ELR)
+          do ll = 1,LZ1
+             do kl = 1,LY1
+                do jl = j_st,j_en
+                   coefx(j_en-jl+1,kl,ll) = coeff(jl,kl,ll)
+                enddo
+             enddo
           enddo
-        enddo
-        !> get extrapolated values
-        call speri_extrap(this,estx,sigx,coef11,coefx, &
+          !> get extrapolated values
+          call speri_extrap(this,estx,sigx,coef11,coefx, &
             j_st,j_en,LY1,LZ1)
-         
-        !> Y - direction
-        !> copy last SERI_NP collumns (or less if NY1 is smaller)
-        !> SERI_ELR allows to exclude last row
-        j_st = max(1,LY1-this%SERI_NP+1-this%SERI_ELR)
-        j_en = max(1,LY1-this%SERI_ELR)
-        do ll = 1,LZ1
-          do kl = j_st,j_en
-            do jl = 1,LX1
-              coefy(j_en-kl+1,jl,ll) = coeff(jl,kl,ll)
-            enddo
+
+          !> Y - direction
+          !> copy last SERI_NP collumns (or less if NY1 is smaller)
+          !> SERI_ELR allows to exclude last row
+          j_st = max(1,LY1-this%SERI_NP+1-this%SERI_ELR)
+          j_en = max(1,LY1-this%SERI_ELR)
+          do ll = 1,LZ1
+             do kl = j_st,j_en
+                do jl = 1,LX1
+                   coefy(j_en-kl+1,jl,ll) = coeff(jl,kl,ll)
+                enddo
+             enddo
           enddo
-        enddo
-        !> get extrapolated values
-        call speri_extrap(this, esty,sigy,coef11,coefy, &
+          !> get extrapolated values
+          call speri_extrap(this, esty,sigy,coef11,coefy, &
              j_st,j_en,LX1,LZ1)
-   
-        !> Z - direction
-        !> copy last SERI_NP collumns (or less if NZ1 is smaller)
-        !> SERI_ELR allows to exclude last row
-        j_st = max(1,LZ1-this%SERI_NP+1-this%SERI_ELR)
-        j_en = max(1,LZ1-this%SERI_ELR)
-        do ll = j_st,j_en
-          do kl = 1,LY1
-            do jl = 1,LX1
-              coefz(j_en-ll+1,jl,kl) = coeff(jl,kl,ll)
-            enddo
+
+          !> Z - direction
+          !> copy last SERI_NP collumns (or less if NZ1 is smaller)
+          !> SERI_ELR allows to exclude last row
+          j_st = max(1,LZ1-this%SERI_NP+1-this%SERI_ELR)
+          j_en = max(1,LZ1-this%SERI_ELR)
+          do ll = j_st,j_en
+             do kl = 1,LY1
+                do jl = 1,LX1
+                   coefz(j_en-ll+1,jl,kl) = coeff(jl,kl,ll)
+                enddo
+             enddo
           enddo
-        enddo
-        !> get extrapolated values
-        call speri_extrap(this, estz,sigz,coef11,coefz, &
+          !> get extrapolated values
+          call speri_extrap(this, estz,sigz,coef11,coefz, &
         j_st,j_en,LX1,LY1)
 
-        !> average
-        est(il) =  sqrt(estx + esty + estz)
-        sig(il) =  third*(sigx + sigy + sigz)
-
-      else
-        !> for testing
-        estx = 0.0
-        esty = 0.0
-        estz = 0.0
-        sigx = -1.0
-        sigy = -1.0
-        sigz = -1.0
-        !> for testing; end
-
-        est(il) =  0.0
-        sig(il) = -1.0
-      endif
+          !> average
+          est(il) =  sqrt(estx + esty + estz)
+          sig(il) =  third*(sigx + sigy + sigz)
+
+       else
+          !> for testing
+          estx = 0.0
+          esty = 0.0
+          estz = 0.0
+          sigx = -1.0
+          sigy = -1.0
+          sigz = -1.0
+          !> for testing; end
+
+          est(il) =  0.0
+          sig(il) = -1.0
+       endif
 
     end do
 
@@ -504,15 +502,15 @@ subroutine speri_extrap(this,estx,sigx,coef11,coef, &
     real(kind=rp) :: sumtmp(4), cffl(this%SERI_NP_MAX)
     real(kind=rp) :: stmp, estt, clog, ctmp, cave, erlog
     logical :: cuse(this%SERI_NP_MAX)
-      
+
     associate(SERI_SMALL  => this%SERI_SMALL,  &
              SERI_SMALLR => this%SERI_SMALLR, &
-             SERI_SMALLG => this%SERI_SMALLG, & 
-             SERI_SMALLS => this%SERI_SMALLS, & 
+             SERI_SMALLG => this%SERI_SMALLG, &
+             SERI_SMALLS => this%SERI_SMALLS, &
              SERI_NP     => this%SERI_NP,     &
              SERI_NP_MAX => this%SERI_NP_MAX, &
              SERI_ELR    => this%SERI_ELR     &
-            )   
+            )
       ! initial values
       estx =  0.0
       sigx = -1.0
@@ -533,121 +531,121 @@ subroutine speri_extrap(this,estx,sigx,coef11,coef, &
       ! loop over all face points
       nzlt = max(1,nzl - SERI_ELR) !  for 2D runs
       do il=1,nzlt
-        ! weight
-        rtmp3 = 1.0/(2.0*(il-1)+1.0)
-        do jl=1,nyl - SERI_ELR
-
-          ! find min and max coef along single row
-          cffl(1) = coef(1,jl,il)
-          cmin = cffl(1)
-          cmax = cmin
-          do kl =2,pnr
-            cffl(kl) = coef(kl,jl,il)
-            cmin = min(cmin,cffl(kl))
-            cmax = max(cmax,cffl(kl))
-          enddo
-
-          ! are coefficients sufficiently big
-          if((cmin.gt.0.0).and.(cmax.gt.smallr)) then
-            ! mark array position we use in iterpolation
-            do kl =1,pnr
-              cuse(kl) = .TRUE.
+         ! weight
+         rtmp3 = 1.0/(2.0*(il-1)+1.0)
+         do jl=1,nyl - SERI_ELR
+
+            ! find min and max coef along single row
+            cffl(1) = coef(1,jl,il)
+            cmin = cffl(1)
+            cmax = cmin
+            do kl =2,pnr
+               cffl(kl) = coef(kl,jl,il)
+               cmin = min(cmin,cffl(kl))
+               cmax = max(cmax,cffl(kl))
             enddo
-            ! max n for polynomial order
-            cnm = real(ix_en)
-
-            ! check if all the points should be taken into account
-            ! in original code by Catherine Mavriplis this part is written
-            ! for 4 points, so I place if statement first
-            if (pnr.eq.4) then
-              ! should we neglect last values
-              if ((cffl(1).lt.smallr).and. &
+
+            ! are coefficients sufficiently big
+            if((cmin.gt.0.0).and.(cmax.gt.smallr)) then
+               ! mark array position we use in iterpolation
+               do kl =1,pnr
+                  cuse(kl) = .TRUE.
+               enddo
+               ! max n for polynomial order
+               cnm = real(ix_en)
+
+               ! check if all the points should be taken into account
+               ! in original code by Catherine Mavriplis this part is written
+               ! for 4 points, so I place if statement first
+               if (pnr.eq.4) then
+                  ! should we neglect last values
+                  if ((cffl(1).lt.smallr).and. &
                  (cffl(2).lt.smallr)) then
-                if (cffl(3).lt.smallr) then
-                  cuse(1) = .FALSE.
-                  cuse(2) = .FALSE.
-                  cnm = real(ix_en-2)
-                else
-                  cuse(1) = .FALSE.
-                  cnm = real(ix_en-1)
-                endif
-              else
-                ! should we take stronger gradient
-                if ((cffl(1)/cffl(2).lt.SERI_SMALLG).and. &
+                     if (cffl(3).lt.smallr) then
+                        cuse(1) = .FALSE.
+                        cuse(2) = .FALSE.
+                        cnm = real(ix_en-2)
+                     else
+                        cuse(1) = .FALSE.
+                        cnm = real(ix_en-1)
+                     endif
+                  else
+                     ! should we take stronger gradient
+                     if ((cffl(1)/cffl(2).lt.SERI_SMALLG).and. &
                    (cffl(3)/cffl(4).lt.SERI_SMALLG)) then
-                  cuse(1) = .FALSE.
-                  cuse(3) = .FALSE.
-                  cnm = real(ix_en-1)
-                elseif ((cffl(2)/cffl(1).lt.SERI_SMALLG).and. &
+                        cuse(1) = .FALSE.
+                        cuse(3) = .FALSE.
+                        cnm = real(ix_en-1)
+                     elseif ((cffl(2)/cffl(1).lt.SERI_SMALLG).and. &
                        (cffl(4)/cffl(3).lt.SERI_SMALLG)) then
-                  cuse(2) = .FALSE.
-                  cuse(4) = .FALSE.
-                endif
-              endif
-            endif
-
-            ! get sigma for given face point
-            do kl =1,4
-              sumtmp(kl) = 0.0
-            enddo
-            ! find new min and count number of points
-            cmin = cmax
-            cmax = 0.0
-            do kl =1,pnr
-              if(cuse(kl)) then
-                rtmp  = real(ix_en-kl)
-                rtmp2 = log(cffl(kl))
-                sumtmp(1) = sumtmp(1) +rtmp2
-                sumtmp(2) = sumtmp(2) +rtmp
-                sumtmp(3) = sumtmp(3) +rtmp*rtmp
-                sumtmp(4) = sumtmp(4) +rtmp2*rtmp
-                ! find new min and count used points
-                cmin = min(cffl(kl),cmin)
-                cmax = cmax + 1.0
-              endif
-            enddo
-            ! decay rate along single row
-            stmp = (sumtmp(1)*sumtmp(2) - sumtmp(4)*cmax)/ &
+                        cuse(2) = .FALSE.
+                        cuse(4) = .FALSE.
+                     endif
+                  endif
+               endif
+
+               ! get sigma for given face point
+               do kl =1,4
+                  sumtmp(kl) = 0.0
+               enddo
+               ! find new min and count number of points
+               cmin = cmax
+               cmax = 0.0
+               do kl =1,pnr
+                  if(cuse(kl)) then
+                     rtmp  = real(ix_en-kl)
+                     rtmp2 = log(cffl(kl))
+                     sumtmp(1) = sumtmp(1) +rtmp2
+                     sumtmp(2) = sumtmp(2) +rtmp
+                     sumtmp(3) = sumtmp(3) +rtmp*rtmp
+                     sumtmp(4) = sumtmp(4) +rtmp2*rtmp
+                     ! find new min and count used points
+                     cmin = min(cffl(kl),cmin)
+                     cmax = cmax + 1.0
+                  endif
+               enddo
+               ! decay rate along single row
+               stmp = (sumtmp(1)*sumtmp(2) - sumtmp(4)*cmax)/ &
                    (sumtmp(3)*cmax - sumtmp(2)*sumtmp(2))
-            ! for averaging
-            sigt = sigt + stmp
-            nsigt = nsigt + 1
-
-            ! get error estimator depending on calculated decay rate
-            estt = 0.0
-            if (stmp.lt.SERI_SMALLS) then
-              estt = cmin
-            else
-              ! get averaged constant in front of c*exp(-sig*n)
-              clog = (sumtmp(1)+stmp*sumtmp(2))/cmax
-              ctmp = exp(clog)
-              ! average exponent
-              cave = sumtmp(1)/cmax
-              ! check quality of approximation comparing is to the constant cave
-              do kl =1,2
-                sumtmp(kl) = 0.0
-              enddo
-              do kl =1,pnr
-                if(cuse(kl)) then
-                  erlog = clog - stmp*real(ix_en-kl)
-                  sumtmp(1) = sumtmp(1)+ &
+               ! for averaging
+               sigt = sigt + stmp
+               nsigt = nsigt + 1
+
+               ! get error estimator depending on calculated decay rate
+               estt = 0.0
+               if (stmp.lt.SERI_SMALLS) then
+                  estt = cmin
+               else
+                  ! get averaged constant in front of c*exp(-sig*n)
+                  clog = (sumtmp(1)+stmp*sumtmp(2))/cmax
+                  ctmp = exp(clog)
+                  ! average exponent
+                  cave = sumtmp(1)/cmax
+                  ! check quality of approximation comparing is to the constant cave
+                  do kl =1,2
+                     sumtmp(kl) = 0.0
+                  enddo
+                  do kl =1,pnr
+                     if(cuse(kl)) then
+                        erlog = clog - stmp*real(ix_en-kl)
+                        sumtmp(1) = sumtmp(1)+ &
                             (erlog-log(cffl(kl)))**2
-                  sumtmp(2) = sumtmp(2)+ &
+                        sumtmp(2) = sumtmp(2)+ &
                   (erlog-cave)**2
-                endif
-              enddo
-              rtmp = 1.0 - sumtmp(1)/sumtmp(2)
-              if (rtmp.lt.SERI_SMALLS) then
-                estt = cmin
-              else
-                ! last coefficient is not included in error estimator
-                estt = ctmp/stmp*exp(-stmp*cnm)
-              endif
-            endif
-            ! add contribution to error estimator; variable weight
-            estx = estx + estt/(2.0*(jl-1)+1.0)*rtmp3
-          endif  ! if((cmin.gt.0.0).and.(cmax.gt.smallr))
-        enddo
+                     endif
+                  enddo
+                  rtmp = 1.0 - sumtmp(1)/sumtmp(2)
+                  if (rtmp.lt.SERI_SMALLS) then
+                     estt = cmin
+                  else
+                     ! last coefficient is not included in error estimator
+                     estt = ctmp/stmp*exp(-stmp*cnm)
+                  endif
+               endif
+               ! add contribution to error estimator; variable weight
+               estx = estx + estt/(2.0*(jl-1)+1.0)*rtmp3
+            endif  ! if((cmin.gt.0.0).and.(cmax.gt.smallr))
+         enddo
       enddo
       ! constant weight
       ! Multiplication by 4 in 2D / 8 in 3D
@@ -659,7 +657,7 @@ subroutine speri_extrap(this,estx,sigx,coef11,coef, &
       ! final everaging
       ! sigt = 2*sigma so we divide by 2
       if (nsigt.gt.0) then
-        sigx = 0.5*sigt/nsigt
+         sigx = 0.5*sigt/nsigt
       endif
 
     end associate
diff --git a/src/simulation.f90 b/src/simulation.f90
index b57f744a6bb..db36125d75e 100644
--- a/src/simulation.f90
+++ b/src/simulation.f90
@@ -44,34 +44,42 @@ module simulation
   use field, only : field_t
   use profiler
   use math, only : col2
-  use simulation_component_global, only : neko_simcomps
-  use json_module, only : json_file_t => json_file
+  use simcomp_executor, only : neko_simcomps
   use json_utils, only : json_get_or_default
+  use time_step_controller
   implicit none
   private
 
   public :: neko_solve
-  
+
 contains
 
   !> Main driver to solve a case @a C
   subroutine neko_solve(C)
-    implicit none
-    type(case_t), intent(inout) :: C
+    type(case_t), target, intent(inout) :: C
     real(kind=rp) :: t, cfl
     real(kind=dp) :: start_time_org, start_time, end_time
-    character(len=LOG_SIZE) :: log_buf    
+    character(len=LOG_SIZE) :: log_buf
     integer :: tstep, i
     character(len=:), allocatable :: restart_file
     logical :: output_at_end, found
+    ! for variable_tsteping
+    real(kind=rp) :: cfl_avrg = 0.0_rp
+    type(time_step_controller_t) :: dt_controller
 
     t = 0d0
     tstep = 0
     call neko_log%section('Starting simulation')
     write(log_buf,'(A, E15.7,A,E15.7,A)') 'T  : [', 0d0,',',C%end_time,')'
     call neko_log%message(log_buf)
-    write(log_buf,'(A, E15.7)') 'dt :  ', C%dt
-    call neko_log%message(log_buf)
+    call dt_controller%init(C%params)
+    if (.not. dt_controller%if_variable_dt) then
+       write(log_buf,'(A, E15.7)') 'dt :  ', C%dt
+       call neko_log%message(log_buf)
+    else
+       write(log_buf,'(A, E15.7)') 'CFL :  ', dt_controller%set_cfl
+       call neko_log%message(log_buf)
+    end if
 
     call C%params%get('case.restart_file', restart_file, found)
     if (found .and. len_trim(restart_file) .gt. 0) then
@@ -79,15 +87,11 @@ subroutine neko_solve(C)
        call simulation_restart(C, t)
 
        ! Restart the simulation components
-       if (allocated(neko_simcomps)) then
-         do i=1, size(neko_simcomps)
-            call neko_simcomps(i)%simcomp%restart(t)
-         end do
-       end if
+       call neko_simcomps%restart(t)
     end if
 
     !> Call stats, samplers and user-init before time loop
-    call neko_log%section('Postprocessing')       
+    call neko_log%section('Postprocessing')
     call C%q%eval(t, C%dt, tstep)
     call C%s%sample(t, tstep)
 
@@ -97,13 +101,20 @@ subroutine neko_solve(C)
     call neko_log%newline()
 
     call profiler_start
-
+    cfl = C%fluid%compute_cfl(C%dt)
     start_time_org = MPI_WTIME()
+
     do while (t .lt. C%end_time .and. (.not. jobctrl_time_limit()))
        call profiler_start_region('Time-Step')
        tstep = tstep + 1
        start_time = MPI_WTIME()
+       if (dt_controller%dt_last_change .eq. 0) then
+          cfl_avrg = cfl
+       end if
+       call dt_controller%set_dt(C%dt, cfl, cfl_avrg, tstep)
+       !calculate the cfl after the possibly varied dt
        cfl = C%fluid%compute_cfl(C%dt)
+
        call neko_log%status(t, C%end_time)
        write(log_buf, '(A,I6)') 'Time-step: ', tstep
        call neko_log%message(log_buf)
@@ -111,12 +122,10 @@ subroutine neko_solve(C)
 
        write(log_buf, '(A,E15.7,1x,A,E15.7)') 'CFL:', cfl, 'dt:', C%dt
        call neko_log%message(log_buf)
-
-       ! Fluid step 
        call simulation_settime(t, C%dt, C%ext_bdf, C%tlag, C%dtlag, tstep)
 
-       call neko_log%section('Fluid')       
-       call C%fluid%step(t, tstep, C%dt, C%ext_bdf)
+       call neko_log%section('Fluid')
+       call C%fluid%step(t, tstep, C%dt, C%ext_bdf, dt_controller)
        end_time = MPI_WTIME()
        write(log_buf, '(A,E15.7,A,E15.7)') &
             'Elapsed time (s):', end_time-start_time_org, ' Step time:', &
@@ -126,22 +135,18 @@ subroutine neko_solve(C)
        ! Scalar step
        if (allocated(C%scalar)) then
           start_time = MPI_WTIME()
-          call neko_log%section('Scalar')       
-          call C%scalar%step(t, tstep, C%dt, C%ext_bdf)
+          call neko_log%section('Scalar')
+          call C%scalar%step(t, tstep, C%dt, C%ext_bdf, dt_controller)
           end_time = MPI_WTIME()
           write(log_buf, '(A,E15.7,A,E15.7)') &
                'Elapsed time (s):', end_time-start_time_org, ' Step time:', &
                end_time-start_time
           call neko_log%end_section(log_buf)
-       end if                 
+       end if
 
-       call neko_log%section('Postprocessing')       
+       call neko_log%section('Postprocessing')
        ! Execute all simulation components
-       if (allocated(neko_simcomps)) then
-         do i=1, size(neko_simcomps)
-            call neko_simcomps(i)%simcomp%compute(t, tstep)
-         end do
-       end if
+       call neko_simcomps%compute(t, tstep)
 
        call C%q%eval(t, C%dt, tstep)
        call C%s%sample(t, tstep)
@@ -152,22 +157,21 @@ subroutine neko_solve(C)
                                       C%material_properties%cp, &
                                       C%material_properties%lambda, &
                                       C%params)
-         
+
        call C%usr%user_check(t, tstep,&
             C%fluid%u, C%fluid%v, C%fluid%w, C%fluid%p, C%fluid%c_Xh, C%params)
 
        call neko_log%end_section()
-       
+
        call neko_log%end()
        call profiler_end_region
     end do
-
     call profiler_stop
 
     call json_get_or_default(C%params, 'case.output_at_end',&
                              output_at_end, .true.)
     call C%s%sample(t, tstep, output_at_end)
-    
+
     if (.not. (output_at_end) .and. t .lt. C%end_time) then
        call simulation_joblimit_chkp(C, t)
     end if
@@ -175,7 +179,7 @@ subroutine neko_solve(C)
     call C%usr%user_finalize_modules(t, C%params)
 
     call neko_log%end_section('Normal end.')
-    
+
   end subroutine neko_solve
 
   subroutine simulation_settime(t, dt, ext_bdf, tlag, dtlag, step)
@@ -186,7 +190,7 @@ subroutine simulation_settime(t, dt, ext_bdf, tlag, dtlag, step)
     real(kind=rp), dimension(10) :: dtlag
     integer, intent(in) :: step
     integer :: i
-    
+
 
     do i = 10, 2, -1
        tlag(i) = tlag(i-1)
@@ -203,7 +207,7 @@ subroutine simulation_settime(t, dt, ext_bdf, tlag, dtlag, step)
     t = t + dt
 
     call ext_bdf%set_coeffs(dtlag)
-    
+
   end subroutine simulation_settime
 
   !> Restart a case @a C from a given checkpoint
@@ -213,7 +217,7 @@ subroutine simulation_restart(C, t)
     real(kind=rp), intent(inout) :: t
     integer :: i
     type(file_t) :: chkpf, previous_meshf
-    character(len=LOG_SIZE) :: log_buf   
+    character(len=LOG_SIZE) :: log_buf
     character(len=:), allocatable :: restart_file
     character(len=:), allocatable :: restart_mesh_file
     real(kind=rp) :: tol
@@ -233,20 +237,17 @@ subroutine simulation_restart(C, t)
 
     if (found) C%fluid%chkp%mesh2mesh_tol = tol
     
-    C%dtlag(:) = C%dt 
-    C%tlag(:) = t
-    do i = 1, size(C%tlag)
-       C%tlag(i) = t - i*C%dtlag(i)
-    end do
-
     chkpf = file_t(trim(restart_file))
     call chkpf%read(C%fluid%chkp)
+    C%dtlag = C%fluid%chkp%dtlag
+    C%tlag = C%fluid%chkp%tlag
+
     !Free the previous mesh, dont need it anymore
     call C%fluid%chkp%previous_mesh%free()
     do i = 1, size(C%dtlag)
        call C%ext_bdf%set_coeffs(C%dtlag)
     end do
-    
+
     call C%fluid%restart(C%dtlag, C%tlag)
     if (allocated(C%scalar)) call C%scalar%restart( C%dtlag, C%tlag)
 
@@ -259,7 +260,6 @@ subroutine simulation_restart(C, t)
     call neko_log%message(log_buf)
     call neko_log%end_section()
 
-    
     call C%s%set_counter(t)
   end subroutine simulation_restart
 
@@ -275,8 +275,9 @@ subroutine simulation_joblimit_chkp(C, t)
     call chkpf%write(C%fluid%chkp, t)
     write(log_buf, '(A)') '! saving checkpoint >>>'
     call neko_log%message(log_buf)
-    
+
   end subroutine simulation_joblimit_chkp
 
 end module simulation
 
+
diff --git a/src/simulation_components/field_writer.f90 b/src/simulation_components/field_writer.f90
new file mode 100644
index 00000000000..5eb34654fb0
--- /dev/null
+++ b/src/simulation_components/field_writer.f90
@@ -0,0 +1,160 @@
+! Copyright (c) 2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!> Implements the `field_writer_t` type.
+
+module field_writer
+  use num_types, only : rp, dp, sp
+  use json_module, only : json_file
+  use simulation_component, only : simulation_component_t
+  use field_registry, only : neko_field_registry
+  use field, only : field_t
+  use operators, only : curl
+  use case, only : case_t
+  use fld_file_output, only : fld_file_output_t
+  use json_utils, only : json_get, json_get_or_default
+  implicit none
+  private
+
+  !> A simulation component that writes a 3d field to a file.
+  type, public, extends(simulation_component_t) :: field_writer_t
+     !> Output writer.
+     type(fld_file_output_t), private :: output
+
+   contains
+     !> Constructor from json, wrapping the actual constructor.
+     procedure, pass(this) :: init => field_writer_init_from_json
+     !> Actual constructor.
+     procedure, pass(this) :: init_from_attributes => &
+        field_writer_init_from_attributes
+     !> Destructor.
+     procedure, pass(this) :: free => field_writer_free
+     !> Here to compy with the interface, does nothing.
+     procedure, pass(this) :: compute_ => field_writer_compute
+  end type field_writer_t
+
+contains
+
+  !> Constructor from json.
+  !> @param json JSON object with the parameters.
+  !! @param case The case object.
+  subroutine field_writer_init_from_json(this, json, case)
+    class(field_writer_t), intent(inout) :: this
+    type(json_file), intent(inout) :: json
+    class(case_t), intent(inout), target :: case
+    character(len=:), allocatable :: filename
+    character(len=:), allocatable :: precision
+    character(len=20), allocatable :: fields(:)
+
+    call this%init_base(json, case)
+    call json_get(json, "fields", fields)
+
+    if (json%valid_path("output_filename")) then
+       call json_get(json, "output_filename", filename)
+       if (json%valid_path("output_precision")) then
+           call json_get(json, "output_precision", precision)
+           if (precision == "double") then
+              call field_writer_init_from_attributes(this, fields, filename, dp)
+           else
+              call field_writer_init_from_attributes(this, fields, filename, sp)
+           end if
+       else
+           call field_writer_init_from_attributes(this, fields, filename)
+       end if
+    else
+       call field_writer_init_from_attributes(this, fields)
+    end if
+  end subroutine field_writer_init_from_json
+
+  !> Actual constructor.
+  !! @param fields Array of field names to be sampled.
+  !! @param filename The name of the file save the fields to. Optional, if not
+  !! provided, fields are added to the main output file.
+  !! @param precision The real precision of the output data. Optional, defaults
+  !! to single precision.
+  subroutine field_writer_init_from_attributes(this, fields, filename, precision)
+    class(field_writer_t), intent(inout) :: this
+    character(len=20), allocatable, intent(in) :: fields(:)
+    character(len=*), intent(in), optional :: filename
+    integer, intent(in), optional :: precision
+    character(len=20) :: fieldi
+    integer :: i
+
+    ! Regsiter fields if they don't exist.
+    do i=1, size(fields)
+      fieldi = trim(fields(i))
+      call neko_field_registry%add_field(this%case%fluid%dm_Xh, fieldi,&
+                                         ignore_existing=.true.)
+    end do
+
+    if (present(filename)) then
+       if (present(precision)) then
+          call this%output%init(precision, filename, size(fields))
+       else
+          call this%output%init(sp, filename, size(fields))
+       end if
+       do i=1, size(fields)
+          fieldi = trim(fields(i))
+          this%output%fields%fields(i)%f => &
+            neko_field_registry%get_field(fieldi)
+       end do
+
+       call this%case%s%add(this%output, &
+                            this%output_controller%control_value, &
+                            this%output_controller%control_mode)
+    else
+      do i=1, size(fields)
+         fieldi = trim(fields(i))
+         call this%case%f_out%fluid%append(neko_field_registry%get_field(fieldi))
+      end do
+    end if
+
+  end subroutine field_writer_init_from_attributes
+
+  !> Destructor.
+  subroutine field_writer_free(this)
+    class(field_writer_t), intent(inout) :: this
+    call this%free_base()
+  end subroutine field_writer_free
+
+  !> Here to comply with the interface, does nothing.
+  !! @param t The time value.
+  !! @param tstep The current time-step
+  subroutine field_writer_compute(this, t, tstep)
+    class(field_writer_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+
+  end subroutine field_writer_compute
+
+end module field_writer
diff --git a/src/simulation_components/lambda2.f90 b/src/simulation_components/lambda2.f90
index 8050a9a3c53..349da645b2f 100644
--- a/src/simulation_components/lambda2.f90
+++ b/src/simulation_components/lambda2.f90
@@ -39,10 +39,10 @@ module lambda2
   use json_module, only : json_file
   use simulation_component, only : simulation_component_t
   use field_registry, only : neko_field_registry
-  use field, only : field_t, field_ptr_t
-  use field_list, only : field_list_t
+  use field, only : field_t
   use operators, only : lambda2op
   use case, only : case_t
+  use field_writer, only : field_writer_t
   use device
   implicit none
   private
@@ -62,6 +62,9 @@ module lambda2
      type(field_t) :: temp1
      type(field_t) :: temp2
 
+     !> Output writer.
+     type(field_writer_t) :: writer
+
    contains
      !> Constructor from json.
      procedure, pass(this) :: init => lambda2_init_from_json
@@ -80,9 +83,16 @@ module lambda2
   subroutine lambda2_init_from_json(this, json, case)
     class(lambda2_t), intent(inout) :: this
     type(json_file), intent(inout) :: json
-    class(case_t), intent(inout), target ::case 
+    class(case_t), intent(inout), target ::case
+    character(len=20) :: fields(1)
+
+    ! Add fields keyword to the json so that the field_writer picks it up.
+    ! Will also add fields to the registry.
+    fields(1) = "lambda2"
+    call json%add("fields", fields)
 
     call this%init_base(json, case)
+    call this%writer%init(json, case)
 
     call lambda2_init_from_attributes(this)
   end subroutine lambda2_init_from_json
@@ -91,17 +101,10 @@ end subroutine lambda2_init_from_json
   subroutine lambda2_init_from_attributes(this)
     class(lambda2_t), intent(inout) :: this
 
-    this%u => neko_field_registry%get_field_by_name("u")
-    this%v => neko_field_registry%get_field_by_name("v")
-    this%w => neko_field_registry%get_field_by_name("w")
-
-    if (.not. neko_field_registry%field_exists("lambda2")) then
-       call neko_field_registry%add_field(this%u%dof, "lambda2")
-    end if
-    this%lambda2 => neko_field_registry%get_field_by_name("lambda2")
-    !Store lambda2 in the next free field in the fluid output.
-    !If running without scalar this means the temperature field.
-    call this%case%f_out%fluid%append(this%lambda2)
+    this%u => neko_field_registry%get_field("u")
+    this%v => neko_field_registry%get_field("v")
+    this%w => neko_field_registry%get_field("w")
+    this%lambda2 => neko_field_registry%get_field("lambda2")
   end subroutine lambda2_init_from_attributes
 
   !> Destructor.
diff --git a/src/simulation_components/les_simcomp.f90 b/src/simulation_components/les_simcomp.f90
new file mode 100644
index 00000000000..92886be5fe7
--- /dev/null
+++ b/src/simulation_components/les_simcomp.f90
@@ -0,0 +1,107 @@
+! Copyright (c) 2023, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!> Implements the `les_simcomp_t` type.
+
+module les_simcomp
+  use num_types, only : rp
+  use json_module, only : json_file
+  use simulation_component, only : simulation_component_t
+  use field_registry, only : neko_field_registry
+  use field, only : field_t
+  use operators, only : curl
+  use case, only : case_t
+  use les_model, only : les_model_t
+  use les_model_fctry, only : les_model_factory
+  use json_utils, only : json_get
+  implicit none
+  private
+
+  !> A simulation component that drives the computation of the SGS
+  !! viscosity.
+   type, public, extends(simulation_component_t) :: les_simcomp_t
+     !> The LES model.
+     class(les_model_t), allocatable :: les_model
+   contains
+     !> Constructor from json, wrapping the actual constructor.
+     procedure, pass(this) :: init => les_simcomp_init_from_json
+     !> Destructor.
+     procedure, pass(this) :: free => les_simcomp_free
+     !> Compute the les_simcomp field.
+     procedure, pass(this) :: compute_ => les_simcomp_compute
+  end type les_simcomp_t
+
+contains
+
+  !> Constructor from json.
+  subroutine les_simcomp_init_from_json(this, json, case)
+    class(les_simcomp_t), intent(inout) :: this
+    type(json_file), intent(inout) :: json
+    class(case_t), intent(inout), target :: case
+    character(len=:), allocatable :: name
+
+    call this%free()
+
+    call json_get(json, "model", name)
+
+    call this%init_base(json, case)
+
+    call les_model_factory(this%les_model, name, case%fluid%dm_Xh,&
+                           case%fluid%c_Xh, json)
+
+  end subroutine les_simcomp_init_from_json
+
+  !> Destructor.
+  subroutine les_simcomp_free(this)
+    class(les_simcomp_t), intent(inout) :: this
+    call this%free_base()
+
+    if (allocated(this%les_model)) then
+      call this%les_model%free()
+      deallocate(this%les_model)
+    end if
+  end subroutine les_simcomp_free
+
+  !> Compute the les_simcomp field.
+  !! @param t The time value.
+  !! @param tstep The current time-step.
+  subroutine les_simcomp_compute(this, t, tstep)
+    class(les_simcomp_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+
+    call this%les_model%compute(t, tstep)
+
+  end subroutine les_simcomp_compute
+
+end module les_simcomp
diff --git a/src/simulation_components/probes.F90 b/src/simulation_components/probes.F90
index eb5cbb1c799..ef88980985b 100644
--- a/src/simulation_components/probes.F90
+++ b/src/simulation_components/probes.F90
@@ -38,8 +38,7 @@ module probes
   use num_types, only: rp
   use matrix, only: matrix_t
   use logger, only: neko_log, LOG_SIZE
-  use utils, only: neko_error, neko_warning
-  use field, only: field_t
+  use utils, only: neko_error
   use field_list, only: field_list_t
   use simulation_component
   use field_registry, only : neko_field_registry
@@ -47,7 +46,6 @@ module probes
   use json_module, only : json_file
   use json_utils, only : json_get
   use global_interpolation, only: global_interpolation_t
-  use math, only: rzero, copy
   use tensor, only: trsp
   use comm
   use device
@@ -58,7 +56,7 @@ module probes
   use, intrinsic :: iso_c_binding
   implicit none
   private
-  
+
   type, public, extends(simulation_component_t) :: probes_t
      !> Number of output fields
      integer :: n_fields = 0
@@ -86,7 +84,7 @@ module probes
      real(kind=rp), allocatable :: global_output_values(:,:)
      !> Output variables
      type(file_t) :: fout
-     type(matrix_t) :: mat_out     
+     type(matrix_t) :: mat_out
    contains
      !> Initialize from json
      procedure, pass(this) :: init => probes_init_from_json
@@ -99,16 +97,16 @@ module probes
      procedure, pass(this) :: setup_offset => probes_setup_offset
      !> Interpolate each probe from its `r,s,t` coordinates.
      procedure, pass(this) :: compute_ => probes_evaluate_and_write
-     
+
   end type probes_t
-  
+
 contains
-  
+
   !> Constructor from json.
   subroutine probes_init_from_json(this, json, case)
     class(probes_t), intent(inout) :: this
     type(json_file), intent(inout) :: json
-    class(case_t), intent(inout), target :: case 
+    class(case_t), intent(inout), target :: case
     real(kind=rp), allocatable :: xyz(:,:)
     character(len=:), allocatable  :: output_file
     character(len=:), allocatable  :: points_file
@@ -119,18 +117,18 @@ subroutine probes_init_from_json(this, json, case)
 
     !> Read from case file
     call json%info('fields', n_children=this%n_fields)
-    call json_get(json, 'fields', this%which_fields) 
+    call json_get(json, 'fields', this%which_fields)
     !> Should be extended to not only csv
     !! but also be possible to define in userfile for example
     call json_get(json, 'points_file', points_file)
-    call json_get(json, 'output_file', output_file) 
+    call json_get(json, 'output_file', output_file)
 
     allocate(this%sampled_fields%fields(this%n_fields))
     do i = 1, this%n_fields
        this%sampled_fields%fields(i)%f => neko_field_registry%get_field(&
                                           trim(this%which_fields(i)))
     end do
-    !> This is distributed as to make it similar to parallel file 
+    !> This is distributed as to make it similar to parallel file
     !! formats latera
     !! Reads all into rank 0
     call read_probe_locations(this, this%xyz, this%n_local_probes, &
@@ -149,6 +147,7 @@ subroutine probes_init_from_attributes(this, dof, output_file)
     class(probes_t), intent(inout) :: this
     type(dofmap_t), intent(in) :: dof
     character(len=:), allocatable, intent(inout)  :: output_file
+    character(len=1024) :: header_line
     real(kind=rp), allocatable :: global_output_coords(:,:)
     integer :: i, ierr
     type(matrix_t) :: mat_coords
@@ -165,7 +164,7 @@ subroutine probes_init_from_attributes(this, dof, output_file)
     allocate(this%out_values(this%n_local_probes,this%n_fields))
     allocate(this%out_values_d(this%n_fields))
     allocate(this%out_vals_trsp(this%n_fields,this%n_local_probes))
-    
+
     if (NEKO_BCKND_DEVICE .eq. 1) then
        do i = 1, this%n_fields
           this%out_values_d(i) = c_null_ptr
@@ -180,34 +179,42 @@ subroutine probes_init_from_attributes(this, dof, output_file)
     this%fout = file_t(trim(output_file))
 
     select type(ft => this%fout%file_type)
-      type is (csv_file_t)
-          !> Necessary for not-parallel csv format...
-          !! offsets and n points per pe
-          !! Needed at root for sequential csv i/o
-          allocate(this%n_local_probes_tot(pe_size))
-          allocate(this%n_local_probes_tot_offset(pe_size))
-          call this%setup_offset()
-          if (pe_rank .eq. 0) then
-             allocate(global_output_coords(3,&
+    type is (csv_file_t)
+
+       ! Build the header
+       write(header_line, '(I0,A,I0)') this%n_global_probes, ",", this%n_fields
+       do i = 1, this%n_fields
+          header_line = trim(header_line) // "," // trim(this%which_fields(i))
+       end do
+       call this%fout%set_header(header_line)
+
+       !> Necessary for not-parallel csv format...
+       !! offsets and n points per pe
+       !! Needed at root for sequential csv i/o
+       allocate(this%n_local_probes_tot(pe_size))
+       allocate(this%n_local_probes_tot_offset(pe_size))
+       call this%setup_offset()
+       if (pe_rank .eq. 0) then
+          allocate(global_output_coords(3,&
                       this%n_global_probes))
-             call this%mat_out%init(this%n_global_probes, this%n_fields)
-             allocate(this%global_output_values(this%n_fields,&
+          call this%mat_out%init(this%n_global_probes, this%n_fields)
+          allocate(this%global_output_values(this%n_fields,&
                       this%n_global_probes))
-             call mat_coords%init(this%n_global_probes,3)
-          end if
-          call MPI_Gatherv(this%xyz, 3*this%n_local_probes,&
+          call mat_coords%init(this%n_global_probes,3)
+       end if
+       call MPI_Gatherv(this%xyz, 3*this%n_local_probes,&
                            MPI_DOUBLE_PRECISION, global_output_coords,&
                            3*this%n_local_probes_tot,&
                            3*this%n_local_probes_tot_offset,&
-                           MPI_DOUBLE_PRECISION, 0, NEKO_COMM, ierr) 
-          if (pe_rank .eq. 0) then
-             call trsp(mat_coords%x, this%n_global_probes,&
+                           MPI_DOUBLE_PRECISION, 0, NEKO_COMM, ierr)
+       if (pe_rank .eq. 0) then
+          call trsp(mat_coords%x, this%n_global_probes,&
                        global_output_coords, 3)
-             !! Write the data to the file
-             call this%fout%write(mat_coords)
-          end if
+          !! Write the data to the file
+          call this%fout%write(mat_coords)
+       end if
     class default
-        call neko_error("Invalid data. Expected csv_file_t.")
+       call neko_error("Invalid data. Expected csv_file_t.")
     end select
 
   end subroutine probes_init_from_attributes
@@ -223,15 +230,15 @@ subroutine probes_free(this)
     if (allocated(this%out_values)) then
        deallocate(this%out_values)
     end if
-    
+
     if (allocated(this%out_vals_trsp)) then
        deallocate(this%out_vals_trsp)
     end if
-    
+
     if (allocated(this%sampled_fields%fields)) then
        deallocate(this%sampled_fields%fields)
     end if
-    
+
     if (allocated(this%n_local_probes_tot)) then
        deallocate(this%n_local_probes_tot)
     end if
@@ -239,7 +246,7 @@ subroutine probes_free(this)
     if (allocated(this%n_local_probes_tot_offset)) then
        deallocate(this%n_local_probes_tot_offset)
     end if
-    
+
     if (allocated(this%global_output_values)) then
        deallocate(this%global_output_values)
     end if
@@ -272,7 +279,7 @@ subroutine probes_show(this)
     end do
     call neko_log%end_section()
     call neko_log%newline()
-    
+
   end subroutine probes_show
 
   !> Show the status of processor/element owner and error code for each point
@@ -326,7 +333,7 @@ subroutine probes_evaluate_and_write(this, t, tstep)
 
     !> Check controller to determine if we must write
 
-       
+
     do i = 1,this%n_fields
        call this%global_interp%evaluate(this%out_values(:,i), &
                                         this%sampled_fields%fields(i)%f%x)
@@ -349,13 +356,13 @@ subroutine probes_evaluate_and_write(this, t, tstep)
                            MPI_DOUBLE_PRECISION, this%global_output_values,&
                            this%n_fields*this%n_local_probes_tot,&
                            this%n_fields*this%n_local_probes_tot_offset,&
-                           MPI_DOUBLE_PRECISION, 0, NEKO_COMM, ierr) 
+                           MPI_DOUBLE_PRECISION, 0, NEKO_COMM, ierr)
           if (pe_rank .eq. 0) then
              call trsp(this%mat_out%x, this%n_global_probes, &
                        this%global_output_values, this%n_fields)
              call this%fout%write(this%mat_out, t)
           end if
-       else 
+       else
           call neko_error('probes sim comp, parallel io need implementation')
        end if
 
@@ -380,17 +387,17 @@ subroutine read_probe_locations(this, xyz, n_local_probes, n_global_probes, poin
     file_in = file_t(trim(points_file))
     !> Reads on rank 0 and distributes the probes across the different ranks
     select type(ft => file_in%file_type)
-      type is (csv_file_t)
-          call read_xyz_from_csv(this, xyz, n_local_probes, n_global_probes, ft)
-          this%seq_io = .true.
-      class default
-          call neko_error("Invalid data. Expected csv_file_t.")
+    type is (csv_file_t)
+       call read_xyz_from_csv(this, xyz, n_local_probes, n_global_probes, ft)
+       this%seq_io = .true.
+    class default
+       call neko_error("Invalid data. Expected csv_file_t.")
     end select
 
     !> Close the file
     call file_free(file_in)
 
-  end subroutine read_probe_locations 
+  end subroutine read_probe_locations
 
   !> Read and initialize the number of probes from a `csv` input file
   !! @param xyz xyz coordinates of the probes
@@ -421,7 +428,7 @@ subroutine read_xyz_from_csv(this, xyz, n_local_probes, n_global_probes, f)
        call mat_in2%init(3,this%n_global_probes)
        call f%read(mat_in)
        call trsp(xyz, 3, mat_in%x, this%n_global_probes)
-    else 
+    else
        n_local_probes = 0
        this%n_local_probes = 0
        allocate(xyz(3,this%n_local_probes))
diff --git a/src/simulation_components/simcomp_executor.f90 b/src/simulation_components/simcomp_executor.f90
new file mode 100644
index 00000000000..ea231c2f311
--- /dev/null
+++ b/src/simulation_components/simcomp_executor.f90
@@ -0,0 +1,182 @@
+! Copyright (c) 2023, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!> Contains the `simcomp_executor_t` type.
+module simcomp_executor
+  use num_types, only : rp
+  use simulation_component, only : simulation_component_wrapper_t
+  use simulation_component_fctry, only : simulation_component_factory
+  use json_module, only : json_file, json_core, json_value
+  use json_utils, only : json_get, json_get_or_default, json_extract_item
+  use case, only : case_t
+  implicit none
+  private
+
+  !> Singleton type that serves as a driver for the simulation components.
+  !! Stores all the components in the case and provides an interface matching
+  !! that of a single simcomp, which executes the corresponding routines for
+  !! each stored simcomp.
+  !! The execution order is based on the order property of each simcomp.
+  !! By default, the order is by the order of apparence in the case file.
+  type, public :: simcomp_executor_t
+     !> The simcomps.
+     class(simulation_component_wrapper_t), allocatable :: simcomps(:)
+     !> Index array defining the order of execution, i.e. simcomps(order(1)) is
+     !! first to execute, and so on.
+     integer, allocatable ::  order(:)
+   contains
+     !> Constructor.
+     procedure, pass(this) :: init => simcomp_executor_init
+     !> Destructor.
+     procedure, pass(this) :: free => simcomp_executor_free
+     !> Execute compute_ for all simcomps.
+     procedure, pass(this) :: compute => simcomp_executor_compute
+     !> Execute restart for all simcomps.
+     procedure, pass(this) :: restart=> simcomp_executor_restart
+
+  end type simcomp_executor_t
+
+  !> Global variable for the simulation component driver.
+  type(simcomp_executor_t), public :: neko_simcomps
+
+contains
+
+  !> Constructor.
+  subroutine simcomp_executor_init(this, case)
+    class(simcomp_executor_t), intent(inout) :: this
+    type(case_t), target, intent(inout) :: case
+    integer :: n_simcomps, i
+    type(json_core) :: core
+    type(json_value), pointer :: simcomp_object
+    type(json_file) :: comp_subdict
+    logical :: found
+    ! Help array for finding minimal values
+    logical, allocatable :: mask(:)
+    ! The order value for each simcomp in order of appearance in the case file.
+    integer, allocatable :: read_order(:)
+    ! Location of the min value
+    integer :: loc(1)
+
+    call this%free()
+
+    if (case%params%valid_path('case.simulation_components')) then
+
+       call case%params%info('case.simulation_components', n_children=n_simcomps)
+       allocate(this%simcomps(n_simcomps))
+       allocate(this%order(n_simcomps))
+       allocate(read_order(n_simcomps))
+       allocate(mask(n_simcomps))
+       mask = .true.
+
+       call case%params%get_core(core)
+       call case%params%get('case.simulation_components', simcomp_object, found)
+
+       ! We need a separate loop to figure out the order, so that we can
+       ! apply the order to the initialization as well.
+       do i=1, n_simcomps
+          ! Create a new json containing just the subdict for this simcomp
+          call json_extract_item(core, simcomp_object, i, comp_subdict)
+          call json_get_or_default(comp_subdict, "order", read_order(i), i)
+       end do
+
+       ! Figure out the execution order using a poor man's argsort.
+       ! Searches for the location of the min value, each time masking out the
+       ! found location prior to the next search.
+       do i= 1, n_simcomps
+         loc = minloc(read_order, mask=mask)
+         this%order(i) = loc(1)
+         mask(loc) = .false.
+       end do
+
+       ! Init in the determined order.
+       do i=1, n_simcomps
+          call json_extract_item(core, simcomp_object, this%order(i),&
+                                    comp_subdict)
+          ! Have to add, the simcomp constructor expects it.
+          if (.not. comp_subdict%valid_path("order")) then
+             call comp_subdict%add("order", this%order(i))
+          end if
+          call simulation_component_factory(this%simcomps(i)%simcomp, &
+                                            comp_subdict, case)
+       end do
+    end if
+  end subroutine simcomp_executor_init
+
+  !> Destructor.
+  subroutine simcomp_executor_free(this)
+    class(simcomp_executor_t), intent(inout) :: this
+    integer :: i
+
+    if (allocated(this%order)) deallocate(this%order)
+
+    if (allocated(this%simcomps)) then
+       do i=1, size(this%simcomps)
+          call this%simcomps(i)%simcomp%free
+       end do
+       deallocate(this%simcomps)
+    end if
+  end subroutine simcomp_executor_free
+
+  !> Execute compute_ for all simcomps.
+  !! @param t The time value.
+  !! @param tstep The timestep number.
+  subroutine simcomp_executor_compute(this, t, tstep)
+    class(simcomp_executor_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+    integer :: i
+
+    if (allocated(this%simcomps)) then
+       do i=1, size(this%simcomps)
+          call this%simcomps(this%order(i))%simcomp%compute(t, tstep)
+       end do
+    end if
+
+  end subroutine simcomp_executor_compute
+
+  !> Execute restart for all simcomps.
+  !! @param t The time value.
+  subroutine simcomp_executor_restart(this, t)
+    class(simcomp_executor_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: t
+    integer :: i
+
+    if (allocated(this%simcomps)) then
+       do i=1, size(this%simcomps)
+          call this%simcomps(this%order(i))%simcomp%restart(t)
+       end do
+    end if
+
+  end subroutine simcomp_executor_restart
+
+end module simcomp_executor
diff --git a/src/simulation_components/simulation_component.f90 b/src/simulation_components/simulation_component.f90
index fe2b0468834..102f14a5b9b 100644
--- a/src/simulation_components/simulation_component.f90
+++ b/src/simulation_components/simulation_component.f90
@@ -40,10 +40,10 @@ module simulation_component
   use json_module, only : json_file
   use case, only : case_t
   use time_based_controller, only : time_based_controller_t
-  use json_utils, only : json_get_or_default
+  use json_utils, only : json_get_or_default, json_get
   implicit none
   private
-  
+
   !> Base abstract class for simulation components.
   type, abstract, public :: simulation_component_t
      !> Pointer to the simulation case.
@@ -52,6 +52,8 @@ module simulation_component
      type(time_based_controller_t) :: compute_controller
      !> Controller for when to do output.
      type(time_based_controller_t) :: output_controller
+     !> The execution order, lowest excutes first.
+     integer :: order
    contains
      !> Constructor for the simulation_component_t (base) class.
      procedure, pass(this) :: init_base => simulation_component_init_base
@@ -70,58 +72,59 @@ module simulation_component
      !> The main function to be executed during the run.
      procedure(simulation_component_compute), pass(this), deferred :: compute_
   end type simulation_component_t
-  
+
   !> A helper type that is needed to have an array of polymorphic objects
   type, public :: simulation_component_wrapper_t
-    class(simulation_component_t), allocatable :: simcomp
+     class(simulation_component_t), allocatable :: simcomp
   end type simulation_component_wrapper_t
 
-  
+
   abstract interface
      !> The common constructor using a JSON dictionary.
      !! @param json The JSON with properties.
-     !! @param case The case_t object. 
-     subroutine simulation_component_init(this, json, case)  
+     !! @param case The case_t object.
+     subroutine simulation_component_init(this, json, case)
        import simulation_component_t, json_file, case_t
        class(simulation_component_t), intent(inout) :: this
        type(json_file), intent(inout) :: json
        class(case_t), intent(inout), target :: case
-     end subroutine
+     end subroutine simulation_component_init
   end interface
 
   abstract interface
      !> Destructor.
-     subroutine simulation_component_free(this)  
+     subroutine simulation_component_free(this)
        import simulation_component_t
        class(simulation_component_t), intent(inout) :: this
-     end subroutine
+     end subroutine simulation_component_free
   end interface
 
   abstract interface
      !> The main function to be executed during the run.
      !! @param t The time value.
      !! @param tstep The current time-step
-     subroutine simulation_component_compute(this, t, tstep)  
+     subroutine simulation_component_compute(this, t, tstep)
        import simulation_component_t, rp
        class(simulation_component_t), intent(inout) :: this
        real(kind=rp), intent(in) :: t
        integer, intent(in) :: tstep
-     end subroutine
+     end subroutine simulation_component_compute
   end interface
 
 contains
   !> Constructor for the `simulation_component_t` (base) class.
-  subroutine simulation_component_init_base(this, json, case)  
+  subroutine simulation_component_init_base(this, json, case)
     class(simulation_component_t), intent(inout) :: this
     type(json_file), intent(inout) :: json
     class(case_t), intent(inout), target :: case
     character(len=:), allocatable :: compute_control, output_control
     real(kind=rp) :: compute_value, output_value
+    integer :: order
 
     this%case => case
     call json_get_or_default(json, "compute_control", compute_control, &
                              "tsteps")
-    call json_get_or_default(json, "compute_value", compute_value, 1.0_rp) 
+    call json_get_or_default(json, "compute_value", compute_value, 1.0_rp)
 
     ! We default to output whenever we execute
     call json_get_or_default(json, "output_control", output_control, &
@@ -129,6 +132,17 @@ subroutine simulation_component_init_base(this, json, case)
     call json_get_or_default(json, "output_value", output_value, &
                              compute_value)
 
+
+    if (output_control == "global") then
+       call json_get(this%case%params, 'case.fluid.output_control', &
+                     output_control)
+       call json_get(this%case%params, 'case.fluid.output_value', &
+                     output_value)
+    end if
+
+    call json_get(json, "order", order)
+    this%order = order
+
     call this%compute_controller%init(case%end_time, compute_control, &
                                         compute_value)
     call this%output_controller%init(case%end_time, output_control, &
@@ -137,7 +151,7 @@ subroutine simulation_component_init_base(this, json, case)
   end subroutine simulation_component_init_base
 
   !> Destructor for the `simulation_component_t` (base) class.
-  subroutine simulation_component_free_base(this)  
+  subroutine simulation_component_free_base(this)
     class(simulation_component_t), intent(inout) :: this
 
     nullify(this%case)
@@ -147,21 +161,21 @@ end subroutine simulation_component_free_base
   !! Serves as the public interface.
   !! @param t The time value.
   !! @param tstep The current time-step
-  subroutine simulation_component_compute_wrapper(this, t, tstep)  
+  subroutine simulation_component_compute_wrapper(this, t, tstep)
     class(simulation_component_t), intent(inout) :: this
     real(kind=rp), intent(in) :: t
     integer, intent(in) :: tstep
 
     if (this%compute_controller%check(t, tstep)) then
-      call this%compute_(t, tstep)
-      call this%compute_controller%register_execution()
+       call this%compute_(t, tstep)
+       call this%compute_controller%register_execution()
     end if
   end subroutine simulation_component_compute_wrapper
 
   !> Wrapper for calling `set_counter_` based for the controllers.
   !! Serves as the public interface.
   !! @param t The time value.
-  subroutine simulation_component_restart_wrapper(this, t)  
+  subroutine simulation_component_restart_wrapper(this, t)
     class(simulation_component_t), intent(inout) :: this
     real(kind=rp), intent(in) :: t
 
@@ -169,5 +183,5 @@ subroutine simulation_component_restart_wrapper(this, t)
     call this%output_controller%set_counter(t)
 
   end subroutine simulation_component_restart_wrapper
-  
+
 end module simulation_component
diff --git a/src/simulation_components/simulation_component_factory.f90 b/src/simulation_components/simulation_component_fctry.f90
similarity index 67%
rename from src/simulation_components/simulation_component_factory.f90
rename to src/simulation_components/simulation_component_fctry.f90
index 2c5028280fc..04f67251fb2 100644
--- a/src/simulation_components/simulation_component_factory.f90
+++ b/src/simulation_components/simulation_component_fctry.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2023, The Neko Authors
+! Copyright (c) 2024, The Neko Authors
 ! All rights reserved.
 !
 ! Redistribution and use in source and binary forms, with or without
@@ -37,38 +37,47 @@ module simulation_component_fctry
   use vorticity, only : vorticity_t
   use lambda2, only : lambda2_t
   use probes, only : probes_t
+  use les_simcomp, only : les_simcomp_t
   use json_module, only : json_file
   use case, only : case_t
   use json_utils, only : json_get
+  use logger, only : neko_log
+  use field_writer, only : field_writer_t
   implicit none
   private
-  
+
   public :: simulation_component_factory
-  
-  contains
+
+contains
 
   !> Simulation component factory. Both constructs and initializes the object.
   !! @param json JSON object initializing the simulation component.
   subroutine simulation_component_factory(simcomp, json, case)
-       class(simulation_component_t), allocatable, intent(inout) :: simcomp
-       type(json_file), intent(inout) :: json
-       class(case_t), intent(inout), target :: case
-       character(len=:), allocatable :: simcomp_type
-              
-       call json_get(json, "type", simcomp_type)
+    class(simulation_component_t), allocatable, intent(inout) :: simcomp
+    type(json_file), intent(inout) :: json
+    class(case_t), intent(inout), target :: case
+    character(len=:), allocatable :: simcomp_type
+
+    call json_get(json, "type", simcomp_type)
+
+    if (trim(simcomp_type) .eq. "vorticity") then
+       allocate(vorticity_t::simcomp)
+    else if (trim(simcomp_type) .eq. "lambda2") then
+       allocate(lambda2_t::simcomp)
+    else if (trim(simcomp_type) .eq. "probes") then
+       allocate(probes_t::simcomp)
+    else if (trim(simcomp_type) .eq. "les_model") then
+       allocate(les_simcomp_t::simcomp)
+    else if (trim(simcomp_type) .eq. "field_writer") then
+       allocate(field_writer_t::simcomp)
+    else
+       call neko_log%error("Unknown simulation component type: " &
+                           // trim(simcomp_type))
+       stop
+    end if
 
-       if (trim(simcomp_type) .eq. "vorticity") then 
-          allocate(vorticity_t::simcomp)
-       end if
-       if (trim(simcomp_type) .eq. "lambda2") then 
-          allocate(lambda2_t::simcomp)
-       end if
-       if (trim(simcomp_type) .eq. "probes") then 
-          allocate(probes_t::simcomp)
-       end if
-       
-       ! Initialize
-       call simcomp%init(json, case)
+    ! Initialize
+    call simcomp%init(json, case)
 
   end subroutine simulation_component_factory
 
diff --git a/src/simulation_components/simulation_component_global.f90 b/src/simulation_components/simulation_component_global.f90
deleted file mode 100644
index c51e7370c75..00000000000
--- a/src/simulation_components/simulation_component_global.f90
+++ /dev/null
@@ -1,79 +0,0 @@
-! Copyright (c) 2023, The Neko Authors
-! All rights reserved.
-!
-! Redistribution and use in source and binary forms, with or without
-! modification, are permitted provided that the following conditions
-! are met:
-!
-!   * Redistributions of source code must retain the above copyright
-!     notice, this list of conditions and the following disclaimer.
-!
-!   * Redistributions in binary form must reproduce the above
-!     copyright notice, this list of conditions and the following
-!     disclaimer in the documentation and/or other materials provided
-!     with the distribution.
-!
-!   * Neither the name of the authors nor the names of its
-!     contributors may be used to endorse or promote products derived
-!     from this software without specific prior written permission.
-!
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-! POSSIBILITY OF SUCH DAMAGE.
-!
-!
-!> Contains the global simulation component array and its init for the given 
-!! case.
-module simulation_component_global
-  use simulation_component, only : simulation_component_wrapper_t
-  use simulation_component_fctry, only : simulation_component_factory
-  use json_module, only : json_file, json_core, json_value
-  use case, only : case_t
-  implicit none
-  private
-  
-  class(simulation_component_wrapper_t), public, allocatable :: neko_simcomps(:)
-  public :: simcomps_global_init
-
-contains
-
-  !> Populates the global `neko_simcomps` array with objects based on the json
-  !! configuration and the simulation case.
-  !! @param case Simulation case.
-  subroutine simcomps_global_init(case)
-    type(case_t), intent(inout) :: case
-    integer :: n_simcomps, i
-    type(json_core) :: core
-    type(json_value), pointer :: simcomp_object, comp_pointer 
-    type(json_file) :: comp_subdict
-    character(len=:), allocatable :: buffer
-    logical :: found
-
-    if (case%params%valid_path('case.simulation_components')) then
-
-      call case%params%info('case.simulation_components', n_children=n_simcomps)
-      allocate(neko_simcomps(n_simcomps))
-
-      call case%params%get_core(core)
-      call case%params%get('case.simulation_components', simcomp_object, found)
-      do i=1, n_simcomps
-         ! Create a new json containing just the subdict for this simcomp
-         call core%get_child(simcomp_object, i, comp_pointer, found)
-         call core%print_to_string(comp_pointer, buffer)
-         call comp_subdict%load_from_string(buffer)
-         call simulation_component_factory(neko_simcomps(i)%simcomp, &
-                                           comp_subdict, case) 
-      end do
-    end if
-  end subroutine
-
-end module simulation_component_global
diff --git a/src/simulation_components/vorticity.f90 b/src/simulation_components/vorticity.f90
index 17fef434173..7428e181f94 100644
--- a/src/simulation_components/vorticity.f90
+++ b/src/simulation_components/vorticity.f90
@@ -31,19 +31,22 @@
 ! POSSIBILITY OF SUCH DAMAGE.
 !
 !
-!> Implements the `vorticity_t` type. 
+!> Implements the `vorticity_t` type.
 
 module vorticity
-  use num_types, only : rp
+  use num_types, only : rp, dp, sp
   use json_module, only : json_file
   use simulation_component, only : simulation_component_t
   use field_registry, only : neko_field_registry
   use field, only : field_t
   use operators, only : curl
   use case, only : case_t
+  use fld_file_output, only : fld_file_output_t
+  use json_utils, only : json_get, json_get_or_default
+  use field_writer, only : field_writer_t
   implicit none
   private
-  
+
   !> A simulation component that computes the vorticity field.
   !! Added to the field registry as `omega_x`, `omega_y``, and `omega_z`.
   type, public, extends(simulation_component_t) :: vorticity_t
@@ -66,74 +69,81 @@ module vorticity
      !> Work array.
      type(field_t) :: temp2
 
-     contains
-      !> Constructor from json, wrapping the actual constructor.
-      procedure, pass(this) :: init => vorticity_init_from_json
-      !> Actual constructor.
-      procedure, pass(this) :: init_from_attributes => &
+     !> Output writer.
+     type(field_writer_t) :: writer
+
+   contains
+     !> Constructor from json, wrapping the actual constructor.
+     procedure, pass(this) :: init => vorticity_init_from_json
+     !> Actual constructor.
+     procedure, pass(this) :: init_from_attributes => &
         vorticity_init_from_attributes
-      !> Destructor.
-      procedure, pass(this) :: free => vorticity_free
-      !> Compute the vorticity field.
-      procedure, pass(this) :: compute_ => vorticity_compute
+     !> Destructor.
+     procedure, pass(this) :: free => vorticity_free
+     !> Compute the vorticity field.
+     procedure, pass(this) :: compute_ => vorticity_compute
   end type vorticity_t
-  
-  contains
-  
+
+contains
+
   !> Constructor from json.
   subroutine vorticity_init_from_json(this, json, case)
-       class(vorticity_t), intent(inout) :: this
-       type(json_file), intent(inout) :: json
-       class(case_t), intent(inout), target :: case 
-       
-       call this%init_base(json, case)
+    class(vorticity_t), intent(inout) :: this
+    type(json_file), intent(inout) :: json
+    class(case_t), intent(inout), target :: case
+    character(len=:), allocatable :: filename
+    character(len=:), allocatable :: precision
+    character(len=20) :: fields(3)
+
+    ! Add fields keyword to the json so that the field_writer picks it up.
+    ! Will also add fields to the registry.
+    fields(1) = "omega_x"
+    fields(2) = "omega_y"
+    fields(3) = "omega_z"
+
+    call json%add("fields", fields)
 
-       call vorticity_init_from_attributes(this)
+    call this%init_base(json, case)
+    call this%writer%init(json, case)
+
+    call vorticity_init_from_attributes(this)
   end subroutine vorticity_init_from_json
 
   !> Actual constructor.
   subroutine vorticity_init_from_attributes(this)
-       class(vorticity_t), intent(inout) :: this
-
-       this%u => neko_field_registry%get_field_by_name("u")
-       this%v => neko_field_registry%get_field_by_name("v")
-       this%w => neko_field_registry%get_field_by_name("w")
-
-       if (.not. neko_field_registry%field_exists("omega_x")) then
-          call neko_field_registry%add_field(this%u%dof, "omega_x")
-       end if
-       if (.not. neko_field_registry%field_exists("omega_y")) then
-          call neko_field_registry%add_field(this%u%dof, "omega_y")
-       end if
-       if (.not. neko_field_registry%field_exists("omega_z")) then
-          call neko_field_registry%add_field(this%u%dof, "omega_z")
-       end if
-       this%omega_x => neko_field_registry%get_field_by_name("omega_x")
-       this%omega_y => neko_field_registry%get_field_by_name("omega_y")
-       this%omega_z => neko_field_registry%get_field_by_name("omega_z")
-
-       call this%temp1%init(this%u%dof)
-       call this%temp2%init(this%u%dof)
+    class(vorticity_t), intent(inout) :: this
+
+    this%u => neko_field_registry%get_field_by_name("u")
+    this%v => neko_field_registry%get_field_by_name("v")
+    this%w => neko_field_registry%get_field_by_name("w")
+
+    this%omega_x => neko_field_registry%get_field_by_name("omega_x")
+    this%omega_y => neko_field_registry%get_field_by_name("omega_y")
+    this%omega_z => neko_field_registry%get_field_by_name("omega_z")
+
+    call this%temp1%init(this%u%dof)
+    call this%temp2%init(this%u%dof)
+
   end subroutine vorticity_init_from_attributes
 
   !> Destructor.
   subroutine vorticity_free(this)
-       class(vorticity_t), intent(inout) :: this
-       call this%free_base()
-       call this%temp1%free()
-       call this%temp2%free()
+    class(vorticity_t), intent(inout) :: this
+    call this%free_base()
+    call this%temp1%free()
+    call this%temp2%free()
   end subroutine vorticity_free
 
   !> Compute the vorticity field.
   !! @param t The time value.
   !! @param tstep The current time-step
   subroutine vorticity_compute(this, t, tstep)
-       class(vorticity_t), intent(inout) :: this
-       real(kind=rp), intent(in) :: t
-       integer, intent(in) :: tstep
+    class(vorticity_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
 
-       call curl(this%omega_x, this%omega_y, this%omega_z, this%u, this%v, &
+    call curl(this%omega_x, this%omega_y, this%omega_z, this%u, this%v, &
                  this%w, this%temp1, this%temp2, this%case%fluid%c_Xh)
   end subroutine vorticity_compute
-  
+
 end module vorticity
diff --git a/src/source_terms/bcknd/cpu/boussinesq_source_term_cpu.f90 b/src/source_terms/bcknd/cpu/boussinesq_source_term_cpu.f90
new file mode 100644
index 00000000000..9d1174cdb3d
--- /dev/null
+++ b/src/source_terms/bcknd/cpu/boussinesq_source_term_cpu.f90
@@ -0,0 +1,69 @@
+! Copyright (c) 2023, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Implements the cpu kernel for the  `boussinesq_source_term_t` type.
+module boussinesq_source_term_cpu
+  use num_types, only : rp
+  use field_list, only : field_list_t
+  use field, only : field_t
+  use math, only : add2s2, cadd
+  implicit none
+  private
+
+  public :: boussinesq_source_term_compute_cpu
+
+contains
+
+  !> Computes the Boussinesq source term on the cpu.
+  !! @param fields The right-hand side.
+  !! @param s The scalar field
+  !! @param ref_value The reference value of the scalar field.
+  !! @param g The gravity vector.
+  !! @param beta The thermal expansion coefficient.
+  subroutine boussinesq_source_term_compute_cpu(fields, s, ref_value, g, beta)
+    type(field_list_t), intent(inout) :: fields
+    type(field_t), intent(inout) :: s
+    real(kind=rp), intent(in) :: ref_value
+    real(kind=rp), intent(in) :: g(3)
+    real(kind=rp), intent(in) :: beta
+    integer :: n_fields, i, n
+
+    n_fields = size(fields%fields)
+    n = fields%fields(1)%f%dof%size()
+
+    do i=1, n_fields
+       call add2s2(fields%fields(i)%f%x, s%x, g(i)*beta, n)
+       call cadd(fields%fields(i)%f%x, -g(i)*beta*ref_value, n)
+    end do
+  end subroutine boussinesq_source_term_compute_cpu
+
+end module boussinesq_source_term_cpu
diff --git a/src/source_terms/bcknd/cpu/brinkman_source_term_cpu.f90 b/src/source_terms/bcknd/cpu/brinkman_source_term_cpu.f90
new file mode 100644
index 00000000000..8bbf961c6d7
--- /dev/null
+++ b/src/source_terms/bcknd/cpu/brinkman_source_term_cpu.f90
@@ -0,0 +1,68 @@
+! Copyright (c) 2023, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Implements the cpu kernel for the `brinkman_source_term_t` type.
+module brinkman_source_term_cpu
+  use num_types, only: rp
+  use field, only: field_t
+  use field_list, only: field_list_t
+  use math, only: subcol3
+  use field_registry, only: neko_field_registry
+  implicit none
+  private
+
+  public :: brinkman_source_term_compute_cpu
+
+contains
+
+  !> Computes the Brinkman source term on the cpu.
+  !! @param fields The right-hand side.
+  !! @param values The values of the source components.
+  subroutine brinkman_source_term_compute_cpu(fields, brinkman)
+    type(field_list_t), intent(inout) :: fields
+    type(field_t), intent(in) :: brinkman
+    type(field_t), pointer :: u, v, w
+    integer :: n
+
+    n = fields%fields(1)%f%dof%size()
+
+    u => neko_field_registry%get_field('u')
+    v => neko_field_registry%get_field('v')
+    w => neko_field_registry%get_field('w')
+
+    call subcol3(fields%fields(1)%f%x, u%x, brinkman%x, n)
+    call subcol3(fields%fields(2)%f%x, v%x, brinkman%x, n)
+    call subcol3(fields%fields(3)%f%x, w%x, brinkman%x, n)
+
+  end subroutine brinkman_source_term_compute_cpu
+
+end module brinkman_source_term_cpu
diff --git a/src/source_terms/bcknd/cpu/const_source_term_cpu.f90 b/src/source_terms/bcknd/cpu/const_source_term_cpu.f90
index acd024d8d19..4510346d3c8 100644
--- a/src/source_terms/bcknd/cpu/const_source_term_cpu.f90
+++ b/src/source_terms/bcknd/cpu/const_source_term_cpu.f90
@@ -42,10 +42,10 @@ module const_source_term_cpu
 
 contains
 
-  !> Computs the constant source term on the cpu.
+  !> Computes the constant source term on the cpu.
   !! @param fields The right-hand side.
   !! @param values The values of the source components.
-  subroutine const_source_term_compute_cpu(fields, values) 
+  subroutine const_source_term_compute_cpu(fields, values)
     type(field_list_t), intent(inout) :: fields
     real(kind=rp), intent(in) :: values(:)
     integer :: n_fields, i, n
@@ -54,8 +54,8 @@ subroutine const_source_term_compute_cpu(fields, values)
     n = fields%fields(1)%f%dof%size()
 
     do i=1, n_fields
-        call cadd(fields%fields(i)%f%x, values(i), n)
+       call cadd(fields%fields(i)%f%x, values(i), n)
     end do
   end subroutine const_source_term_compute_cpu
-  
+
 end module const_source_term_cpu
diff --git a/src/source_terms/bcknd/cpu/filters_cpu.f90 b/src/source_terms/bcknd/cpu/filters_cpu.f90
new file mode 100644
index 00000000000..3b2dc2a4bab
--- /dev/null
+++ b/src/source_terms/bcknd/cpu/filters_cpu.f90
@@ -0,0 +1,84 @@
+! Copyright (c) 2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> CPU implementations of the filter functions.
+module filters_cpu
+  use num_types, only: rp
+  implicit none
+
+contains
+
+  ! ========================================================================== !
+  ! Internal functions and subroutines
+  ! ========================================================================== !
+
+  !> @brief Apply a smooth step function to a scalar.
+  elemental function smooth_step_cpu(x, edge0, edge1) result(res)
+    real(kind=rp), intent(in) :: x
+    real(kind=rp), intent(in) :: edge0, edge1
+    real(kind=rp) :: res, t
+
+    t = clamp_cpu((x - edge0) / (edge1 - edge0), 0.0_rp, 1.0_rp)
+
+    res = t**3 * (t * (6.0_rp * t - 15.0_rp) + 10.0_rp)
+
+  end function smooth_step_cpu
+
+  !> @brief Clamp a value between two limits.
+  elemental function clamp_cpu(x, lowerlimit, upperlimit) result(res)
+    real(kind=rp), intent(in) :: x
+    real(kind=rp), intent(in) :: lowerlimit, upperlimit
+    real(kind=rp) :: res
+
+    res = max(lowerlimit, min(upperlimit, x))
+  end function clamp_cpu
+
+  !> @brief Apply a step function to a scalar.
+  elemental function step_function_cpu(x, x_step, value0, value1) result(res)
+    real(kind=rp), intent(in) :: x, x_step, value0, value1
+    real(kind=rp) :: res
+
+    res = merge(value0, value1, x > x_step)
+
+  end function step_function_cpu
+
+  !> @brief Apply a permeability function to a scalar.
+  elemental function permeability_cpu(x, k_0, k_1, q) result(perm)
+    real(kind=rp), intent(in) :: x, k_0, k_1, q
+    real(kind=rp) :: perm
+
+    perm = k_0 + (k_1 - k_0) * x * (q + 1.0_rp) / (q + x)
+
+  end function permeability_cpu
+
+
+end module filters_cpu
diff --git a/src/source_terms/bcknd/device/boussinesq_source_term_device.f90 b/src/source_terms/bcknd/device/boussinesq_source_term_device.f90
new file mode 100644
index 00000000000..482d566dac4
--- /dev/null
+++ b/src/source_terms/bcknd/device/boussinesq_source_term_device.f90
@@ -0,0 +1,70 @@
+! Copyright (c) 2023, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Implements the device kernel for the `boussinesq_source_term_t` type.
+module boussinesq_source_term_device
+  use num_types, only : rp
+  use field_list, only : field_list_t
+  use field, only : field_t
+  use device_math, only : device_add2s2, device_cadd
+  implicit none
+  private
+
+  public :: boussinesq_source_term_compute_device
+
+contains
+
+  !> Computes the Boussinesq source term on the device.
+  !! @param fields The right-hand side.
+  !! @param s The scalar field
+  !! @param ref_value The reference value of the scalar field.
+  !! @param g The gravity vector.
+  !! @param beta The thermal expansion coefficient.
+  subroutine boussinesq_source_term_compute_device(fields, s, ref_value, g,&
+                                                   beta)
+    type(field_list_t), intent(inout) :: fields
+    type(field_t), intent(inout) :: s
+    real(kind=rp), intent(in) :: ref_value
+    real(kind=rp), intent(in) :: g(3)
+    real(kind=rp), intent(in) :: beta
+    integer :: n_fields, i, n
+
+    n_fields = size(fields%fields)
+    n = fields%fields(1)%f%dof%size()
+
+    do i=1, n_fields
+       call device_add2s2(fields%fields(i)%f%x_d, s%x_d, g(i)*beta, n)
+       call device_cadd(fields%fields(i)%f%x_d, -g(i)*beta*ref_value, n)
+    end do
+  end subroutine boussinesq_source_term_compute_device
+
+end module boussinesq_source_term_device
diff --git a/src/source_terms/bcknd/device/brinkman_source_term_device.f90 b/src/source_terms/bcknd/device/brinkman_source_term_device.f90
new file mode 100644
index 00000000000..454decceba9
--- /dev/null
+++ b/src/source_terms/bcknd/device/brinkman_source_term_device.f90
@@ -0,0 +1,68 @@
+! Copyright (c) 2023, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Implements the device kernel for the `brinkman_source_term_t` type.
+module brinkman_source_term_device
+  use num_types, only : rp
+  use field, only : field_t
+  use field_list, only : field_list_t
+  use device_math, only : device_subcol3
+  use field_registry, only : neko_field_registry
+  implicit none
+  private
+
+  public :: brinkman_source_term_compute_device
+
+contains
+
+  !> Computes the Brinkman source term on the device.
+  !! @param fields The right-hand side.
+  !! @param values The values of the source components.
+  subroutine brinkman_source_term_compute_device(fields, brinkman)
+    type(field_list_t), intent(inout) :: fields
+    type(field_t), intent(in) :: brinkman
+    integer :: n
+    type(field_t), pointer :: u, v, w
+
+    n = fields%fields(1)%f%dof%size()
+
+    u => neko_field_registry%get_field('u')
+    v => neko_field_registry%get_field('v')
+    w => neko_field_registry%get_field('w')
+
+    call device_subcol3(fields%fields(1)%f%x_d, u%x_d, brinkman%x_d, n)
+    call device_subcol3(fields%fields(2)%f%x_d, v%x_d, brinkman%x_d, n)
+    call device_subcol3(fields%fields(3)%f%x_d, w%x_d, brinkman%x_d, n)
+
+  end subroutine brinkman_source_term_compute_device
+
+end module brinkman_source_term_device
diff --git a/src/source_terms/bcknd/device/const_source_term_device.f90 b/src/source_terms/bcknd/device/const_source_term_device.f90
index e01da4b2e7a..16bcfc217fe 100644
--- a/src/source_terms/bcknd/device/const_source_term_device.f90
+++ b/src/source_terms/bcknd/device/const_source_term_device.f90
@@ -42,10 +42,10 @@ module const_source_term_device
 
 contains
 
-  !> Computs the constant source term on the device.
+  !> Computes the constant source term on the device.
   !! @param fields The right-hand side.
   !! @param values The values of the source components.
-  subroutine const_source_term_compute_device(fields, values) 
+  subroutine const_source_term_compute_device(fields, values)
     type(field_list_t), intent(inout) :: fields
     real(kind=rp), intent(in) :: values(:)
     integer :: n_fields, i, n
@@ -57,5 +57,5 @@ subroutine const_source_term_compute_device(fields, values)
        call device_cadd(fields%fields(i)%f%x_d, values(i), n)
     end do
   end subroutine const_source_term_compute_device
-  
+
 end module const_source_term_device
diff --git a/src/source_terms/boussinesq_source_term.f90 b/src/source_terms/boussinesq_source_term.f90
new file mode 100644
index 00000000000..8e242b33e53
--- /dev/null
+++ b/src/source_terms/boussinesq_source_term.f90
@@ -0,0 +1,180 @@
+! Copyright (c) 2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Implements the `boussinesq_source_term_t` type.
+module boussinesq_source_term
+  use num_types, only : rp
+  use field_list, only : field_list_t
+  use field, only : field_t
+  use json_module, only : json_file
+  use json_utils, only: json_get, json_get_or_default
+  use source_term, only : source_term_t
+  use coefs, only : coef_t
+  use neko_config, only : NEKO_BCKND_DEVICE
+  use utils, only : neko_error
+  use boussinesq_source_term_cpu, only : boussinesq_source_term_compute_cpu
+  use boussinesq_source_term_device, only : boussinesq_source_term_compute_device
+  use field_registry, only : neko_field_registry
+  implicit none
+  private
+
+  !> Bouyancy source term accroding to the Boussinesq approximation.
+  !! @details Called "boussinesq" in the JSON.
+  !! Controlled by the following parameters:
+  !! - "scalar_field": The name of the scalar that drives the source term,
+  !!   defaults to "s".
+  !! - "ref_value": The reference value of the scalar.
+  !! - "g": The gravity vector.
+  !! - "beta": The the thermal expansion coefficeint, defaults to `1/ref_value`.
+  type, public, extends(source_term_t) :: boussinesq_source_term_t
+     !> The scalar field used to drive the source term, typically temperature.
+     type(field_t), pointer :: s => null()
+     !> The reference value of the scalar field.
+     real(kind=rp) :: ref_value = 0
+     !> Gravity vector
+     real(kind=rp) :: g(3)
+     !> Thermal expantion coefficient
+     real(kind=rp) :: beta
+   contains
+     !> The common constructor using a JSON object.
+     procedure, pass(this) :: init => boussinesq_source_term_init_from_json
+     !> The constructor from type components.
+     procedure, pass(this) :: init_from_compenents => &
+       boussinesq_source_term_init_from_components
+     !> Destructor.
+     procedure, pass(this) :: free => boussinesq_source_term_free
+     !> Computes the source term and adds the result to `fields`.
+     procedure, pass(this) :: compute_ => boussinesq_source_term_compute
+  end type boussinesq_source_term_t
+
+contains
+  !> The common constructor using a JSON object.
+  !! @param json The JSON object for the source.
+  !! @param fields A list of fields for adding the source values.
+  !! @param coef The SEM coeffs.
+  subroutine boussinesq_source_term_init_from_json(this, json, fields, coef)
+    class(boussinesq_source_term_t), intent(inout) :: this
+    type(json_file), intent(inout) :: json
+    type(field_list_t), intent(inout), target :: fields
+    type(coef_t), intent(inout) :: coef
+    real(kind=rp), allocatable :: values(:)
+    real(kind=rp) :: start_time, end_time, ref_value
+    character(len=:), allocatable :: scalar_name
+    real(kind=rp), allocatable :: g(:)
+    real(kind=rp) :: beta
+
+    if (.not. size(fields%fields) == 3) then
+       call neko_error("Boussinesq term expects 3 fields to work on.")
+    end if
+
+    call json_get_or_default(json, "scalar_field", start_time, 0.0_rp)
+    call json_get_or_default(json, "end_time", end_time, huge(0.0_rp))
+
+    call json_get_or_default(json, "scalar_field", scalar_name, "s")
+    call json_get(json, "g", g)
+
+    if (.not. size(g) == 3) then
+       call neko_error("The gravity vector should have 3 components")
+    end if
+
+    call json_get(json, "reference_value", ref_value)
+    call json_get_or_default(json, "beta", beta, 1.0_rp/ref_value)
+
+    call boussinesq_source_term_init_from_components(this, fields, scalar_name,&
+       ref_value, g, beta, coef, start_time, end_time)
+
+  end subroutine boussinesq_source_term_init_from_json
+
+  !> The constructor from type components.
+  !! @param fields A list of fields for adding the source values.
+  !! @param scalar_name The name of the scalar field driving the source.
+  !! @param ref_value The reference value of the scalar field.
+  !! @param g The gravity vector.
+  !! @param beta The thermal expansion coefficient.
+  !! @param coef The SEM coeffs.
+  !! @param start_time When to start adding the source term.
+  !! @param end_time When to stop adding the source term.
+  subroutine boussinesq_source_term_init_from_components(this, fields, &
+    scalar_name, ref_value, g, beta, coef, start_time, end_time)
+    class(boussinesq_source_term_t), intent(inout) :: this
+    class(field_list_t), intent(inout), target :: fields
+    character(len=*), intent(in) :: scalar_name
+    real(kind=rp), intent(in) :: ref_value
+    real(kind=rp), intent(in) :: g(3)
+    real(kind=rp), intent(in) :: beta
+    type(coef_t) :: coef
+    real(kind=rp), intent(in) :: start_time
+    real(kind=rp), intent(in) :: end_time
+
+    call this%free()
+    call this%init_base(fields, coef, start_time, end_time)
+
+    if (.not. neko_field_registry%field_exists(scalar_name)) then
+       call neko_field_registry%add_field(this%fields%fields(1)%f%dof, "s")
+    end if
+    this%s => neko_field_registry%get_field("s")
+
+    this%ref_value = ref_value
+    this%g = g
+    this%beta = beta
+  end subroutine boussinesq_source_term_init_from_components
+
+  !> Destructor.
+  subroutine boussinesq_source_term_free(this)
+    class(boussinesq_source_term_t), intent(inout) :: this
+
+    call this%free_base()
+    nullify(this%s)
+  end subroutine boussinesq_source_term_free
+
+  !> Computes the source term and adds the result to `fields`.
+  !! @param t The time value.
+  !! @param tstep The current time-step.
+  subroutine boussinesq_source_term_compute(this, t, tstep)
+    class(boussinesq_source_term_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+    integer :: n_fields, i, n
+
+    n_fields = size(this%fields%fields)
+    n = this%fields%fields(1)%f%dof%size()
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call boussinesq_source_term_compute_device(this%fields, this%s,&
+         this%ref_value, this%g, this%beta)
+    else
+       call boussinesq_source_term_compute_cpu(this%fields, this%s,&
+         this%ref_value, this%g, this%beta)
+    end if
+  end subroutine boussinesq_source_term_compute
+
+end module boussinesq_source_term
diff --git a/src/source_terms/brinkman/filters.f90 b/src/source_terms/brinkman/filters.f90
new file mode 100644
index 00000000000..879f89313ef
--- /dev/null
+++ b/src/source_terms/brinkman/filters.f90
@@ -0,0 +1,102 @@
+! Copyright (c) 2024, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> A module containing filter functions and subroutines. These functions
+!! are used to modify fields in a way that is useful for various
+!! simulations.
+module filters
+  use field, only: field_t
+  use num_types, only: rp
+  implicit none
+
+  private
+  public :: smooth_step_field, permeability_field, step_function_field
+
+contains
+
+  !> @brief Apply a smooth step function to a field.
+  !! @details The smooth step function is defined as:
+  !! \f[
+  !! t = (x - edge0) / (edge1 - edge0)
+  !!  f(t) = \begin{cases}
+  !!            t^3 (t (6x - 15) + 10), & t \in [0, 1] \\
+  !!              0, & t \leq 0 \\
+  !!              1, & t \geq 1 \\
+  !!          \end{cases}
+  !! \f]
+  !! @note The step can be inverted by swapping edge0 and edge1.
+  !!
+  !! @param[in,out] F Field to be modified.
+  !! @param[in] edge0 Edge giving output 0.
+  !! @param[in] edge1 Edge giving output 1.
+  subroutine smooth_step_field(F, edge0, edge1)
+    use filters_cpu, only: smooth_step_cpu
+
+    type(field_t), intent(inout) :: F
+    real(kind=rp), intent(in) :: edge0, edge1
+
+    F%x = smooth_step_cpu(F%x, edge0, edge1)
+  end subroutine smooth_step_field
+
+  !> @brief Apply a permeability function to a field.
+  !! @details The permeability function is defined as:
+  !! \f[ k(x) = k_0 + (k_1 - k_0) x \frac{q + 1}{q + x}} \f]
+  !! @param[in,out] F Field to be modified.
+  !! @param[in] k_0 Permeability at x=0.
+  !! @param[in] k_1 Permeability at x=1.
+  !! @param[in] q Penalty factor.
+  subroutine permeability_field(F_out, x, k_0, k_1, q)
+    use filters_cpu, only: permeability_cpu
+
+    type(field_t), intent(inout) :: F_out
+    type(field_t), intent(in) :: x
+    real(kind=rp), intent(in) :: k_0, k_1
+    real(kind=rp), intent(in) :: q
+
+    F_out%x = permeability_cpu(x%x, k_0, k_1, q)
+  end subroutine permeability_field
+
+  !> @brief Apply a step function to a field.
+  !! @param[in,out] F Field to be modified.
+  !! @param[in] x0 Position of the step.
+  !! @param[in] value0 Value of the field before the step.
+  !! @param[in] value1 Value of the field after the step.
+  subroutine step_function_field(F, x0, value0, value1)
+    use filters_cpu, only: step_function_cpu
+
+    type(field_t), intent(inout) :: F
+    real(kind=rp), intent(in) :: x0, value0, value1
+
+    F%x = step_function_cpu(F%x, x0, value0, value1)
+  end subroutine step_function_field
+
+end module filters
diff --git a/src/source_terms/brinkman_source_term.f90 b/src/source_terms/brinkman_source_term.f90
new file mode 100644
index 00000000000..6f24167f443
--- /dev/null
+++ b/src/source_terms/brinkman_source_term.f90
@@ -0,0 +1,408 @@
+! Copyright (c) 2023, The Neko Authors
+! All rights reserved.
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+!
+!   * Redistributions of source code must retain the above copyright
+!     notice, this list of conditions and the following disclaimer.
+!
+!   * Redistributions in binary form must reproduce the above
+!     copyright notice, this list of conditions and the following
+!     disclaimer in the documentation and/or other materials provided
+!     with the distribution.
+!
+!   * Neither the name of the authors nor the names of its
+!     contributors may be used to endorse or promote products derived
+!     from this software without specific prior written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!> Implements the `brinkman_source_term_t` type.
+module brinkman_source_term
+  use num_types, only: rp, dp
+  use field, only: field_t
+  use field_list, only: field_list_t
+  use json_module, only: json_file
+  use json_utils, only: json_get, json_get_or_default, json_extract_item
+  use field_registry, only: neko_field_registry
+  use source_term, only: source_term_t
+  use coefs, only: coef_t
+  use neko_config, only: NEKO_BCKND_DEVICE
+  use utils, only: neko_error
+  use brinkman_source_term_cpu, only: brinkman_source_term_compute_cpu
+  use brinkman_source_term_device, only: brinkman_source_term_compute_device
+  implicit none
+  private
+
+  !> A Brinkman source term.
+  !! The region and strength are controlled by assigning regions types and
+  !! brinkman limits to the source term.
+  type, public, extends(source_term_t) :: brinkman_source_term_t
+     private
+
+     !> The value of the source term.
+     type(field_t), pointer :: indicator => null()
+     !> Brinkman permeability field.
+     type(field_t), pointer :: brinkman => null()
+   contains
+     !> The common constructor using a JSON object.
+     procedure, public, pass(this) :: init => brinkman_source_term_init_from_json
+     !> Destructor.
+     procedure, public, pass(this) :: free => brinkman_source_term_free
+     !> Computes the source term and adds the result to `fields`.
+     procedure, public, pass(this) :: compute_ => brinkman_source_term_compute
+
+     ! ----------------------------------------------------------------------- !
+     ! Private methods
+     procedure, pass(this) :: init_boundary_mesh
+     procedure, pass(this) :: init_point_zone
+  end type brinkman_source_term_t
+
+contains
+
+  ! ========================================================================== !
+  ! Public methods
+
+  !> The common constructor using a JSON object.
+  !! @param json The JSON object for the source.
+  !! @param fields A list of fields for adding the source values.
+  !! @param coef The SEM coeffs.
+  subroutine brinkman_source_term_init_from_json(this, json, fields, coef)
+    use file, only: file_t
+    use tri_mesh, only: tri_mesh_t
+    use device, only: device_memcpy, HOST_TO_DEVICE
+    use filters, only: smooth_step_field, step_function_field, permeability_field
+    use signed_distance, only: signed_distance_field
+    use profiler, only: profiler_start_region, profiler_end_region
+    use json_module, only: json_core, json_value
+    implicit none
+
+    class(brinkman_source_term_t), intent(inout) :: this
+    type(json_file), intent(inout) :: json
+    type(field_list_t), intent(inout), target :: fields
+    type(coef_t), intent(inout) :: coef
+    real(kind=rp) :: start_time, end_time
+
+    character(len=:), allocatable :: filter_type
+    real(kind=rp), dimension(:), allocatable :: brinkman_limits
+    real(kind=rp) :: brinkman_penalty
+
+    type(json_value), pointer :: json_object_list
+    type(json_core) :: core
+
+    character(len=:), allocatable :: object_type
+    type(json_file) :: object_settings
+    integer :: n_regions
+    integer :: i
+
+    ! Mandatory fields for the general source term
+    call json_get_or_default(json, "start_time", start_time, 0.0_rp)
+    call json_get_or_default(json, "end_time", end_time, huge(0.0_rp))
+
+    ! Read the options for the permeability field
+    call json_get(json, 'brinkman.limits', brinkman_limits)
+    call json_get(json, 'brinkman.penalty', brinkman_penalty)
+
+    if (size(brinkman_limits) .ne. 2) then
+       call neko_error('brinkman_limits must be a 2 element array of reals')
+    end if
+
+    call this%free()
+    call this%init_base(fields, coef, start_time, end_time)
+
+    ! ------------------------------------------------------------------------ !
+    ! Allocate the permeability and indicator field
+
+    if (neko_field_registry%field_exists('brinkman_indicator') &
+        .or. neko_field_registry%field_exists('brinkman')) then
+       call neko_error('Brinkman field already exists.')
+    end if
+
+    call neko_field_registry%add_field(coef%dof, 'brinkman_indicator')
+    this%indicator => neko_field_registry%get_field_by_name('brinkman_indicator')
+
+    call neko_field_registry%add_field(coef%dof, 'brinkman')
+    this%brinkman => neko_field_registry%get_field_by_name('brinkman')
+
+    ! ------------------------------------------------------------------------ !
+    ! Select which constructor should be called
+
+    call json%get('objects', json_object_list)
+    call json%info('objects', n_children=n_regions)
+    call json%get_core(core)
+
+    do i=1, n_regions
+       call json_extract_item(core, json_object_list, i, object_settings)
+       call json_get_or_default(object_settings, 'type', object_type, 'none')
+
+       select case (object_type)
+         case ('boundary_mesh')
+          call this%init_boundary_mesh(object_settings)
+         case ('point_zone')
+          call this%init_point_zone(object_settings)
+
+         case ('none')
+          call object_settings%print()
+          call neko_error('Brinkman source term objects require a region type')
+         case default
+          call neko_error('Brinkman source term unknown region type')
+       end select
+
+    end do
+
+    ! Run filter on the full indicator field to smooth it out.
+    call json_get_or_default(json, 'filter.type', filter_type, 'none')
+
+    select case (filter_type)
+      case ('none')
+       ! Do nothing
+      case default
+       call neko_error('Brinkman source term unknown filter type')
+    end select
+
+    ! ------------------------------------------------------------------------ !
+    ! Compute the permeability field
+
+    call permeability_field(this%brinkman, this%indicator, &
+      & brinkman_limits(1), brinkman_limits(2), brinkman_penalty)
+
+    ! Copy the permeability field to the device
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_memcpy(this%brinkman%x, this%brinkman%x_d, &
+                          this%brinkman%dof%size(), HOST_TO_DEVICE, .true.)
+    end if
+
+  end subroutine brinkman_source_term_init_from_json
+
+  !> Destructor.
+  subroutine brinkman_source_term_free(this)
+    class(brinkman_source_term_t), intent(inout) :: this
+
+    this%brinkman => null()
+    call this%free_base()
+  end subroutine brinkman_source_term_free
+
+  !> Computes the source term and adds the result to `fields`.
+  !! @param t The time value.
+  !! @param tstep The current time-step.
+  subroutine brinkman_source_term_compute(this, t, tstep)
+    use device, only: device_memcpy, HOST_TO_DEVICE
+    implicit none
+
+    class(brinkman_source_term_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call brinkman_source_term_compute_device(this%fields, this%brinkman)
+    else
+       call brinkman_source_term_compute_cpu(this%fields, this%brinkman)
+    end if
+  end subroutine brinkman_source_term_compute
+
+  ! ========================================================================== !
+  ! Private methods
+
+  !> Initializes the source term from a boundary mesh.
+  subroutine init_boundary_mesh(this, json)
+    use file, only: file_t
+    use tri_mesh, only: tri_mesh_t
+    use device, only: device_memcpy, HOST_TO_DEVICE
+    use filters, only: smooth_step_field, step_function_field, permeability_field
+    use signed_distance, only: signed_distance_field
+    use profiler, only: profiler_start_region, profiler_end_region
+    use aabb
+    implicit none
+
+    class(brinkman_source_term_t), intent(inout) :: this
+    type(json_file), intent(inout) :: json
+
+    ! Options
+    character(len=:), allocatable :: mesh_file_name
+    character(len=:), allocatable :: distance_transform
+    character(len=:), allocatable :: filter_type
+    character(len=:), allocatable :: mesh_transform
+
+    ! Read the options for the boundary mesh
+    type(file_t) :: mesh_file
+    type(tri_mesh_t) :: boundary_mesh
+    real(kind=rp) :: scalar_r
+    real(kind=dp) :: scalar_d
+
+    ! Mesh transform options variables
+    real(kind=dp), dimension(:), allocatable :: box_min, box_max
+    logical :: keep_aspect_ratio
+    real(kind=dp), dimension(3) :: scaling
+    real(kind=dp), dimension(3) :: translation
+    type(field_t) :: temp_field
+    type(aabb_t) :: mesh_box, target_box
+    integer :: idx_p
+
+    ! ------------------------------------------------------------------------ !
+    ! Read the options for the boundary mesh
+
+    call json_get(json, 'name', mesh_file_name)
+
+    ! Settings on how to filter the design field
+    call json_get(json, 'distance_transform.type', distance_transform)
+
+    ! ------------------------------------------------------------------------ !
+    ! Load the immersed boundary mesh
+
+    mesh_file = file_t(mesh_file_name)
+    call mesh_file%read(boundary_mesh)
+
+    if (boundary_mesh%nelv .eq. 0) then
+       call neko_error('No elements in the boundary mesh')
+    end if
+
+    ! ------------------------------------------------------------------------ !
+    ! Transform the mesh if specified.
+
+    call json_get_or_default(json, 'mesh_transform.type', mesh_transform, 'none')
+
+    select case (mesh_transform)
+      case ('none')
+       ! Do nothing
+      case ('bounding_box')
+       call json_get(json, 'mesh_transform.box_min', box_min)
+       call json_get(json, 'mesh_transform.box_max', box_max)
+       call json_get_or_default(json, 'mesh_transform.keep_aspect_ratio', &
+                                keep_aspect_ratio, .true.)
+
+       if (size(box_min) .ne. 3 .or. size(box_max) .ne. 3) then
+          call neko_error('Case file: mesh_transform. &
+            &box_min and box_max must be 3 element arrays of reals')
+       end if
+
+       call target_box%init(box_min, box_max)
+
+       mesh_box = get_aabb(boundary_mesh)
+
+       scaling = target_box%get_diagonal() / mesh_box%get_diagonal()
+       if (keep_aspect_ratio) then
+          scaling = minval(scaling)
+       end if
+
+       translation = - scaling * mesh_box%get_min() + target_box%get_min()
+
+       do idx_p = 1, boundary_mesh%mpts
+          boundary_mesh%points(idx_p)%x = &
+            scaling * boundary_mesh%points(idx_p)%x + translation
+       end do
+
+      case default
+       call neko_error('Unknown mesh transform')
+    end select
+
+    ! ------------------------------------------------------------------------ !
+    ! Compute the permeability field
+
+    ! Assign the signed distance field to all GLL points in the permeability
+    ! field. Initally we just run a brute force loop over all GLL points and
+    ! compute the signed distance function. This should be replaced with a
+    ! more efficient method, such as a tree search.
+
+    call temp_field%init(this%indicator%dof)
+
+    ! Select how to transform the distance field to a design field
+    select case (distance_transform)
+      case ('smooth_step')
+       call json_get(json, 'distance_transform.value', scalar_d)
+       scalar_r = real(scalar_d, kind=rp)
+
+       call signed_distance_field(temp_field, boundary_mesh, scalar_d)
+       call smooth_step_field(temp_field, scalar_r, 0.0_rp)
+
+      case ('step')
+
+       call json_get(json, 'distance_transform.value', scalar_d)
+
+       call signed_distance_field(temp_field, boundary_mesh, scalar_d)
+       call step_function_field(temp_field, scalar_r, 1.0_rp, 0.0_rp)
+
+      case default
+       call neko_error('Unknown distance transform')
+    end select
+
+    ! ------------------------------------------------------------------------ !
+    ! Run filter on the temporary indicator field to smooth it out.
+    call json_get_or_default(json, 'filter.type', filter_type, 'none')
+
+    select case (filter_type)
+      case ('none')
+       ! Do nothing
+      case default
+       call neko_error('Unknown filter type')
+    end select
+
+    ! Update the global indicator field by max operator
+    this%indicator%x = max(this%indicator%x, temp_field%x)
+
+  end subroutine init_boundary_mesh
+
+  !> Initializes the source term from a point zone.
+  subroutine init_point_zone(this, json)
+    use filters, only: smooth_step_field, step_function_field, permeability_field
+    use signed_distance, only: signed_distance_field
+    use profiler, only: profiler_start_region, profiler_end_region
+    use point_zone, only: point_zone_t
+    use device, only: device_memcpy, HOST_TO_DEVICE
+    use point_zone_registry, only: neko_point_zone_registry
+    implicit none
+
+    class(brinkman_source_term_t), intent(inout) :: this
+    type(json_file), intent(inout) :: json
+
+    ! Options
+    character(len=:), allocatable :: zone_name
+    character(len=:), allocatable :: filter_type
+
+    type(field_t) :: temp_field
+    class(point_zone_t), pointer :: my_point_zone
+    integer :: i
+
+    ! ------------------------------------------------------------------------ !
+    ! Read the options for the point zone
+
+    call json_get(json,'name', zone_name)
+    call json_get_or_default(json, 'filter.type', filter_type, 'none')
+
+    ! Compute the indicator field
+
+    call temp_field%init(this%indicator%dof)
+
+    my_point_zone => neko_point_zone_registry%get_point_zone(zone_name)
+
+    do i = 1, my_point_zone%size
+       temp_field%x(my_point_zone%mask(i), 1, 1, 1) = 1.0_rp
+    end do
+
+    ! Run filter on the temporary indicator field to smooth it out.
+
+    select case (filter_type)
+      case ('none')
+       ! Do nothing
+      case default
+       call neko_error('Unknown filter type')
+    end select
+
+    ! Update the global indicator field by max operator
+    this%indicator%x = max(this%indicator%x, temp_field%x)
+
+  end subroutine init_point_zone
+
+end module brinkman_source_term
diff --git a/src/source_terms/const_source_term.f90 b/src/source_terms/const_source_term.f90
index 9dd2b96b85e..a09f96fb9c5 100644
--- a/src/source_terms/const_source_term.f90
+++ b/src/source_terms/const_source_term.f90
@@ -35,7 +35,7 @@ module const_source_term
   use num_types, only : rp
   use field_list, only : field_list_t
   use json_module, only : json_file
-  use json_utils, only: json_get
+  use json_utils, only: json_get, json_get_or_default
   use source_term, only : source_term_t
   use coefs, only : coef_t
   use neko_config, only : NEKO_BCKND_DEVICE
@@ -55,12 +55,12 @@ module const_source_term
      !> The common constructor using a JSON object.
      procedure, pass(this) :: init => const_source_term_init_from_json
      !> The constructor from type components.
-     procedure, pass(this) :: init_from_compenents => & 
+     procedure, pass(this) :: init_from_compenents => &
        const_source_term_init_from_components
      !> Destructor.
      procedure, pass(this) :: free => const_source_term_free
      !> Computes the source term and adds the result to `fields`.
-     procedure, pass(this) :: compute => const_source_term_compute
+     procedure, pass(this) :: compute_ => const_source_term_compute
   end type const_source_term_t
 
 contains
@@ -68,15 +68,21 @@ module const_source_term
   !! @param json The JSON object for the source.
   !! @param fields A list of fields for adding the source values.
   !! @param coef The SEM coeffs.
-  subroutine const_source_term_init_from_json(this, json, fields, coef) 
+  subroutine const_source_term_init_from_json(this, json, fields, coef)
     class(const_source_term_t), intent(inout) :: this
     type(json_file), intent(inout) :: json
     type(field_list_t), intent(inout), target :: fields
     type(coef_t), intent(inout) :: coef
     real(kind=rp), allocatable :: values(:)
+    real(kind=rp) :: start_time, end_time
 
     call json_get(json, "values", values)
-    call const_source_term_init_from_components(this, fields, values, coef)
+    call json_get_or_default(json, "start_time", start_time, 0.0_rp)
+    call json_get_or_default(json, "end_time", end_time, huge(0.0_rp))
+
+
+    call const_source_term_init_from_components(this, fields, values, coef, &
+                                                start_time, end_time)
 
   end subroutine const_source_term_init_from_json
 
@@ -84,15 +90,19 @@ end subroutine const_source_term_init_from_json
   !! @param fields A list of fields for adding the source values.
   !! @param values The array of values, one for each field.
   !! @param coef The SEM coeffs.
+  !! @param start_time When to start adding the source term.
+  !! @param end_time When to stop adding the source term.
   subroutine const_source_term_init_from_components(this, fields, values, &
-                                                    coef) 
+                                                    coef, start_time, end_time)
     class(const_source_term_t), intent(inout) :: this
     class(field_list_t), intent(inout), target :: fields
     real(kind=rp), intent(in) :: values(:)
     type(coef_t) :: coef
+    real(kind=rp), intent(in) :: start_time
+    real(kind=rp), intent(in) :: end_time
 
     call this%free()
-    call this%init_base(fields, coef)
+    call this%init_base(fields, coef, start_time, end_time)
 
     if (size(values) .ne. size(fields%fields)) then
        call neko_error("Number of fields and values inconsistent.")
@@ -102,7 +112,7 @@ subroutine const_source_term_init_from_components(this, fields, values, &
   end subroutine const_source_term_init_from_components
 
   !> Destructor.
-  subroutine const_source_term_free(this) 
+  subroutine const_source_term_free(this)
     class(const_source_term_t), intent(inout) :: this
 
     call this%free_base()
@@ -111,7 +121,7 @@ end subroutine const_source_term_free
   !> Computes the source term and adds the result to `fields`.
   !! @param t The time value.
   !! @param tstep The current time-step.
-  subroutine const_source_term_compute(this, t, tstep) 
+  subroutine const_source_term_compute(this, t, tstep)
     class(const_source_term_t), intent(inout) :: this
     real(kind=rp), intent(in) :: t
     integer, intent(in) :: tstep
@@ -126,5 +136,5 @@ subroutine const_source_term_compute(this, t, tstep)
        call const_source_term_compute_cpu(this%fields, this%values)
     end if
   end subroutine const_source_term_compute
-  
+
 end module const_source_term
diff --git a/src/source_terms/source_term.f90 b/src/source_terms/source_term.f90
index 65a1c9eb74d..11f5e7e9f09 100644
--- a/src/source_terms/source_term.f90
+++ b/src/source_terms/source_term.f90
@@ -39,24 +39,30 @@ module source_term
   use json_module, only : json_file
   implicit none
   private
-  
+
   !> Base abstract type for source terms.
   type, abstract, public:: source_term_t
      !> The fields to be updated with the source term values
      type(field_list_t) :: fields
      !> Coefficients for the SEM.
      type(coef_t), pointer :: coef => null()
+     !> Start time for adding the source term.
+     real(kind=rp) :: start_time = 0.0_rp
+     !> End time for adding the source term.
+     real(kind=rp) :: end_time = huge(0.0_rp)
    contains
      !> Constructor for the source_term_t (base) type.
      procedure, pass(this) :: init_base => source_term_init_base
      !> Destructor for the source_term_t (base) type.
      procedure, pass(this) :: free_base => source_term_free_base
+     !> Executes `compute_` based on time conditions.
+     procedure, pass(this) :: compute => source_term_compute_wrapper
      !> The common constructor using a JSON object.
      procedure(source_term_init), pass(this), deferred :: init
      !> Destructor.
      procedure(source_term_free), pass(this), deferred :: free
      !> Computes the source term and adds the result to `fields`.
-     procedure(source_term_compute), pass(this), deferred :: compute
+     procedure(source_term_compute), pass(this), deferred :: compute_
   end type source_term_t
 
 
@@ -66,56 +72,62 @@ module source_term
      class(source_term_t), allocatable :: source_term
    contains
      !> Destructor.
-     procedure, pass(this) :: free => source_term_wrapper_free 
+     procedure, pass(this) :: free => source_term_wrapper_free
   end type source_term_wrapper_t
 
   abstract interface
-    !> The common constructor using a JSON object.
-    !! @param json The JSON object for the source.
-    !! @param fields A list of fields for adding the source values.
-    !! @param coef The SEM coeffs.
-     subroutine source_term_init(this, json, fields, coef)  
+     !> The common constructor using a JSON object.
+     !! @param json The JSON object for the source.
+     !! @param fields A list of fields for adding the source values.
+     !! @param coef The SEM coeffs.
+     subroutine source_term_init(this, json, fields, coef)
        import source_term_t, json_file, field_list_t, coef_t
        class(source_term_t), intent(inout) :: this
        type(json_file), intent(inout) :: json
        type(field_list_t), intent(inout), target :: fields
        type(coef_t), intent(inout) :: coef
-     end subroutine
+     end subroutine source_term_init
   end interface
 
   abstract interface
      !> Destructor.
-     subroutine source_term_free(this)  
+     subroutine source_term_free(this)
        import source_term_t
        class(source_term_t), intent(inout) :: this
-     end subroutine
+     end subroutine source_term_free
   end interface
 
   abstract interface
      !> Computes the source term and adds the result to `fields`.
      !! @param t The time value.
      !! @param tstep The current time-step.
-     subroutine source_term_compute(this, t, tstep)  
+     subroutine source_term_compute(this, t, tstep)
        import source_term_t, rp
        class(source_term_t), intent(inout) :: this
        real(kind=rp), intent(in) :: t
        integer, intent(in) :: tstep
-     end subroutine
+     end subroutine source_term_compute
   end interface
 
 contains
 
   !> Constructor for the `source_term_t` (base) type.
-  !> @param fields A list of pointers to fields to be updated by the source 
+  !> @param fields A list of pointers to fields to be updated by the source
   !! term.
   !> @param coef SEM coefs.
-  subroutine source_term_init_base(this, fields, coef) 
+  !! @param start_time When to start adding the source term.
+  !! @param end_time When to stop adding the source term.
+  subroutine source_term_init_base(this, fields, coef, start_time, end_time)
     class(source_term_t), intent(inout) :: this
     type(field_list_t) :: fields
     type(coef_t), intent(inout), target :: coef
+    real(kind=rp), intent(in) :: start_time
+    real(kind=rp), intent(in) :: end_time
     integer :: n_fields, i
 
     this%coef => coef
+    this%start_time = start_time
+    this%end_time = end_time
     n_fields = size(fields%fields)
     allocate(this%fields%fields(n_fields))
 
@@ -127,7 +139,7 @@ subroutine source_term_init_base(this, fields, coef)
   end subroutine source_term_init_base
 
   !> Destructor for the `source_term_t` (base) type.
-  subroutine source_term_free_base(this) 
+  subroutine source_term_free_base(this)
     class(source_term_t), intent(inout) :: this
 
     call this%fields%free()
@@ -135,7 +147,7 @@ subroutine source_term_free_base(this)
   end subroutine source_term_free_base
 
   !> Destructor for the `source_term_wrapper_t` type.
-  subroutine source_term_wrapper_free(this) 
+  subroutine source_term_wrapper_free(this)
     class(source_term_wrapper_t), intent(inout) :: this
     integer :: n_fields, i
 
@@ -144,5 +156,18 @@ subroutine source_term_wrapper_free(this)
        deallocate(this%source_term)
     end if
   end subroutine source_term_wrapper_free
-  
+
+  !> Executes `compute_` based on time conditions.
+  !> @param t Time value.
+  !> @param tstep Current time step.
+  subroutine source_term_compute_wrapper(this, t, tstep)
+    class(source_term_t), intent(inout) :: this
+    real(kind=rp), intent(in) :: t
+    integer, intent(in) :: tstep
+
+    if (t .ge. this%start_time .and. t .le. this%end_time) then
+       call this%compute_(t, tstep)
+    end if
+
+  end subroutine source_term_compute_wrapper
 end module source_term
diff --git a/src/source_terms/source_term_factory.f90 b/src/source_terms/source_term_fctry.f90
similarity index 71%
rename from src/source_terms/source_term_factory.f90
rename to src/source_terms/source_term_fctry.f90
index cbdb2951625..63766ad4281 100644
--- a/src/source_terms/source_term_factory.f90
+++ b/src/source_terms/source_term_fctry.f90
@@ -35,6 +35,8 @@
 module source_term_fctry
   use source_term, only : source_term_t
   use const_source_term, only : const_source_term_t
+  use boussinesq_source_term, only : boussinesq_source_term_t
+  use brinkman_source_term, only: brinkman_source_term_t
   use json_module, only : json_file
   use json_utils, only : json_get
   use field_list, only : field_list_t
@@ -42,30 +44,34 @@ module source_term_fctry
   use coefs, only : coef_t
   implicit none
   private
-  
+
   public :: source_term_factory
-  
-  contains
+
+contains
 
   !> Source term factory. Both constructs and initializes the object.
   !! @param json JSON object initializing the source term.
   subroutine source_term_factory(source_term, json, fields, coef)
-       class(source_term_t), allocatable, intent(inout) :: source_term
-       type(json_file), intent(inout) :: json
-       type(field_list_t), intent(inout) :: fields
-       type(coef_t), intent(inout) :: coef
-       character(len=:), allocatable :: source_type
-              
-       call json_get(json, "type", source_type)
+    class(source_term_t), allocatable, intent(inout) :: source_term
+    type(json_file), intent(inout) :: json
+    type(field_list_t), intent(inout) :: fields
+    type(coef_t), intent(inout) :: coef
+    character(len=:), allocatable :: source_type
+
+    call json_get(json, "type", source_type)
+
+    if (trim(source_type) .eq. "constant") then
+       allocate(const_source_term_t::source_term)
+    else if (trim(source_type) .eq. "boussinesq") then
+       allocate(boussinesq_source_term_t::source_term)
+    else if (trim(source_type) .eq. "brinkman") then
+       allocate(brinkman_source_term_t::source_term)
+    else
+       call neko_error('Unknown source term '//trim(source_type))
+    end if
 
-       if (trim(source_type) .eq. "constant") then 
-          allocate(const_source_term_t::source_term)
-       else
-           call neko_error('Unknown source term '//trim(source_type))
-       end if
-       
-       ! Initialize
-       call source_term%init(json, fields, coef)
+    ! Initialize
+    call source_term%init(json, fields, coef)
 
   end subroutine source_term_factory
 
diff --git a/src/time_schemes/ab_time_scheme.f90 b/src/time_schemes/ab_time_scheme.f90
index 661f2ca4b8b..9df980e17e3 100644
--- a/src/time_schemes/ab_time_scheme.f90
+++ b/src/time_schemes/ab_time_scheme.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 !> Adam-Bashforth scheme for time integration.
@@ -66,20 +66,20 @@ module ab_time_scheme
   use utils, only : neko_error
   implicit none
   private
-  
+
   !> Explicit Adam-Bashforth scheme for time integration.
   !! @details
   !! For a contant time-step this corresponds to the following schemes for
   !! order 1 to 3:
   !! - Order 1: \f$  u^{n+1} = u^n \f$
   !! - Order 2: \f$  u^{n+1} = 2u^n - u^{n-1} \f$, linear extrapolation
-  type, public, extends(time_scheme_t) :: ab_time_scheme_t 
-     contains
-       !> Compute the scheme coefficients
-       procedure, nopass :: compute_coeffs => ab_time_scheme_compute_coeffs
+  type, public, extends(time_scheme_t) :: ab_time_scheme_t
+   contains
+     !> Compute the scheme coefficients
+     procedure, nopass :: compute_coeffs => ab_time_scheme_compute_coeffs
   end type ab_time_scheme_t
-  
-  contains
+
+contains
 
   !> Compute the scheme coefficients
   !! @param t Timestep values, first element is the current timestep.
@@ -92,7 +92,7 @@ subroutine ab_time_scheme_compute_coeffs(coeffs, dt, order)
     real(kind=rp) dta, dtb, dtc, dtd, dte, dts
 
     call rzero(coeffs, 4)
-    
+
     select case (order)
     case (1)
        coeffs(1) = 1.0_rp
@@ -100,19 +100,19 @@ subroutine ab_time_scheme_compute_coeffs(coeffs, dt, order)
        coeffs(2) = -0.5_rp * dt(1) / dt(2)
        coeffs(1) =  1.0_rp - coeffs(2)
     case (3)
-      dts =  dt(2) + dt(3)
-      dta =  dt(1) / dt(2)
-      dtb =  dt(2) / dt(3)
-      dtc =  dt(1) / dt(3)
-      dtd =  dts / dt(2)
-      dte =  dt(1) / dts
-         coeffs(3) =  dte*( 0.5_rp*dtb + dtc/3.0_rp )
-         coeffs(2) = -0.5_rp * dta - coeffs(3) * dtd
-         coeffs(1) =  1.0_rp - coeffs(2) - coeffs(3)  
-    case default 
-      call neko_error("The order of the AB time scheme must be 1 to 3.")
+       dts =  dt(2) + dt(3)
+       dta =  dt(1) / dt(2)
+       dtb =  dt(2) / dt(3)
+       dtc =  dt(1) / dt(3)
+       dtd =  dts / dt(2)
+       dte =  dt(1) / dts
+       coeffs(3) =  dte*( 0.5_rp*dtb + dtc/3.0_rp )
+       coeffs(2) = -0.5_rp * dta - coeffs(3) * dtd
+       coeffs(1) =  1.0_rp - coeffs(2) - coeffs(3)
+    case default
+       call neko_error("The order of the AB time scheme must be 1 to 3.")
     end select
-    
+
   end subroutine ab_time_scheme_compute_coeffs
 
-end module ab_time_scheme
\ No newline at end of file
+end module ab_time_scheme
diff --git a/src/time_schemes/bdf_time_scheme.f90 b/src/time_schemes/bdf_time_scheme.f90
index 2ed0a6161f9..3b1c7bfaef0 100644
--- a/src/time_schemes/bdf_time_scheme.f90
+++ b/src/time_schemes/bdf_time_scheme.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 !> Backward-differencing scheme for time integration.
@@ -66,7 +66,7 @@ module bdf_time_scheme
   use utils, only : neko_error
   implicit none
   private
-  
+
   !> Implicit backward-differencing scheme for time integration.
   !! @details
   !! The explicit forumlas for the coefficients are taken from the following
@@ -76,14 +76,14 @@ module bdf_time_scheme
   !!
   !! For a contant time-step this corresponds to the following schemes for
   !! order 1 to 3:
-  !! - Order 1: \f$ \frac{1}{\Delta t} u^{n+1} - \frac{1}{\Delta t} u^{n} \f$ 
-  !! - Order 2: \f$ \frac{3}{2\Delta t} u^{n+1} - \frac{4}{2\Delta t} u^{n}  + 
-  !!                \frac{1}{2\Delta t} u^{n-1}\f$ 
-  !! - Order 3: \f$ \frac{11}{6\Delta t} u^{n+1} - \frac{18}{6\Delta t} u^{n}  + 
-  !!                \frac{9}{6\Delta t} u^{n-1} - \frac{2}{6\Delta t} u^{n-2}\f$ 
+  !! - Order 1: \f$ \frac{1}{\Delta t} u^{n+1} - \frac{1}{\Delta t} u^{n} \f$
+  !! - Order 2: \f$ \frac{3}{2\Delta t} u^{n+1} - \frac{4}{2\Delta t} u^{n}  +
+  !!                \frac{1}{2\Delta t} u^{n-1}\f$
+  !! - Order 3: \f$ \frac{11}{6\Delta t} u^{n+1} - \frac{18}{6\Delta t} u^{n}  +
+  !!                \frac{9}{6\Delta t} u^{n-1} - \frac{2}{6\Delta t} u^{n-2}\f$
   !!
   !! It is assumed that all the coefficients but the first one premultiply terms
-  !! that go to the right-hand side of the equation. 
+  !! that go to the right-hand side of the equation.
   !! Accordingly, the signs of these coefficients are reversed in the `coeffs`
   !! array. This is taken into account, for example, in the implemeation of the
   !! `rhs_maker` class.
@@ -95,18 +95,18 @@ module bdf_time_scheme
   !! the array \f$[1, 1]\f$, and **not** \f$[1/\Delta t, -1/\Delta t]\f$, as one
   !! might expect. Similar for the second and third order.
   !!
-  !! @remark 
+  !! @remark
   !! The current implementation can be easily extended to schemes of arbitrary
   !! order, by using the `fd_weights_full` subroutine to compute the
   !! coefficients. A demonstration of this is implemented in a test in
   !! `tests/ext_bdf_scheme/test_bdf.pf`
-  type, public, extends(time_scheme_t) :: bdf_time_scheme_t 
-     contains
-       !> Compute the scheme coefficients
-       procedure, nopass :: compute_coeffs => bdf_time_scheme_compute_coeffs
+  type, public, extends(time_scheme_t) :: bdf_time_scheme_t
+   contains
+     !> Compute the scheme coefficients
+     procedure, nopass :: compute_coeffs => bdf_time_scheme_compute_coeffs
   end type bdf_time_scheme_t
-  
-  contains
+
+contains
 
   !> Compute the scheme coefficients
   !! @param t Timestep values, first element is the current timestep.
@@ -118,7 +118,7 @@ subroutine bdf_time_scheme_compute_coeffs(coeffs, dt, order)
     integer, intent(in) :: order
 
     call rzero(coeffs, 4)
-    
+
     ! Note, these are true coeffs, multiplied by dt(1)
     select case (order)
     case (1)
@@ -137,10 +137,10 @@ subroutine bdf_time_scheme_compute_coeffs(coeffs, dt, order)
                    (dt(3) * (dt(2) + dt(3)) * (dt(1) + dt(2) + dt(3)))
        coeffs(1) = coeffs(2) + coeffs(3) + coeffs(4)
        coeffs = coeffs * dt(1)
-    case default 
-      call neko_error("The order of the BDF time scheme must be 1 to 3.")
+    case default
+       call neko_error("The order of the BDF time scheme must be 1 to 3.")
     end select
-    
+
   end subroutine bdf_time_scheme_compute_coeffs
 
-end module bdf_time_scheme
\ No newline at end of file
+end module bdf_time_scheme
diff --git a/src/time_schemes/ext_time_scheme.f90 b/src/time_schemes/ext_time_scheme.f90
index 1f550967d47..f7a8fbdd023 100644
--- a/src/time_schemes/ext_time_scheme.f90
+++ b/src/time_schemes/ext_time_scheme.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,40 +21,40 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 !> Explicit extrapolation scheme for time integration.
@@ -66,7 +66,7 @@ module ext_time_scheme
   use utils, only : neko_error
   implicit none
   private
-  
+
   !> Explicit extrapolation scheme for time integration.
   !! @details
   !! Compute the value at the current time-step by evaluating a polynomial
@@ -81,16 +81,16 @@ module ext_time_scheme
   !! gives improved stability when combined with BDF2 in a advection-diffusion
   !! equation.
   !! - Order 3: \f$  u^{n+1} = 8/3u^n - 7/3u^{n-1} + 2/3u^{n-2} \f$
-  type, public, extends(time_scheme_t) :: ext_time_scheme_t 
-     contains
-       !> Compute the scheme coefficients
-       procedure, nopass :: compute_coeffs => ext_time_scheme_compute_coeffs
-       !> Compute the coefficients for the modified EXT scheme
-       procedure, nopass :: compute_modified_coeffs => &
+  type, public, extends(time_scheme_t) :: ext_time_scheme_t
+   contains
+     !> Compute the scheme coefficients
+     procedure, nopass :: compute_coeffs => ext_time_scheme_compute_coeffs
+     !> Compute the coefficients for the modified EXT scheme
+     procedure, nopass :: compute_modified_coeffs => &
          ext_time_scheme_compute_modified_coeffs
   end type ext_time_scheme_t
 
-  contains 
+contains
 
   !> Compute the scheme coefficients
   !! @param t Timestep values, first element is the current timestep.
@@ -102,7 +102,7 @@ subroutine ext_time_scheme_compute_coeffs(coeffs, dt, order)
     integer, intent(in) :: order
 
     call rzero(coeffs, 4)
-    
+
     select case (order)
     case (1)
        coeffs(1) = 1.0_rp
@@ -113,8 +113,8 @@ subroutine ext_time_scheme_compute_coeffs(coeffs, dt, order)
        coeffs(3) =  dt(1) / (dt(2) + dt(3)) * (dt(1) + dt(2)) / dt(3)
        coeffs(2) = - dt(1) / dt(2) * (1.0_rp + dt(2) / dt(3) + dt(1) / dt(3))
        coeffs(1) =  1.0_rp - coeffs(2) - coeffs(3)
-    case default 
-      call neko_error("The order of the EXT time scheme must be 1 to 3.")
+    case default
+       call neko_error("The order of the EXT time scheme must be 1 to 3.")
     end select
   end subroutine ext_time_scheme_compute_coeffs
 
@@ -125,18 +125,18 @@ subroutine ext_time_scheme_compute_modified_coeffs(coeffs, dt)
     real(kind=rp), intent(out) :: coeffs(4)
     real(kind=rp), intent(in) :: dt(10)
     real(kind=rp) dta, dtb, dtc, dtd, dte, dts
-    
+
     call rzero(coeffs, 4)
-    
-   dts =  dt(2) + dt(3)
-   dta =  dt(1) / dt(2)
-   dtb =  dt(2) / dt(3)
-   dtc =  dt(1) / dt(3)
-   dtd =  dts / dt(2)
-   dte =  dt(1) / dts
-   coeffs(3) =  2.0_rp / 3.0_rp * dtc * (1.0_rp / dtd + dte)
-   coeffs(2) = -dta - coeffs(3) * dtd
-   coeffs(1) =  1.0_rp - coeffs(2) - coeffs(3)
+
+    dts =  dt(2) + dt(3)
+    dta =  dt(1) / dt(2)
+    dtb =  dt(2) / dt(3)
+    dtc =  dt(1) / dt(3)
+    dtd =  dts / dt(2)
+    dte =  dt(1) / dts
+    coeffs(3) =  2.0_rp / 3.0_rp * dtc * (1.0_rp / dtd + dte)
+    coeffs(2) = -dta - coeffs(3) * dtd
+    coeffs(1) =  1.0_rp - coeffs(2) - coeffs(3)
 
   end subroutine ext_time_scheme_compute_modified_coeffs
-end module ext_time_scheme
\ No newline at end of file
+end module ext_time_scheme
diff --git a/src/time_schemes/time_scheme.f90 b/src/time_schemes/time_scheme.f90
index 9272caffff5..751ca18c471 100644
--- a/src/time_schemes/time_scheme.f90
+++ b/src/time_schemes/time_scheme.f90
@@ -1,4 +1,4 @@
-! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. 
+! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC.
 !
 ! The UChicago Argonne, LLC as Operator of Argonne National
 ! Laboratory holds copyright in the Software. The copyright holder
@@ -21,58 +21,57 @@
 ! may be used to endorse or promote products derived from this software
 ! without specific prior written permission.
 !
-! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
-! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 
-! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF 
-! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
-! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 
-! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
-! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
-! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
-! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF
+! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 !
 ! Additional BSD Notice
 ! ---------------------
 ! 1. This notice is required to be provided under our contract with
 ! the U.S. Department of Energy (DOE). This work was produced at
-! Argonne National Laboratory under Contract 
+! Argonne National Laboratory under Contract
 ! No. DE-AC02-06CH11357 with the DOE.
 !
-! 2. Neither the United States Government nor UCHICAGO ARGONNE, 
-! LLC nor any of their employees, makes any warranty, 
+! 2. Neither the United States Government nor UCHICAGO ARGONNE,
+! LLC nor any of their employees, makes any warranty,
 ! express or implied, or assumes any liability or responsibility for the
 ! accuracy, completeness, or usefulness of any information, apparatus,
 ! product, or process disclosed, or represents that its use would not
 ! infringe privately-owned rights.
 !
-! 3. Also, reference herein to any specific commercial products, process, 
-! or services by trade name, trademark, manufacturer or otherwise does 
-! not necessarily constitute or imply its endorsement, recommendation, 
-! or favoring by the United States Government or UCHICAGO ARGONNE LLC. 
-! The views and opinions of authors expressed 
-! herein do not necessarily state or reflect those of the United States 
-! Government or UCHICAGO ARGONNE, LLC, and shall 
+! 3. Also, reference herein to any specific commercial products, process,
+! or services by trade name, trademark, manufacturer or otherwise does
+! not necessarily constitute or imply its endorsement, recommendation,
+! or favoring by the United States Government or UCHICAGO ARGONNE LLC.
+! The views and opinions of authors expressed
+! herein do not necessarily state or reflect those of the United States
+! Government or UCHICAGO ARGONNE, LLC, and shall
 ! not be used for advertising or product endorsement purposes.
 !
 !> Base class for time integration schemes
 module time_scheme
   use neko_config
   use num_types, only : rp
-  use utils, only : neko_warning
   use, intrinsic :: iso_c_binding
   implicit none
   private
-  
+
   !> Base abstract class for time integration schemes
   type, abstract, public :: time_scheme_t
    contains
      !> Compute the coefficients
      procedure(compute_coeffs), deferred, nopass :: compute_coeffs
   end type time_scheme_t
-  
+
   abstract interface
      !> Interface for setting the scheme coefficients
      !! @param t Timestep values, first element is the current timestep.
@@ -83,7 +82,7 @@ subroutine compute_coeffs(coeffs, dt, order)
        real(kind=rp), intent(out) :: coeffs(4)
        real(kind=rp), intent(in) :: dt(10)
        integer, intent(in) :: order
-     end subroutine
+     end subroutine compute_coeffs
   end interface
-  
+
 end module time_scheme
diff --git a/src/time_schemes/time_scheme_controller.f90 b/src/time_schemes/time_scheme_controller.f90
index a90be291e23..9116f5c1b26 100644
--- a/src/time_schemes/time_scheme_controller.f90
+++ b/src/time_schemes/time_scheme_controller.f90
@@ -31,7 +31,7 @@
 ! POSSIBILITY OF SUCH DAMAGE.
 !
 !> Compound scheme for the advection and diffusion operators in a transport
-!! equation. 
+!! equation.
 module time_scheme_controller
   use neko_config
   use num_types, only : rp
@@ -45,7 +45,7 @@ module time_scheme_controller
   !> Implements the logic to compute the time coefficients for the advection and
   !! diffusion operators in a transport equation.
   !! @details
-  !! Uses the BDF scheme for the diffusion, where as the term for advection 
+  !! Uses the BDF scheme for the diffusion, where as the term for advection
   !! the scheme depends on the orders of the BDF and advection schemes.
   !! - Order 1 advection
   !!   - BDF1 for diffusion -> Adam-Bashforth scheme.
@@ -57,22 +57,22 @@ module time_scheme_controller
   !!   - BDF2 for diffusion -> Modified explcit extrapolation scheme.
   !!   - BDF3 for diffusion -> Explciit extrapolation scheme.
   !! The order of the BDF scheme in the above logic is set by the user, whereas
-  !! the advection scheme is set to forward Euler when BDF is order 1, 
+  !! the advection scheme is set to forward Euler when BDF is order 1,
   !! and otherwise to a 3rd order scheme (excluding the first 2 timesteps).
   !! This means that some of the options in the above list never get realized,
   !! particularly order 2 and 3 advection for 1st order BDF. They remain in the
   !! code so as to have the orinigal Nek5000 logic in place for possible
   !! adoption in the future.
-  !! An important detail here is the handling of the first timesteps where a 
+  !! An important detail here is the handling of the first timesteps where a
   !! high-order scheme cannot be constructed. The parameters `nadv` and `ndiff`,
   !! which are initialized to 0, hold the current order of the respective
   !! scheme.
-  !! @note the advection scheme also applies to source terms. 
+  !! @note the advection scheme also applies to source terms.
   type, public :: time_scheme_controller_t
      type(ext_time_scheme_t) :: ext
      type(ab_time_scheme_t) :: ab
      type(bdf_time_scheme_t) :: bdf
-  
+
      !> Time coefficients for the advection operator
      real(kind=rp) :: advection_coeffs(4) = 0
      !> Time coefficients for the diffusion operator
@@ -88,10 +88,10 @@ module time_scheme_controller
      !> Order of the diffusion scheme
      integer :: diffusion_time_order
      !> Device pointer for `advection_coeffs`
-     type(c_ptr) :: advection_coeffs_d = C_NULL_PTR 
+     type(c_ptr) :: advection_coeffs_d = C_NULL_PTR
      !> Device pointer for `diffusion_coeffs`
-     type(c_ptr) :: diffusion_coeffs_d = C_NULL_PTR 
-     
+     type(c_ptr) :: diffusion_coeffs_d = C_NULL_PTR
+
    contains
      !> Constructor
      procedure, pass(this) :: init => time_scheme_controller_init
@@ -102,7 +102,7 @@ module time_scheme_controller
        time_scheme_controller_set_coeffs
   end type time_scheme_controller_t
 
-  contains
+contains
 
   !> Contructor
   !! @param torder Desired order of the scheme: 1, 2, 3.
@@ -110,10 +110,10 @@ module time_scheme_controller
   subroutine time_scheme_controller_init(this, torder)
     implicit none
     class(time_scheme_controller_t) :: this
-    integer :: torder 
-  
+    integer :: torder
+
     this%diffusion_time_order = torder
-    
+
     ! Force 1st order advection when diffusion is 1st order
     if (torder .eq. 1) then
        this%advection_time_order = 1
@@ -139,7 +139,7 @@ subroutine time_scheme_controller_free(this)
   end subroutine time_scheme_controller_free
 
   !> Set the time coefficients
-  !! @details Implements all necessary logic to handle  
+  !! @details Implements all necessary logic to handle
   !! @param t Timestep values, first element is the current timestep.
   subroutine time_scheme_controller_set_coeffs(this, dt)
     implicit none
@@ -155,18 +155,18 @@ subroutine time_scheme_controller_set_coeffs(this, dt)
       adv_coeffs_d  => this%advection_coeffs_d, &
       diff_coeffs   => this%diffusion_coeffs, &
       diff_coeffs_d => this%diffusion_coeffs_d)
-   
+
       adv_coeffs_old = adv_coeffs
       diff_coeffs_old = diff_coeffs
-    
+
       ! Increment the order of the scheme if below time_order
       ndiff = ndiff + 1
       ndiff = min(ndiff, this%diffusion_time_order)
       nadv = nadv + 1
       nadv = min(nadv, this%advection_time_order)
-      
+
       call this%bdf%compute_coeffs(diff_coeffs, dt, ndiff)
-      
+
       if (nadv .eq. 1) then
          ! Forward euler
          call this%ext%compute_coeffs(adv_coeffs, dt, nadv)
@@ -190,20 +190,22 @@ subroutine time_scheme_controller_set_coeffs(this, dt)
             call this%ext%compute_coeffs(adv_coeffs, dt, nadv)
          end if
       end if
-      
+
 
       if (c_associated(adv_coeffs_d)) then
          if (maxval(abs(adv_coeffs - adv_coeffs_old)) .gt. 1e-10_rp) then
-            call device_memcpy(adv_coeffs, adv_coeffs_d, 4, HOST_TO_DEVICE)
+            call device_memcpy(adv_coeffs, adv_coeffs_d, 4, &
+                               HOST_TO_DEVICE, sync=.false.)
          end if
       end if
 
       if (c_associated(diff_coeffs_d)) then
          if (maxval(abs(diff_coeffs - diff_coeffs_old)) .gt. 1e-10_rp) then
-            call device_memcpy(diff_coeffs, diff_coeffs_d, 4, HOST_TO_DEVICE)
+            call device_memcpy(diff_coeffs, diff_coeffs_d, 4, &
+                               HOST_TO_DEVICE, sync=.false.)
          end if
       end if
     end associate
 
   end subroutine time_scheme_controller_set_coeffs
-end module time_scheme_controller
\ No newline at end of file
+end module time_scheme_controller
diff --git a/tests/device/device.pf b/tests/device/device.pf
index 002070aa76f..4cdd145a91b 100644
--- a/tests/device/device.pf
+++ b/tests/device/device.pf
@@ -2,7 +2,7 @@
 @test
 subroutine test_device_init
   use pfunit
-  use device  
+  use device
   implicit none
   call device_init()
 end subroutine test_device_init
@@ -15,13 +15,13 @@ subroutine test_device_sync
 
   call device_init()
   call device_sync()
-  
+
 end subroutine test_device_sync
 
 @test
 subroutine test_device_alloc
-  use neko_config  
-  use pfunit  
+  use neko_config
+  use pfunit
   use device
   implicit none
   type(c_ptr) :: x_d
@@ -36,12 +36,12 @@ subroutine test_device_alloc
      @assertTrue(c_associated(x_d))
 
   end if
-  
+
 end subroutine test_device_alloc
 
 @test
 subroutine test_device_free
-  use neko_config  
+  use neko_config
   use pfunit
   use device
   implicit none
@@ -49,25 +49,25 @@ subroutine test_device_free
   integer(c_size_t) :: size_
 
   if (NEKO_BCKND_DEVICE .eq. 1) then
-         
+
      size_ = 42 * 4
      x_d = C_NULL_PTR
 
      call device_alloc(x_d, size_)
-     
+
      @assertTrue(c_associated(x_d))
 
      call device_free(x_d)
-     
+
      @assertFalse(c_associated(x_d))
 
   end if
-  
+
 end subroutine test_device_free
 
 @test
 subroutine test_device_associate
-  use neko_config  
+  use neko_config
   use pfunit
   use device
   implicit none
@@ -76,21 +76,22 @@ subroutine test_device_associate
   integer :: x(42)
 
   if (NEKO_BCKND_DEVICE .eq. 1) then
-       
+
      size_ = size(x) * 4
-     
+     x_d = C_NULL_PTR
+
      call device_alloc(x_d, size_)
      call device_associate(x, x_d)
-     
+
      @assertTrue(c_associated(x_d))
 
   end if
-  
+
 end subroutine test_device_associate
 
 @test
 subroutine test_device_associated
-  use neko_config  
+  use neko_config
   use pfunit
   use device
   implicit none
@@ -99,8 +100,9 @@ subroutine test_device_associated
   integer :: x(42)
 
   if (NEKO_BCKND_DEVICE .eq. 1) then
-     
+
      size_ = size(x) * 4
+     x_d = C_NULL_PTR
 
      call device_alloc(x_d, size_)
      call device_associate(x, x_d)
@@ -108,24 +110,26 @@ subroutine test_device_associated
      @assertTrue(device_associated(x))
 
   end if
-  
+
 end subroutine test_device_associated
 
 @test
 subroutine test_device_get_ptr
-  use neko_config  
+  use neko_config
   use pfunit
   use device
   implicit none
   type(c_ptr) :: x_d, dev
   integer(c_size_t) :: size_
   integer :: x(42)
-  logical :: same_ptr
 
   if (NEKO_BCKND_DEVICE .eq. 1) then
 
      size_ = size(x) * 4
-  
+
+     x_d = C_NULL_PTR
+     dev = C_NULL_PTR
+
      call device_alloc(x_d, size_)
      call device_associate(x, x_d)
 
@@ -135,23 +139,24 @@ subroutine test_device_get_ptr
 
   end if
 
-  
+
 end subroutine test_device_get_ptr
 
 @test
 subroutine test_device_map
-  use neko_config  
+  use neko_config
   use pfunit
   use device
   implicit none
   type(c_ptr) :: x_d, dev
   integer(c_size_t) :: size_
   integer :: x(42)
-  logical :: same_ptr
-  
+
   if (NEKO_BCKND_DEVICE .eq. 1) then
 
      size_ = size(x) * 4
+     x_d = C_NULL_PTR
+     dev = C_NULL_PTR
 
      call device_map(x, x_d, size(x))
 
@@ -161,23 +166,25 @@ subroutine test_device_map
 
   end if
 
-  
+
 end subroutine test_device_map
 
 @test
 subroutine test_device_memcpy
-  use neko_config  
+  use neko_config
   use pfunit
   use device
   implicit none
   type(c_ptr) :: x_d, y_d, dev
   integer(c_size_t) :: size_
   integer :: x(42), i
-  logical :: same_ptr
 
   if (NEKO_BCKND_DEVICE .eq. 1) then
-     
+
      size_ = size(x) * 4
+     x_d = C_NULL_PTR
+     y_d = C_NULL_PTR
+     dev = C_NULL_PTR
 
      call device_map(x, x_d, size(x))
 
@@ -187,12 +194,12 @@ subroutine test_device_memcpy
         x(i) = i
      end do
 
-     call device_memcpy(x, x_d, size(X), HOST_TO_DEVICE)
+     call device_memcpy(x, x_d, size(X), HOST_TO_DEVICE, sync=.false.)
 
      x = 0
 
      call device_memcpy(x, x_d, size(x), DEVICE_TO_HOST, sync=.true.)
-     
+
      do i = 1, size(x)
         @assertEqual(x(i), i)
      end do
@@ -204,23 +211,23 @@ subroutine test_device_memcpy
      x = 0
 
      call device_memcpy(x, y_d, size(x), DEVICE_TO_HOST, sync=.true.)
-     
+
      do i = 1, size(x)
         @assertEqual(x(i), i)
      end do
-     
+
   end if
 
 end subroutine test_device_memcpy
 
 @test
 subroutine test_device_stream_create
-  use neko_config  
+  use neko_config
   use pfunit
   use device
   implicit none
   type(c_ptr) :: stream
-  
+
   if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1)) then
 
      stream = C_NULL_PTR
@@ -228,17 +235,17 @@ subroutine test_device_stream_create
      @assertTrue(c_associated(stream))
 
   end if
-  
+
 end subroutine test_device_stream_create
 
 @test
 subroutine test_device_stream_destroy
-  use neko_config  
+  use neko_config
   use pfunit
   use device
   implicit none
   type(c_ptr) :: stream
-  
+
   if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1)) then
 
      stream = C_NULL_PTR
@@ -248,17 +255,17 @@ subroutine test_device_stream_destroy
      call device_stream_destroy(stream)
 
   end if
-  
+
 end subroutine test_device_stream_destroy
 
 @test
 subroutine test_device_sync_stream
-  use neko_config  
+  use neko_config
   use pfunit
   use device
   implicit none
   type(c_ptr) :: stream1, stream2
-  
+
   if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1)) then
 
      stream1 = C_NULL_PTR
@@ -277,5 +284,5 @@ subroutine test_device_sync_stream
      call device_stream_destroy(stream2)
 
   end if
-  
+
 end subroutine test_device_sync_stream
diff --git a/tests/device_math/device_math_parallel.pf b/tests/device_math/device_math_parallel.pf
index 3ea325b6b28..ab70603954f 100644
--- a/tests/device_math/device_math_parallel.pf
+++ b/tests/device_math/device_math_parallel.pf
@@ -44,7 +44,7 @@ contains
        a = 1.0_rp
        
        call device_map(a, a_d, size(a))
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
        
        call device_rzero(a_d, n)
        
@@ -58,7 +58,7 @@ contains
           a(i) = real(i, rp)
        end do
        
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
        
        call device_rzero(a_d, n)
        
@@ -83,7 +83,7 @@ contains
        a = 0.0_rp
        
        call device_map(a, a_d, size(a))
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
        
        call device_rone(a_d, n)
        
@@ -97,7 +97,7 @@ contains
           a(i) = real(i, rp)
        end do
        
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
        
        call device_rone(a_d, n)
        
@@ -128,14 +128,14 @@ contains
        end do
 
        call device_map(a, a_d, size(a))
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
     
        call device_map(b, b_d, size(b))
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
        
        call device_copy(b_d, a_d, n)
        
-       call device_memcpy(a, a_d, size(a), DEVICE_TO_HOST)
+       call device_memcpy(a, a_d, size(a), DEVICE_TO_HOST, sync=.false.)
        call device_memcpy(b, b_d, size(a), DEVICE_TO_HOST, sync=.true.)
     
        do i = 1, n
@@ -158,7 +158,7 @@ contains
        a = 1.0_rp
        
        call device_map(a, a_d, size(a))
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
        
        call device_cfill(a_d, c, n)
        
@@ -185,7 +185,7 @@ contains
        a = 40.0_rp
        
        call device_map(a, a_d, size(a))
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
        
        call device_cadd(a_d, c, n)
        
@@ -217,8 +217,8 @@ contains
        call device_map(a, a_d, size(a))
        call device_map(b, b_d, size(b))
        
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
        
        call device_add2(a_d, b_d, n)
        
@@ -250,8 +250,8 @@ contains
        call device_map(a, a_d, size(a))
        call device_map(b, b_d, size(b))
        
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
        
        call device_add2s1(a_d, b_d, c1, n)
        
@@ -283,8 +283,8 @@ contains
        call device_map(a, a_d, size(a))
        call device_map(b, b_d, size(b))
        
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
        
        call device_add2s2(a_d, b_d, c1, n)
        
@@ -320,9 +320,9 @@ contains
        call device_map(b, b_d, size(b))
        call device_map(c, c_d, size(c))
        
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
-       call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE, sync=.false.)
        
        call device_add3s2(a_d, b_d, c_d, c1, c2, n)
        
@@ -350,7 +350,7 @@ contains
        
        call device_map(a, a_d, size(a))
 
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
        
        call device_invcol1(a_d, n)
        
@@ -381,8 +381,8 @@ contains
        call device_map(a, a_d, size(a))
        call device_map(b, b_d, size(b))
 
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
        
        call device_invcol2(a_d, b_d, n)
        
@@ -413,8 +413,8 @@ contains
        call device_map(a, a_d, size(a))
        call device_map(b, b_d, size(b))
 
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
        
        call device_col2(a_d, b_d, n)
        
@@ -448,9 +448,9 @@ contains
        call device_map(b, b_d, size(b))
        call device_map(c, c_d, size(c))
 
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
-       call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE, sync=.false.)
        
        call device_col3(a_d, b_d, c_d, n)
        
@@ -481,8 +481,8 @@ contains
        call device_map(a, a_d, size(a))
        call device_map(b, b_d, size(b))
 
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
        
        call device_sub2(a_d, b_d, n)
        
@@ -516,9 +516,9 @@ contains
        call device_map(b, b_d, size(b))
        call device_map(c, c_d, size(c))
 
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
-       call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE, sync=.false.)
        
        call device_sub3(a_d, b_d, c_d, n)
        
@@ -552,9 +552,9 @@ contains
        call device_map(b, b_d, size(b))
        call device_map(c, c_d, size(c))
 
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
-       call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE, sync=.false.)
        
        call device_addcol3(a_d, b_d, c_d, n)
        
@@ -591,10 +591,10 @@ contains
        call device_map(c, c_d, size(c))
        call device_map(d, d_d, size(d))
 
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
-       call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE)
-       call device_memcpy(d, d_d, size(d), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(d, d_d, size(d), HOST_TO_DEVICE, sync=.false.)
        
        call device_addcol4(a_d, b_d, c_d, d_d, n)
        
@@ -632,9 +632,9 @@ contains
        call device_map(b, b_d, size(b))
        call device_map(c, c_d, size(c))
        
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
-       call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE, sync=.false.)
        
        expected = real(this%getNumProcesses(), rp)  * real(n, rp)
        res = device_glsc3(a_d, b_d, c_d, n)
@@ -666,8 +666,8 @@ contains
        call device_map(a, a_d, size(a))
        call device_map(b, b_d, size(b))
        
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
-       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
+       call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.)
        
        expected = real(this%getNumProcesses(), rp)  * real(n, rp)
        res = device_glsc2(a_d, b_d, n)
@@ -696,7 +696,7 @@ contains
        
        call device_map(a, a_d, size(a))
        
-       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE)
+       call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.)
        
        expected = real(this%getNumProcesses(), rp)  * real(n, rp)
        res = device_glsum(a_d, n)
diff --git a/tests/device_mathops/device_mathops.pf b/tests/device_mathops/device_mathops.pf
index 2a9e1186ceb..6753565fb0d 100644
--- a/tests/device_mathops/device_mathops.pf
+++ b/tests/device_mathops/device_mathops.pf
@@ -28,14 +28,14 @@ subroutine test_mathops_opchsign
         a2 = 2.0_rp
         a3 = 3.0_rp
         
-        call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE)
-        call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE)
-        call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE)
+        call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE, sync=.false.)
         
         call device_opchsign(a1_d, a2_d, a3_d, gdim, n)
 
-        call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST)
-        call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST)
+        call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST, sync=.false.)
+        call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST, sync=.false.)
         call device_memcpy(a3, a3_d, n, DEVICE_TO_HOST, sync=.true.)
         
         do i = 1, n
@@ -84,15 +84,15 @@ subroutine test_mathops_opcolv
         a3 = 3.0_rp
         c = 42.0_rp
 
-        call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE)
-        call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE)
-        call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE)
-        call device_memcpy(c, c_d, n, HOST_TO_DEVICE)
+        call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(c, c_d, n, HOST_TO_DEVICE, sync=.false.)
         
         call device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n)
 
-        call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST)
-        call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST)
+        call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST, sync=.false.)
+        call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST, sync=.false.)
         call device_memcpy(a3, a3_d, n, DEVICE_TO_HOST, sync=.true.)
         
         do i = 1, n
@@ -152,18 +152,18 @@ subroutine test_mathops_opcolv3c
         b3 = 3.0_rp
         c = 42.0_rp
 
-        call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE)
-        call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE)
-        call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE)
-        call device_memcpy(b1, b1_d, n, HOST_TO_DEVICE)
-        call device_memcpy(b2, b2_d, n, HOST_TO_DEVICE)
-        call device_memcpy(b3, b3_d, n, HOST_TO_DEVICE)
-        call device_memcpy(c, c_d, n, HOST_TO_DEVICE)
+        call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(b1, b1_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(b2, b2_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(b3, b3_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(c, c_d, n, HOST_TO_DEVICE, sync=.false.)
         
         call device_opcolv3c(a1_d, a2_d, a3_d, b1_d, b2_d, b3_d, c_d, d, n, gdim)
 
-        call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST)
-        call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST)
+        call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST, sync=.false.)
+        call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST, sync=.false.)
         call device_memcpy(a3, a3_d, n, DEVICE_TO_HOST, sync=.true.)
 
         do i = 1, n
@@ -218,17 +218,17 @@ subroutine test_mathops_opadd2cm
         b2 = 2.0_rp
         b3 = 3.0_rp
 
-        call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE)
-        call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE)
-        call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE)
-        call device_memcpy(b1, b1_d, n, HOST_TO_DEVICE)
-        call device_memcpy(b2, b2_d, n, HOST_TO_DEVICE)
-        call device_memcpy(b3, b3_d, n, HOST_TO_DEVICE)
+        call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(b1, b1_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(b2, b2_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(b3, b3_d, n, HOST_TO_DEVICE, sync=.false.)
         
         call device_opadd2cm(a1_d, a2_d, a3_d, b1_d, b2_d, b3_d, c, n, gdim)
 
-        call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST)
-        call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST)
+        call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST, sync=.false.)
+        call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST, sync=.false.)
         call device_memcpy(a3, a3_d, n, DEVICE_TO_HOST, sync=.true.)
                 
         do i = 1, n
@@ -286,19 +286,19 @@ subroutine test_mathops_opadd2col
         b3 = 3.0_rp
         c = 42.0_rp
 
-        call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE)
-        call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE)
-        call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE)
-        call device_memcpy(b1, b1_d, n, HOST_TO_DEVICE)
-        call device_memcpy(b2, b2_d, n, HOST_TO_DEVICE)
-        call device_memcpy(b3, b3_d, n, HOST_TO_DEVICE)
-        call device_memcpy(c, c_d, n, HOST_TO_DEVICE)
+        call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(b1, b1_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(b2, b2_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(b3, b3_d, n, HOST_TO_DEVICE, sync=.false.)
+        call device_memcpy(c, c_d, n, HOST_TO_DEVICE, sync=.false.)
         
         call device_opadd2col(a1_d, a2_d, a3_d, b1_d, b2_d, b3_d, c_d, n, gdim)
 
-        call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST)
-        call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST)
-        call device_memcpy(a3, a3_d, n, DEVICE_TO_HOST, sync=.true.)        
+        call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST, sync=.false.)
+        call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST, sync=.false.)
+        call device_memcpy(a3, a3_d, n, DEVICE_TO_HOST, sync=.true.)
         
         do i = 1, n
            @assertEqual(a1(i), 4753.0_rp)
diff --git a/tests/field/field_parallel.pf b/tests/field/field_parallel.pf
index 319b53c4fa8..e39f4def910 100644
--- a/tests/field/field_parallel.pf
+++ b/tests/field/field_parallel.pf
@@ -73,6 +73,7 @@ contains
     
     call msh%add_element(2, p(2), p(5), p(6), p(4), &
          p(8), p(11), p(12), p(9))
+    call msh%generate_conn()
     
   end subroutine test_field_gen_msh
   
@@ -88,6 +89,8 @@ contains
 
     call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr)
 
+    pe_rank = this%getProcessRank()
+    pe_size = this%getNumProcesses()
     call test_field_gen_msh(msh)
     call Xh%init(GLL, lx, lx, lx)
     
diff --git a/tests/gather_scatter/gather_scatter_parallel.pf b/tests/gather_scatter/gather_scatter_parallel.pf
index 354cdd4479e..2af79cacc22 100644
--- a/tests/gather_scatter/gather_scatter_parallel.pf
+++ b/tests/gather_scatter/gather_scatter_parallel.pf
@@ -86,7 +86,7 @@ contains
 
     if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. &
          (NEKO_BCKND_OPENCL .eq. 1)) then
-       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE)
+       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.)
     end if
 
     call gs_h%op(x%x, n, GS_OP_ADD)
@@ -175,7 +175,7 @@ contains
 
     if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. &
          (NEKO_BCKND_OPENCL .eq. 1)) then
-       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE)
+       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.)
     end if
     
     call gs_h%op(x%x, n, GS_OP_ADD)
@@ -263,7 +263,7 @@ contains
 
     if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. &
          (NEKO_BCKND_OPENCL .eq. 1)) then
-       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE)
+       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.)
     end if
     
     call gs_h%op(x%x, n, GS_OP_ADD)
@@ -351,7 +351,7 @@ contains
 
     if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. &
          (NEKO_BCKND_OPENCL .eq. 1)) then
-       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE)
+       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.)
     end if
     
     call gs_h%op(x%x, n, GS_OP_ADD)
@@ -439,7 +439,7 @@ contains
 
     if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. &
          (NEKO_BCKND_OPENCL .eq. 1)) then
-       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE)
+       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.)
     end if
     
     call gs_h%op(x%x, n, GS_OP_ADD)
@@ -527,7 +527,7 @@ contains
 
     if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. &
          (NEKO_BCKND_OPENCL .eq. 1)) then
-       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE)
+       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.)
     end if
     
     call gs_h%op(x%x, n, GS_OP_ADD)
@@ -615,7 +615,7 @@ contains
 
     if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. &
          (NEKO_BCKND_OPENCL .eq. 1)) then
-       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE)
+       call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.)
     end if
     
     call gs_h%op(x%x, n, GS_OP_ADD)
diff --git a/tests/mean_field/mean_field_parallel.pf b/tests/mean_field/mean_field_parallel.pf
index 151d80b3d6f..b9ce60d2049 100644
--- a/tests/mean_field/mean_field_parallel.pf
+++ b/tests/mean_field/mean_field_parallel.pf
@@ -73,6 +73,7 @@ contains
     
     call msh%add_element(2, p(2), p(5), p(6), p(4), &
          p(8), p(11), p(12), p(9))
+    call msh%generate_conn()
     
   end subroutine test_mean_field_gen_msh
   
@@ -87,6 +88,8 @@ contains
     integer :: ierr
 
     call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr)
+    pe_rank = this%getProcessRank()
+    pe_size = this%getNumProcesses()
 
     call test_mean_field_gen_msh(msh)
     call Xh%init(GLL, lx, lx, lx)
@@ -125,7 +128,8 @@ contains
     end do
     
     if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_memcpy(mf%mf%x, mf%mf%x_d, size(mf%mf%x), DEVICE_TO_HOST, sync=.true.)
+       call device_memcpy(mf%mf%x, mf%mf%x_d, size(mf%mf%x), &
+                          DEVICE_TO_HOST, sync=.true.)
     end if
     
     @assertEqual(maxval(mf%mf%x), 42.0_rp)
@@ -139,7 +143,8 @@ contains
     end do
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_memcpy(mf2%mf%x, mf2%mf%x_d, size(mf2%mf%x), DEVICE_TO_HOST, sync=.true.)
+       call device_memcpy(mf2%mf%x, mf2%mf%x_d, size(mf2%mf%x), &
+                          DEVICE_TO_HOST, sync=.true.)
     end if
 
 
diff --git a/tests/mean_sqr_field/mean_sqr_field_parallel.pf b/tests/mean_sqr_field/mean_sqr_field_parallel.pf
index 6692e0e188f..b1427596d7c 100644
--- a/tests/mean_sqr_field/mean_sqr_field_parallel.pf
+++ b/tests/mean_sqr_field/mean_sqr_field_parallel.pf
@@ -74,6 +74,7 @@ contains
     
     call msh%add_element(2, p(2), p(5), p(6), p(4), &
          p(8), p(11), p(12), p(9))
+    call msh%generate_conn()
     
   end subroutine test_mean_sqr_field_gen_msh
   
@@ -89,6 +90,8 @@ contains
 
     call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr)
 
+    pe_rank = this%getProcessRank()
+    pe_size = this%getNumProcesses()
     call test_mean_sqr_field_gen_msh(msh)
     call Xh%init(GLL, lx, lx, lx)
     
@@ -126,7 +129,8 @@ contains
     end do
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_memcpy(mf%mf%x, mf%mf%x_d, size(mf%mf%x), DEVICE_TO_HOST, sync=.true.)
+       call device_memcpy(mf%mf%x, mf%mf%x_d, size(mf%mf%x), &
+                          DEVICE_TO_HOST, sync=.true.)
     end if
 
     @assertEqual(maxval(mf%mf%x), 1764.0_rp)
@@ -140,7 +144,8 @@ contains
     end do
 
     if (NEKO_BCKND_DEVICE .eq. 1) then
-       call device_memcpy(mf2%mf%x, mf2%mf%x_d, size(mf2%mf%x), DEVICE_TO_HOST, sync=.true.)
+       call device_memcpy(mf2%mf%x, mf2%mf%x_d, size(mf2%mf%x), &
+                          DEVICE_TO_HOST, sync=.true.)
     end if
 
     @assertEqual(maxval(mf2%mf%x), 38.5_rp)
diff --git a/tests/point_interpolation/point_interpolation_parallel.pf b/tests/point_interpolation/point_interpolation_parallel.pf
index ff9cc498099..90add943ed5 100644
--- a/tests/point_interpolation/point_interpolation_parallel.pf
+++ b/tests/point_interpolation/point_interpolation_parallel.pf
@@ -15,8 +15,32 @@ module point_interpolation_parallel
   use neko_config
   implicit none
 
+  @TestCase
+  type, extends(MPITestCase) :: interpolation_test
+   contains
+     procedure :: setUp
+     procedure :: tearDown
+  end type interpolation_test
+
 contains
 
+  subroutine setUp(this)
+    class(interpolation_test), intent(inout) :: this
+    integer :: ierr
+
+    call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr)
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_init
+    end if
+  end subroutine setUp
+
+  subroutine tearDown(this)
+    class(interpolation_test), intent(inout) :: this
+    if (NEKO_BCKND_DEVICE .eq. 1) then
+       call device_finalize
+    end if
+  end subroutine tearDown
+
   !> Creates 8 points for a cube with diagonal (1, 1) - (2, 2)
   subroutine create_points(p)
     implicit none
@@ -40,7 +64,7 @@ contains
     call p(8)%set_id(8)
   end subroutine create_points
 
-  subroutine point_interpolation_test_gen_coef(msh, coef, lx)
+  subroutine gen_coef(msh, coef, lx)
     implicit none
     type(mesh_t), intent(inout) :: msh
     type(coef_t), intent(inout) :: coef
@@ -54,32 +78,26 @@ contains
     call gs%init(dof)
     call coef%init(gs)
 
-  end subroutine point_interpolation_test_gen_coef
+  end subroutine gen_coef
 
   @test(npes=[1])
-  subroutine point_interpolation_test_init(this)
+  subroutine init(this)
     implicit none
-    class (MpiTestMethod), intent(inout) :: this
+    class (interpolation_test), intent(inout) :: this
     type(point_interpolator_t) :: interp
     type(coef_t) :: coef
     type(mesh_t) :: msh
     integer, parameter :: lx = 4
-    integer :: ierr
     type(linear_dist_t) :: dist
     type(point_t) :: p(8)
     type(dofmap_t) :: dof
     type(gs_t) :: gs
     type(space_t) :: Xh
 
-
-    call device_init
-    call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr)
-
     pe_rank = this%getProcessRank()
     pe_size = this%getNumProcesses()
 
-    dist = linear_dist_t(1, this%getProcessRank(), &
-         this%getNumProcesses(), NEKO_COMM)
+    dist = linear_dist_t(1, pe_rank, pe_size, NEKO_COMM)
 
     call create_points(p)
 
@@ -96,35 +114,32 @@ contains
 
     @assertTrue(associated(interp%Xh))
 
-    call device_finalize
-  end subroutine point_interpolation_test_init
+  end subroutine init
 
   @test(npes=[1])
-  subroutine point_interpolation_test_interpolation(this)
+  subroutine interpolation(this)
     implicit none
-    class (MpiTestMethod), intent(inout) :: this
+    class (interpolation_test), intent(inout) :: this
     type(point_interpolator_t) :: interp
     type(coef_t) :: coef
     type(mesh_t) :: msh
     integer, parameter :: lx = 4
-    integer :: ierr
     type(point_t) :: rst(4) ! A list of  r,s,t coordinates
     type(point_t) :: xyz(4), xyz_equal(4) ! the results of the interpolation
-    real(kind=rp)  :: xyz_real(4)
+    real(kind=rp) :: xyz_real(4), xyz_target(4)
     type(linear_dist_t) :: dist
     type(point_t) :: p(8)
     type(dofmap_t) :: dof
     type(gs_t) :: gs
     type(space_t) :: Xh
 
-    integer :: e = 1 ! our element index (in this example we only use a 1-element mesh)
+    ! our element index (in this example we only use a 1-element mesh)
+    integer :: e = 1
 
-    call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr)
     pe_rank = this%getProcessRank()
     pe_size = this%getNumProcesses()
 
-    dist = linear_dist_t(1, this%getProcessRank(), &
-         this%getNumProcesses(), NEKO_COMM)
+    dist = linear_dist_t(1, pe_rank, pe_size, NEKO_COMM)
     call create_points(p)
     call msh%init(3, dist)
     call msh%add_element(1, p(1), p(2), p(3), p(4), p(5), p(6), p(7), p(8))
@@ -135,22 +150,22 @@ contains
     call coef%init(gs)
 
     ! Initialize the r,s,t values
-    rst(1)%x = (/ 1.0_rp,  0.0_rp, -0.5_rp/)
+    rst(1)%x = (/ 1.0_rp, 0.0_rp, -0.5_rp/)
     rst(2)%x = (/ 0.4_rp, -0.3_rp, -1.0_rp/)
     rst(3)%x = (/-1.0_rp, -1.0_rp, -1.0_rp/)
-    rst(4)%x = (/ 1.0_rp,  1.0_rp,  1.0_rp/)
+    rst(4)%x = (/ 1.0_rp, 1.0_rp, 1.0_rp/)
 
-    xyz_equal(1)%x = (/2.0_rp,  1.5_rp , 1.25_rp/)
+    xyz_equal(1)%x = (/2.0_rp, 1.5_rp , 1.25_rp/)
     xyz_equal(2)%x = (/1.7_rp, 1.35_rp, 1.0_rp/)
     xyz_equal(3)%x = (/1.0_rp, 1.0_rp, 1.0_rp/)
-    xyz_equal(4)%x = (/ 2.0_rp,  2.0_rp,  2.0_rp/)
+    xyz_equal(4)%x = (/ 2.0_rp, 2.0_rp, 2.0_rp/)
 
     call interp%init(coef%Xh) ! initialize the interpolator object
-    
+
     ! We use the interpolation for a vector field, interpolating the coordinates
     ! themselves
     xyz = interp%interpolate(rst, coef%dof%x(:,:,:,e), &
-         coef%dof%y(:,:,:,e), coef%dof%z(:,:,:,e))
+                             coef%dof%y(:,:,:,e), coef%dof%z(:,:,:,e))
 
     @assertRelativelyEqual(xyz(1)%x, xyz_equal(1)%x, tolerance=1e-4_rp)
     @assertRelativelyEqual(xyz(2)%x, xyz_equal(2)%x, tolerance=1e-4_rp)
@@ -160,19 +175,19 @@ contains
     ! We use the interpolation for a scalar field, interpolating the coordinates
     ! themselves
     xyz_real = interp%interpolate(rst, coef%dof%x(:,:,:,e))
-    @assertRelativelyEqual(xyz_real, (/ 2.0_rp, 1.7_rp, 1.0_rp, 2.0_rp/), tolerance=1e-4_rp)
+    xyz_target = (/ 2.0_rp, 1.7_rp, 1.0_rp, 2.0_rp/)
+    @assertRelativelyEqual(xyz_real, xyz_target, tolerance=1e-4_rp)
 
-  end subroutine point_interpolation_test_interpolation
+  end subroutine interpolation
 
   @test(npes=[1])
-  subroutine point_interpolation_test_interpolation_single_point(this)
+  subroutine interpolation_single_point(this)
     implicit none
-    class (MpiTestMethod), intent(inout) :: this
+    class (interpolation_test), intent(inout) :: this
     type(point_interpolator_t) :: interp
     type(coef_t) :: coef
     type(mesh_t) :: msh
     integer, parameter :: lx = 4
-    integer :: ierr
     type(point_t) :: rst(1) ! A list of  r,s,t coordinates
     type(point_t) :: xyz(1), xyz_equal(1) ! the results of the interpolation
     real(kind=rp) :: xyz_real(1)
@@ -182,14 +197,13 @@ contains
     type(gs_t) :: gs
     type(space_t) :: Xh
 
-    integer :: e = 1 ! our element index (in this example we only use a 1-element mesh)
+    ! our element index (in this example we only use a 1-element mesh)
+    integer :: e = 1
 
-    call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr)
     pe_rank = this%getProcessRank()
     pe_size = this%getNumProcesses()
 
-    dist = linear_dist_t(1, this%getProcessRank(), &
-         this%getNumProcesses(), NEKO_COMM)
+    dist = linear_dist_t(1, pe_rank, pe_size, NEKO_COMM)
     call create_points(p)
     call msh%init(3, dist)
     call msh%add_element(1, p(1), p(2), p(3), p(4), p(5), p(6), p(7), p(8))
@@ -200,16 +214,16 @@ contains
     call coef%init(gs)
 
     ! Initialize the r,s,t values
-    rst(1)%x = (/ 1.0_rp,  0.0_rp, -0.5_rp/)
+    rst(1)%x = (/ 1.0_rp, 0.0_rp, -0.5_rp/)
 
-    xyz_equal(1)%x = (/2.0_rp,  1.5_rp , 1.25_rp/)
+    xyz_equal(1)%x = (/2.0_rp, 1.5_rp , 1.25_rp/)
 
     call interp%init(coef%Xh) ! initialize the interpolator object
-    
+
     ! We use the interpolation for a vector field, interpolating the coordinates
     ! themselves
     xyz = interp%interpolate(rst, coef%dof%x(:,:,:,e), &
-         coef%dof%y(:,:,:,e), coef%dof%z(:,:,:,e))
+                             coef%dof%y(:,:,:,e), coef%dof%z(:,:,:,e))
 
     @assertRelativelyEqual(xyz(1)%x, xyz_equal(1)%x, tolerance=1e-4_rp)
 
@@ -218,6 +232,6 @@ contains
     xyz_real = interp%interpolate(rst, coef%dof%x(:,:,:,e))
     @assertRelativelyEqual(xyz_real, (/ 2.0_rp/), tolerance=1e-4_rp)
 
-  end subroutine point_interpolation_test_interpolation_single_point
+  end subroutine interpolation_single_point
 
-end module
+end module point_interpolation_parallel
diff --git a/tests/scratch_registry/test_scratch_registry.pf b/tests/scratch_registry/test_scratch_registry.pf
index 2f4ee27a51c..61c37aa3a55 100644
--- a/tests/scratch_registry/test_scratch_registry.pf
+++ b/tests/scratch_registry/test_scratch_registry.pf
@@ -7,6 +7,7 @@ module test_scratch_registry
   use dofmap
   use math
   use scratch_registry
+  use comm, only : NEKO_COMM, pe_rank, pe_size
   use num_types
   implicit none
 
@@ -40,10 +41,13 @@ contains
     call p(11)%set_id(11)
     p(12) = point_t(2d0, 1d0, 1d0)
     call p(12)%set_id(12)
-  
+    
     call msh%init(3, 1)
     call msh%add_element(1, p(1), p(2), p(4), p(3), &
          p(7), p(8), p(9), p(10))
+    pe_rank = 1
+    pe_size = 1
+    call msh%generate_conn()
     
   end subroutine test_scratch_registry_gen_msh
   
@@ -57,6 +61,7 @@ contains
     integer :: ierr
 
     call test_scratch_registry_gen_msh(msh)
+  
     call Xh%init(GLL, lx, lx, lx)
     dm = dofmap_t(msh, Xh)