diff --git a/.github/workflows/citation.yml b/.github/workflows/citation.yml new file mode 100644 index 00000000000..412eb89ac67 --- /dev/null +++ b/.github/workflows/citation.yml @@ -0,0 +1,20 @@ +on: +# push: +# paths: +# - CITATION.cff + workflow_dispatch: + +name: CITATION.cff +jobs: + Validate-CITATION-cff: + runs-on: ubuntu-latest + name: Validate CITATION.cff + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Validate CITATION.cff + uses: dieghernan/cff-validator@v3 diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index f2905615394..39dbd4d4ce2 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -1,14 +1,29 @@ name: develop -# Controls when the action will run. +# Controls when the action will run. on: pull_request: branches: [develop,release/*] + types: + - opened + - synchronize + - reopened + - ready_for_review workflow_dispatch: - + jobs: + is_draft: + name: Check if PR is a draft + if: github.event.pull_request.draft == true + + runs-on: ubuntu-latest + steps: + - name: Check if PR is a draft + run: echo "This PR is a draft" + linting: name: "Flint" + if: github.event.pull_request.draft != true runs-on: ubuntu-20.04 steps: - name: Setup env. @@ -16,23 +31,24 @@ jobs: sudo apt-get update && sudo apt-get install -yq python3-dev python3-pip python3-tk pip install nobvisual==0.2.0 flinter==0.4.0 - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Lint run: | flint score src/ -d 10 -r flinter_rc.yml | tee flint.txt score=$(awk '$1==0{print $3}' flint.txt) - if (( $(echo "$score < 8.13" |bc -l) )) ; then + if (( $(echo "$score < 8.06" |bc -l) )) ; then exit 1 fi - name: Archive linter report - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: flint-report path: flint.txt retention-days: 5 GNU: + if: github.event.pull_request.draft != true needs: linting runs-on: ${{matrix.os}} strategy: @@ -58,7 +74,7 @@ jobs: include: - os: ubuntu-20.04 setup-env: | - sudo apt-get update && sudo apt-get install -y openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3 cmake-curses-gui + sudo apt-get update && sudo apt-get install -y openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3 cmake-curses-gui - os: macos-13 setup-env: export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 && brew install openmpi && brew install automake && brew install gcc@11 && brew install gcc@12 env: @@ -74,7 +90,7 @@ jobs: run: ${{ matrix.setup-env }} - name: Cache pFUnit id: cache-pfunit - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/pkg/pfunit key: pfunit-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }} @@ -96,14 +112,14 @@ jobs: + error stop '*** Encountered 1 or more failures/errors during testing. ***' #endif end if - + _ACEOF git apply pfunit_error_stop.patch && mkdir b && cd b cmake -DCMAKE_INSTALL_PREFIX=${HOME}/pkg/pfunit .. make -j$(nproc) && make install && cd ../../ - name: Cache json-fortran id: cache-json-fortran - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/pkg/json-fortran key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }} @@ -121,36 +137,36 @@ jobs: echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${HOME}/pkg/json-fortran/lib/pkgconfig/" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HOME}/pkg/json-fortran/lib/" >> $GITHUB_ENV - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Build (CPU backend) if: matrix.backend == 'cpu' run: | echo $PKG_CONFIG_PATH - ./regen.sh + ./regen.sh ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" --with-pfunit=${HOME}/pkg/pfunit/PFUNIT-4.4 --enable-real=${RP} make -j$(nproc) - - name: Build (CUDA backend) + - name: Build (CUDA backend) if: matrix.backend == 'cuda' run: | sudo apt-get install -y nvidia-cuda-toolkit - ./regen.sh + ./regen.sh ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" --enable-real=${RP} --with-cuda=/usr make -j$(nproc) - - name: Build (HIP backend) + - name: Build (HIP backend) if: matrix.backend == 'HIP' run: | wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add - echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/debian/ ubuntu main' | sudo tee /etc/apt/sources.list.d/rocm.list sudo apt-get update && sudo apt-get install -y rocm-dev - ./regen.sh - ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" HIP_HIPCC_FLAGS="-O2 -fPIE" --enable-real=${RP} --with-hip=/opt/rocm-6.0.0 + ./regen.sh + ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" HIP_HIPCC_FLAGS="-O2 -fPIE" --enable-real=${RP} --with-hip=/opt/rocm-6.1.2 make -j$(nproc) - name: Build (OpenCL backend) if: matrix.backend == 'opencl' run: | - ./regen.sh + ./regen.sh ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" --enable-real=${RP} --with-opencl make -j$(nproc) - name: Check @@ -190,7 +206,7 @@ jobs: mkdir releng tar xf neko-*.tar.gz -C releng cd releng/neko-* - ./configure FC=${FC} FCFLAGS="-fPIE" --enable-real=${RP} HIP_HIPCC_FLAGS="-O2 -fPIE" --with-hip=/opt/rocm-6.0.0 + ./configure FC=${FC} FCFLAGS="-fPIE" --enable-real=${RP} HIP_HIPCC_FLAGS="-O2 -fPIE" --with-hip=/opt/rocm-6.1.2 make -j $(nproc) - name: Dist (OpenCL backend) if: matrix.backend == 'opencl' @@ -201,8 +217,9 @@ jobs: cd releng/neko-* ./configure FC=${FC} --enable-real=${RP} --with-opencl make -j $(nproc) - + Intel: + if: github.event.pull_request.draft != true needs: linting runs-on: ${{matrix.os}} strategy: @@ -218,7 +235,8 @@ jobs: wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list && sudo apt-get update -y && sudo apt install -y --no-install-recommends intel-oneapi-compiler-fortran intel-oneapi-mpi intel-oneapi-mpi-devel intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic source /opt/intel/oneapi/setvars.sh sudo apt install -y autoconf automake autotools-dev libopenblas-dev make git m4 python3 ca-certificates cmake - export PATH=/opt/intel/oneapi/compiler/2024.0/bin:${PATH} + INTEL_PATH=$(find /opt/intel/oneapi/compiler/ -name "${FC}" -type f | xargs dirname) + export PATH=$INTEL_PATH:${PATH} printenv >> $GITHUB_ENV env: CC: icc @@ -230,7 +248,7 @@ jobs: run: ${{ matrix.setup-env }} - name: Cache json-fortran id: cache-json-fortran - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/pkg/json-fortran key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }} @@ -248,16 +266,17 @@ jobs: echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${HOME}/pkg/json-fortran/lib/pkgconfig/" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HOME}/pkg/json-fortran/lib/" >> $GITHUB_ENV - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Build run: | echo $PKG_CONFIG_PATH ./regen.sh - ./configure FC=${FC} CC=${CC} MPIFC"=mpiifort -fc=${FC}" --enable-real=${RP} + ./configure FC=${FC} CC=${CC} MPIFC"=mpiifort -fc=${FC}" --enable-real=${RP} make FCFLAGS="-O2 -stand f08 -warn errors `pkg-config --cflags json-fortran`" -j$(nproc) NVIDIA: + if: github.event.pull_request.draft != true needs: linting runs-on: ${{matrix.os}} strategy: @@ -269,7 +288,7 @@ jobs: precision: [dp] include: - os: ubuntu-20.04 - setup-env: sudo apt-get update && sudo apt-get install -y autoconf automake autotools-dev make git m4 libopenblas-dev && curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg && echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list && sudo apt-get update -y && sudo apt-get install -y nvhpc-23-7 && NVARCH=`uname -s`_`uname -m`; export NVARCH && NVCOMPILERS=/opt/nvidia/hpc_sdk; export NVCOMPILERS && PATH=$NVCOMPILERS/$NVARCH/23.7/compilers/bin:$PATH; export PATH && export PATH=$NVCOMPILERS/$NVARCH/23.7/comm_libs/mpi/bin:$PATH && printenv >> $GITHUB_ENV + setup-env: sudo apt-get update && sudo apt-get install -y autoconf automake autotools-dev make git m4 libopenblas-dev && curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg && echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list && sudo apt-get update -y && sudo apt-get install -y nvhpc-24-3 && NVARCH=`uname -s`_`uname -m`; export NVARCH && NVCOMPILERS=/opt/nvidia/hpc_sdk; export NVCOMPILERS && PATH=$NVCOMPILERS/$NVARCH/24.3/compilers/bin:$PATH; export PATH && export PATH=$NVCOMPILERS/$NVARCH/24.3/comm_libs/mpi/bin:$PATH && printenv >> $GITHUB_ENV env: CC: gcc FC: ${{ matrix.compiler }} @@ -285,7 +304,7 @@ jobs: run: ${{ matrix.setup-env }} - name: Cache json-fortran id: cache-json-fortran - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/pkg/json-fortran key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }} @@ -303,7 +322,7 @@ jobs: echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${HOME}/pkg/json-fortran/lib/pkgconfig/" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HOME}/pkg/json-fortran/lib/" >> $GITHUB_ENV - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Build (CPU backend) @@ -318,9 +337,32 @@ jobs: run: | git apply patches/nvhpc_bge.patch ./regen.sh - ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} --with-cuda=/opt/nvidia/hpc_sdk/Linux_x86_64/23.7/cuda/ + ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} --with-cuda=/opt/nvidia/hpc_sdk/Linux_x86_64/24.3/cuda/ make + - name: Dist (CPU backend) + if: matrix.backend == 'cpu' + run: | + git stash + make dist + mkdir releng + tar xf neko-*.tar.gz -C releng + cd releng/neko-* + patch -u src/common/signal.f90 -i patches/nvhpc_bge.patch + ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} + make -j $(nproc) + - name: Dist (CUDA backend) + if: matrix.backend == 'cuda' + run: | + git stash + make dist + mkdir releng + tar xf neko-*.tar.gz -C releng + cd releng/neko-* + patch -u src/common/signal.f90 -i patches/nvhpc_bge.patch + ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} --with-cuda=/opt/nvidia/hpc_sdk/Linux_x86_64/24.3/cuda/ + make -j $(nproc) ReFrame: + if: github.event.pull_request.draft != true needs: GNU runs-on: ubuntu-20.04 strategy: @@ -340,10 +382,10 @@ jobs: - name: Setup env. run: | sudo apt-get update && sudo apt-get install -y openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3 - pip install reframe-hpc + pip install reframe-hpc - name: Cache json-fortran id: cache-json-fortran - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/pkg/json-fortran key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }} @@ -364,7 +406,7 @@ jobs: run: | reframe --detect-host-topology - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Regression tests diff --git a/.github/workflows/docsdev.yml b/.github/workflows/docsdev.yml index f495c548c06..116bcfb4e56 100644 --- a/.github/workflows/docsdev.yml +++ b/.github/workflows/docsdev.yml @@ -3,11 +3,13 @@ name: Documentation (develop) on: schedule: # * is a special character in YAML so you have to quote this string - - cron: '55 19 * * *' + - cron: '30 03 * * *' # workflow_dispatch: - + jobs: build-dev-documentation: + # This workflow runs only on the official repository. + if: github.repository == 'ExtremeFLOW/neko' runs-on: ubuntu-latest steps: @@ -28,7 +30,7 @@ jobs: ./regen.sh ./configure FC=${FC} make html - + # Deploy the HTML documentation to GitHub Pages - name: GH Pages Deployment uses: peaceiris/actions-gh-pages@v3 diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index 502562818cd..85fb25bb182 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup env. run: | sudo apt-get update && sudo apt-get install -y git openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3 doxygen fonts-freefont-ttf graphviz @@ -30,13 +30,13 @@ jobs: # Deploy the HTML documentation to GitHub Pages - name: GH Pages Deployment - uses: peaceiris/actions-gh-pages@v3 + uses: peaceiris/actions-gh-pages@v4 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ./doc/html/ allow_empty_commit: false force_orphan: false publish_branch: gh-pages - destination_dir: docs + destination_dir: docs/release keep_files: false enable_jekyll: true diff --git a/.github/workflows/formatter.yml b/.github/workflows/formatter.yml new file mode 100644 index 00000000000..5d3bcdddf05 --- /dev/null +++ b/.github/workflows/formatter.yml @@ -0,0 +1,31 @@ +name: Formatting + +# Controls when the action will run. +on: +# pull_request: +# branches: [develop,release/*,master] + workflow_dispatch: + +jobs: + formatting: + name: "Formatter (findent)" + description: "Check if the code is formatted according to the rules" + runs-on: ubuntu-20.04 + steps: + - name: Setup env. + run: | + sudo apt-get update && sudo apt-get install -yq python3-dev python3-pip python3-tk + pip install findent + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 1 + - name: Check Format + run: | + export FINDENT_FLAGS="-i2 -d3 -f3 -s3 -w3 -t3 -j3 -k- -Rr -c3" + find src/ -name "*.f90" -exec bash -c 'findent < {} > {}.tmp; mv -f {}.tmp {}' \; + if [ "$(git diff --exit-code)" != 0 ]; then + >&2 echo "Formatting rules violated" + git diff --exit-code + exit 1 + fi \ No newline at end of file diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f59adf2b45d..ce32b4973e0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,11 +1,11 @@ name: CI -# Controls when the action will run. +# Controls when the action will run. on: pull_request: branches: [master] workflow_dispatch: - + jobs: linting: name: "Flint" @@ -16,18 +16,18 @@ jobs: sudo apt-get update && sudo apt-get install -yq python3-dev python3-pip python3-tk pip install nobvisual==0.2.0 flinter==0.4.0 - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Lint run: | flint score src/ -d 10 -r flinter_rc.yml | tee flint.txt score=$(awk '$1==0{print $3}' flint.txt) - if (( $(echo "$score < 8.13" |bc -l) )) ; then + if (( $(echo "$score < 8.02" |bc -l) )) ; then exit 1 fi - name: Archive linter report - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: flint-report path: flint.txt @@ -58,7 +58,7 @@ jobs: include: - os: ubuntu-20.04 setup-env: | - sudo apt-get update && sudo apt-get install -y openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3 cmake-curses-gui + sudo apt-get update && sudo apt-get install -y openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3 cmake-curses-gui - os: macos-13 setup-env: export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 && brew install openmpi && brew install automake && brew install gcc@11 && brew install gcc@12 env: @@ -74,7 +74,7 @@ jobs: run: ${{ matrix.setup-env }} - name: Cache pFUnit id: cache-pfunit - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/pkg/pfunit key: pfunit-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }} @@ -96,14 +96,14 @@ jobs: + error stop '*** Encountered 1 or more failures/errors during testing. ***' #endif end if - + _ACEOF git apply pfunit_error_stop.patch && mkdir b && cd b cmake -DCMAKE_INSTALL_PREFIX=${HOME}/pkg/pfunit .. make -j$(nproc) && make install && cd ../../ - name: Cache json-fortran id: cache-json-fortran - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/pkg/json-fortran key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }} @@ -121,36 +121,36 @@ jobs: echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${HOME}/pkg/json-fortran/lib/pkgconfig/" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HOME}/pkg/json-fortran/lib/" >> $GITHUB_ENV - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Build (CPU backend) if: matrix.backend == 'cpu' run: | echo $PKG_CONFIG_PATH - ./regen.sh + ./regen.sh ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" --with-pfunit=${HOME}/pkg/pfunit/PFUNIT-4.4 --enable-real=${RP} make -j$(nproc) - - name: Build (CUDA backend) + - name: Build (CUDA backend) if: matrix.backend == 'cuda' run: | sudo apt-get install -y nvidia-cuda-toolkit - ./regen.sh + ./regen.sh ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" --enable-real=${RP} --with-cuda=/usr make -j$(nproc) - - name: Build (HIP backend) + - name: Build (HIP backend) if: matrix.backend == 'HIP' run: | wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | sudo apt-key add - echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/debian/ ubuntu main' | sudo tee /etc/apt/sources.list.d/rocm.list sudo apt-get update && sudo apt-get install -y rocm-dev - ./regen.sh - ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" HIP_HIPCC_FLAGS="-O2 -fPIE" --enable-real=${RP} --with-hip=/opt/rocm-6.0.0/ + ./regen.sh + ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" HIP_HIPCC_FLAGS="-O2 -fPIE" --enable-real=${RP} --with-hip=/opt/rocm-6.1.2 make -j$(nproc) - name: Build (OpenCL backend) if: matrix.backend == 'opencl' run: | - ./regen.sh + ./regen.sh ./configure FC=${FC} FCFLAGS="-O2 -pedantic -std=f2008" --enable-real=${RP} --with-opencl make -j$(nproc) - name: Check @@ -190,7 +190,7 @@ jobs: mkdir releng tar xf neko-*.tar.gz -C releng cd releng/neko-* - ./configure FC=${FC} FCFLAGS="-fPIE" --enable-real=${RP} HIP_HIPCC_FLAGS="-O2 -fPIE" --with-hip=/opt/rocm-6.0.0 + ./configure FC=${FC} FCFLAGS="-fPIE" --enable-real=${RP} HIP_HIPCC_FLAGS="-O2 -fPIE" --with-hip=/opt/rocm-6.1.2 make -j $(nproc) - name: Dist (OpenCL backend) if: matrix.backend == 'opencl' @@ -201,7 +201,7 @@ jobs: cd releng/neko-* ./configure FC=${FC} --enable-real=${RP} --with-opencl make -j $(nproc) - + Intel: needs: linting runs-on: ${{matrix.os}} @@ -218,7 +218,8 @@ jobs: wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list && sudo apt-get update -y && sudo apt install -y --no-install-recommends intel-oneapi-compiler-fortran intel-oneapi-mpi intel-oneapi-mpi-devel intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic source /opt/intel/oneapi/setvars.sh sudo apt install -y autoconf automake autotools-dev libopenblas-dev make git m4 python3 ca-certificates cmake - export PATH=/opt/intel/oneapi/compiler/2024.0/bin:${PATH} + INTEL_PATH=$(find /opt/intel/oneapi/compiler/ -name "${FC}" -type f | xargs dirname) + export PATH=$INTEL_PATH:${PATH} printenv >> $GITHUB_ENV env: CC: icc @@ -230,7 +231,7 @@ jobs: run: ${{ matrix.setup-env }} - name: Cache json-fortran id: cache-json-fortran - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/pkg/json-fortran key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }} @@ -248,14 +249,14 @@ jobs: echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${HOME}/pkg/json-fortran/lib/pkgconfig/" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HOME}/pkg/json-fortran/lib/" >> $GITHUB_ENV - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Build run: | echo $PKG_CONFIG_PATH ./regen.sh - ./configure FC=${FC} CC=${CC} MPIFC"=mpiifort -fc=${FC}" --enable-real=${RP} + ./configure FC=${FC} CC=${CC} MPIFC"=mpiifort -fc=${FC}" --enable-real=${RP} make FCFLAGS="-O2 -stand f08 -warn errors `pkg-config --cflags json-fortran`" -j$(nproc) NVIDIA: needs: linting @@ -269,7 +270,7 @@ jobs: precision: [dp] include: - os: ubuntu-20.04 - setup-env: sudo apt-get update && sudo apt-get install -y autoconf automake autotools-dev make git m4 libopenblas-dev && curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg && echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list && sudo apt-get update -y && sudo apt-get install -y nvhpc-23-7 && NVARCH=`uname -s`_`uname -m`; export NVARCH && NVCOMPILERS=/opt/nvidia/hpc_sdk; export NVCOMPILERS && PATH=$NVCOMPILERS/$NVARCH/23.7/compilers/bin:$PATH; export PATH && export PATH=$NVCOMPILERS/$NVARCH/23.7/comm_libs/mpi/bin:$PATH && printenv >> $GITHUB_ENV + setup-env: sudo apt-get update && sudo apt-get install -y autoconf automake autotools-dev make git m4 libopenblas-dev && curl https://developer.download.nvidia.com/hpc-sdk/ubuntu/DEB-GPG-KEY-NVIDIA-HPC-SDK | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg && echo 'deb [signed-by=/usr/share/keyrings/nvidia-hpcsdk-archive-keyring.gpg] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | sudo tee /etc/apt/sources.list.d/nvhpc.list && sudo apt-get update -y && sudo apt-get install -y nvhpc-24-3 && NVARCH=`uname -s`_`uname -m`; export NVARCH && NVCOMPILERS=/opt/nvidia/hpc_sdk; export NVCOMPILERS && PATH=$NVCOMPILERS/$NVARCH/24.3/compilers/bin:$PATH; export PATH && export PATH=$NVCOMPILERS/$NVARCH/24.3/comm_libs/mpi/bin:$PATH && printenv >> $GITHUB_ENV env: CC: gcc FC: ${{ matrix.compiler }} @@ -285,7 +286,7 @@ jobs: run: ${{ matrix.setup-env }} - name: Cache json-fortran id: cache-json-fortran - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/pkg/json-fortran key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }} @@ -303,7 +304,7 @@ jobs: echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${HOME}/pkg/json-fortran/lib/pkgconfig/" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HOME}/pkg/json-fortran/lib/" >> $GITHUB_ENV - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Build (CPU backend) @@ -318,8 +319,30 @@ jobs: run: | git apply patches/nvhpc_bge.patch ./regen.sh - ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} --with-cuda=/opt/nvidia/hpc_sdk/Linux_x86_64/23.7/cuda/ + ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} --with-cuda=/opt/nvidia/hpc_sdk/Linux_x86_64/24.3/cuda/ make + - name: Dist (CPU backend) + if: matrix.backend == 'cpu' + run: | + git stash + make dist + mkdir releng + tar xf neko-*.tar.gz -C releng + cd releng/neko-* + patch -u src/common/signal.f90 -i patches/nvhpc_bge.patch + ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} + make -j $(nproc) + - name: Dist (CUDA backend) + if: matrix.backend == 'cuda' + run: | + git stash + make dist + mkdir releng + tar xf neko-*.tar.gz -C releng + cd releng/neko-* + patch -u src/common/signal.f90 -i patches/nvhpc_bge.patch + ./configure FC=${FC} FCFLAGS="-O3" --enable-real=${RP} --with-cuda=/opt/nvidia/hpc_sdk/Linux_x86_64/24.3/cuda/ + make -j $(nproc) ReFrame: needs: GNU runs-on: ubuntu-20.04 @@ -340,10 +363,10 @@ jobs: - name: Setup env. run: | sudo apt-get update && sudo apt-get install -y openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3 - pip install reframe-hpc + pip install reframe-hpc - name: Cache json-fortran id: cache-json-fortran - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ~/pkg/json-fortran key: json-fortran-${{ runner.os }}-${{ matrix.os }}-${{ matrix.compiler }} @@ -364,7 +387,7 @@ jobs: run: | reframe --detect-host-topology - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 1 - name: Regression tests @@ -378,7 +401,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup env. run: | sudo apt-get update && sudo apt-get install -y git openmpi-bin libopenmpi-dev autoconf automake autotools-dev libopenblas-dev make git m4 python3 doxygen fonts-freefont-ttf graphviz @@ -395,16 +418,16 @@ jobs: ./regen.sh ./configure FC=${FC} make html - + # Deploy the HTML documentation to GitHub Pages - name: GH Pages Deployment - uses: peaceiris/actions-gh-pages@v3 + uses: peaceiris/actions-gh-pages@v4 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ./doc/html/ allow_empty_commit: false force_orphan: false publish_branch: gh-pages - destination_dir: docs + destination_dir: docs/release keep_files: false enable_jekyll: true diff --git a/.gitignore b/.gitignore index c5a86768cbf..e507c2e03b6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,13 @@ +# Ignore everything which does not have an extension or are a directory +* +!*.* +!*/ + +# Exceptions to the no-extension rule +!AUTHORS +!COPYING + +# General list of files to ignore *.mod *.o *.dirstamp @@ -12,21 +22,17 @@ config.sub depcomp Makefile.in !tests/*/*Makefile.in -genmeshbox -prepart -rea2nbin libneko.a .vscode -neko neko.pc install-sh configure +configure~ cltostring.sh device_config.h neko_config.f90 num_types.f90 comm.F90 -makeneko missing test-driver tests/*/*.log @@ -38,11 +44,21 @@ doxygen.stamp doc/html examples/*/*.nek5000 examples/*/*0.f0* -contrib/average_field_in_space -contrib/average_fields_in_time -contrib/postprocess_fluid_stats *.chkp *.lst +# Ignore temporary files +*~ +*.swp - +# Explicitly ignore binaries to avoid confusion +neko +makeneko +rea2nbin +genmeshbox +prepart +average_field_in_space +average_fields_in_time +postprocess_fluid_stats +map_to_equidistant_1d +calc_lift_from_field diff --git a/AUTHORS b/AUTHORS index 5365c1d7bd3..05e4030c6f9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,9 +1 @@ -Niclas Jansson -Martin Karp -Jacob Wahlgren -Steven W.D. Chien -Adalberto Perez -Adam Peplinski -Timofey Mukha -Philipp Schlatter - +This information has been migrated to CITATION.cff \ No newline at end of file diff --git a/CITATION.cff b/CITATION.cff index 7a9ef473ee1..56476e88553 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,18 +1,44 @@ +# This CITATION.cff file was generated with cffinit. +# Visit https://bit.ly/cffinit to generate yours today! + +cff-version: 1.2.0 +title: Neko +message: 'If you use Neko, please cite it as below.' +type: software authors: + - family-names: Alekseenko + given-names: Andrey + - family-names: Baconnet + given-names: Victor + - family-names: Chien + given-names: Steven + - family-names: Du + given-names: Shiyu - family-names: Jansson given-names: Niclas - family-names: Karp given-names: Martin - family-names: Mukha given-names: Timofey + - family-names: Olsen + given-names: Time Felle + - family-names: Páll + given-names: Szilárd + - family-names: Peplinski + given-names: Adam - family-names: Perez given-names: Adalberto - - family-names: Baconnet - given-names: Victor - family-names: Schlatter given-names: Philipp -cff-version: 1.2.0 -message: "If you use Neko, please cite it as below." + - family-names: Vincent + given-names: Jonathan + - family-names: Wahlgren + given-names: Jacob +identifiers: + - type: doi + value: 10.5281/zenodo.6631055 +url: 'https://www.neko.cfd' +license: BSD-3-Clause preferred-citation: authors: - family-names: Jansson @@ -25,8 +51,12 @@ preferred-citation: given-names: Stefano - family-names: Schlatter given-names: Philipp - title: "Neko: A Modern, Portable, and Scalable Framework for High-Fidelity Computational Fluid Dynamics" + title: >- + Neko: A Modern, Portable, and Scalable Framework for High-Fidelity + Computational Fluid Dynamics type: article - year: 2021 - url: https://arxiv.org/abs/2107.01243 -title: "Neko" \ No newline at end of file + journal: Computer & Fluids + volume: 275 + year: 2024 + doi: '10.1016/j.compfluid.2024.106243' + diff --git a/Makefile.am b/Makefile.am index 5af2de0821f..da4a3da9cc2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,7 +1,11 @@ pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = neko.pc -SUBDIRS = src contrib +SUBDIRS = src + +if ENABLE_CONTRIB +SUBDIRS += contrib +endif if ENABLE_PFUNIT SUBDIRS += tests diff --git a/README.md b/README.md index 89a44ba2e3b..96eec8b724b 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,8 @@ # ![Neko](https://user-images.githubusercontent.com/750135/169531665-313c3471-50d1-4c44-964a-fee7312d6459.png) -![CI](https://github.com/ExtremeFLOW/neko/workflows/CI/badge.svg) ![develop](https://github.com/ExtremeFLOW/neko/workflows/develop/badge.svg) +![CI](https://github.com/ExtremeFLOW/neko/workflows/CI/badge.svg) ![develop](https://github.com/ExtremeFLOW/neko/workflows/develop/badge.svg) [![DOI](https://zenodo.org/badge/338607716.svg)](https://zenodo.org/doi/10.5281/zenodo.6631055) ## About Neko is a portable framework for high-order spectral element flow simulations. Written in modern Fortran, Neko adopts an object-oriented approach, allowing multi-tier abstractions of the solver stack and facilitating various hardware backends ranging from general-purpose processors, CUDA and HIP enabled accelerators to SX-Aurora vector processors. Neko has its roots in the spectral element code Nek5000 from UChicago/ANL, from where many of the namings, code structure and numerical methods are adopted. -Neko is currently maintained and developed at KTH Royal Institute of Technology. ## Cloning the project @@ -11,17 +10,33 @@ Neko is currently maintained and developed at KTH Royal Institute of Technology. git clone https://github.com/ExtremeFLOW/neko ``` +## Documentation +Documentation for Neko is available at https://neko.cfd and most things related to the code, cases, and different features are described there. The documentation is always improving, in large part due to our active users and if something is missing or hard to understand, don't be afraid to start a [discussion](https://github.com/ExtremeFLOW/neko/discussions) or create a [Pull request](https://github.com/ExtremeFLOW/neko/pulls). It is a great way to help us improve and also to start getting involved in the project. + ## Building the project -To build the project you will need: A Fortran compiler supporting the Fortran-08 standard, a working MPI installation, JSON-Fortran, and BLAS/lapack. Optional dependencies are gslib and ParMETIS. We use automake to build the project. These instructions should work in general, but as the project is quickly developing, things might change. +To build the project you will need: A Fortran compiler supporting the Fortran-08 standard, a working MPI installation, JSON-Fortran, and BLAS/lapack. Optional dependencies are gslib and ParMETIS. We use autotools to build the project. These instructions should work in general, but as the project is quickly developing, things might change. While we assume MPI and BLAS are installed, if JSON-Fortran is not already available it can be cloned, installed, and the correct paths set with the following commands (Skip this step if you already have an installation of JSON-Fortran). + +```bash +export JSON_INSTALL=/path/to/json-fortran_install # Where you want to install json-fortran +``` +```bash +git clone --depth 1 https://github.com/ExtremeFLOW/json-fortran/ +cmake -S json-fortran -B json-fortran/build -DCMAKE_INSTALL_PREFIX=${JSON_INSTALL} -DUSE_GNU_INSTALL_CONVENTION=ON .. +cmake --build json-fortran/build --parallel +cmake --install json-fortran/build +export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${JSON_INSTALL}/lib/ #On some systems lib should be replaced with lib64 +export PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:${JSON_INSTALL}/lib/pkgconfig + +``` +A basic CPU version of Neko can then be installed according to the following ```bash cd neko ./regen.sh -./configure --prefix=/path/to/neko_install --with-pfunit=/path/to/pFUnit/installed/PFUNIT-VERSION +./configure --prefix=/path/to/neko_install # Where you want to install neko make install ``` - -More detailed installation instructions can be found in the documentation. +More detailed installation instructions and all the different options (such as how to install Neko for GPUs) can be found in the documentation available at https://neko.cfd. ## Running examples After the project has been built @@ -31,27 +46,20 @@ cd examples/tgv /path/to/neko_install/bin/makeneko tgv.f90 mpirun -np 4 ./neko tgv.case ``` - -## Testing the Code -Assuming you configured with pFUnit you should be able to test the code with +If there is not a .f90 (user) file in the example, the standard executable `neko` can also be used, for example: ```bash -make check +cd examples/hemi +mpirun -np 4 /path/to/neko_install/bin/neko hemi.case ``` +only uses built-in functions and does not need a compiled user file. Whether you will need a user file or not depends on what functionality you want and this is also documented in the documentation. -## Documentation -Documentation for Neko is available at https://neko.cfd/docs. - -To generate the documentation, you need to have both doxygen and dot (part of the Graphviz package) installed (they will be picked up by configure). Once installed, you should be able to generate the documentation with -```bash -make html -``` ## Publications using Neko -* Jansson, N., Karp, M., Podobas, A., Markidis, S. and Schlatter, P., 2021. *Neko: A modern, portable, and scalable framework for high-fidelity computational fluid dynamics*. arXiv preprint arXiv:2107.01243. * Jansson, N., 2021. *Spectral Element Simulations on the NEC SX-Aurora TSUBASA*. In proc. HPCAsia 2021. * Karp, M., Podobas, A., Kenter, T., Jansson, N., Plessl, C., Schlatter, P. and Markidis, S., 2022. *A high-fidelity flow solver for unstructured meshes on field-programmable gate arrays: Design, evaluation, and future challenges*. In proc. HPCAsia 2022. * Karp, M., Jansson, N., Podobas, A., Schlatter, P., and Markidis, S., 2022. *Reducing Communication in the Conjugate Gradient Method: A Case Study on High-Order Finite Elements*. In proc. PASC 2022. -* Karp, M., Massaro, D., Jansson, N., Hart, A., Wahlgren, J., Schlatter, P., and Markidis, S., 2022. *Large-Scale Direct Numerical Simulations of Turbulence Using GPUs and Modern Fortran*. arXiv preprint arXiv:2207:07098. +* Karp, M., Massaro, D., Jansson, N., Hart, A., Wahlgren, J., Schlatter, P., and Markidis, S., 2023. *Large-Scale Direct Numerical Simulations of Turbulence Using GPUs and Modern Fortran*. The International Journal of High Performance Computing Applications, 37, 5. * Jansson, N., Karp, M., Perez, A., Mukha, T., Ju, Y., Liu, J., Páll, S., Laure, E., Weinkauf, T., Schumacher, J., Schlatter, P., Markidis, S., 2023. *Exploring the Ultimate Regime of Turbulent Rayleigh–Bénard Convection Through Unprecedented Spectral-Element Simulations*. SC '23: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. +* Jansson, N., Karp, M., Podobas, A., Markidis, S. and Schlatter, P., 2024. *Neko: A modern, portable, and scalable framework for high-fidelity computational fluid dynamics*. Computer & Fluids, 275. ## Acknowledgments The development of Neko was supported by the European Commission Horizon 2020 project grant *EPiGRAM-HS: Exascale Programming Models for Heterogeneous Systems* (grant reference 801039), the Swedish Research Council project grant *Efficient Algorithms for Exascale Computational Fluid Dynamics* (grant reference 2019-04723) and the SeRC Exascale Simulation Software Initiative (SESSI). The Neko logo was designed by Robert Hansen Jagrelius. diff --git a/bench/gs/driver.f90 b/bench/gs/driver.f90 index 49f97140200..47a16f44c31 100644 --- a/bench/gs/driver.f90 +++ b/bench/gs/driver.f90 @@ -46,7 +46,7 @@ program gsbench allocate(u(n)) call rzero(u, n) call device_map(u, u_d, n) - call device_memcpy(u, u_d, n, HOST_TO_DEVICE) + call device_memcpy(u, u_d, n, HOST_TO_DEVICE, sync=.false.) ! warmup do i = 1, niter diff --git a/bench/tgv32/tgv.case b/bench/tgv32/tgv.case index 9e0793039ad..e448dae39b8 100644 --- a/bench/tgv32/tgv.case +++ b/bench/tgv32/tgv.case @@ -6,7 +6,7 @@ "output_boundary": false, "output_checkpoints": false, "output_at_end": false, - "end_time": 20.0, + "end_time": 10.0, "timestep": 1e-3, "numerics": { "time_order": 3, diff --git a/configure.ac b/configure.ac index 6f9896279d5..40b93e3f388 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ AC_PREREQ([2.69]) -AC_INIT([neko],[0.7.2]) +AC_INIT([neko],[0.8.0]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AM_MAINTAINER_MODE AC_CONFIG_MACRO_DIR([m4]) @@ -73,6 +73,10 @@ if test "x$ax_cv_fc_compiler_vendor" = xportland; then FCFLAGS="$FCFLAGS -Mbackslash" fi +if test "x$ax_cv_fc_compiler_vendor" = xnvhpc; then + FCFLAGS="$FCFLAGS -Mbackslash" +fi + # Store build information (date, host, FC) AC_SUBST(NEKO_BUILD_INFO, ["(build: $(date +%Y-%m-%d) on $host using $ax_cv_fc_compiler_vendor)"], []) @@ -261,6 +265,7 @@ AM_CONDITIONAL([ENABLE_ADIOS2], [test "x${have_adios2}" = xyes]) AM_CONDITIONAL([ENABLE_CUDA], [test "x${have_cuda}" = xyes]) AM_CONDITIONAL([ENABLE_HIP], [test "x${have_hip}" = xyes]) AM_CONDITIONAL([ENABLE_OPENCL], [test "x${have_opencl}" = xyes]) +AM_CONDITIONAL([ENABLE_CONTRIB], [test "x${enable_contrib}" = xyes]) # Set device dependent flags AC_SUBST(CUDA_ARCH) @@ -327,8 +332,10 @@ if test "x${enable_contrib}" = xyes; then contrib/rea2nbin/Makefile\ contrib/genmeshbox/Makefile\ contrib/average_fields_in_time/Makefile\ + contrib/calc_lift_from_field/Makefile\ contrib/postprocess_fluid_stats/Makefile\ contrib/average_field_in_space/Makefile\ + contrib/map_to_equidistant_1d/Makefile\ contrib/prepart/Makefile]) fi # Doxygen diff --git a/contrib/Makefile.am b/contrib/Makefile.am index 0bcf5d2f7f2..2cd31c89925 100644 --- a/contrib/Makefile.am +++ b/contrib/Makefile.am @@ -3,4 +3,7 @@ SUBDIRS = rea2nbin\ genmeshbox\ average_fields_in_time\ average_field_in_space\ - postprocess_fluid_stats + calc_lift_from_field\ + postprocess_fluid_stats\ + map_to_equidistant_1d + diff --git a/contrib/average_field_in_space/average_field_in_space.f90 b/contrib/average_field_in_space/average_field_in_space.f90 index 1fd8cd67ac9..165646e3c30 100644 --- a/contrib/average_field_in_space/average_field_in_space.f90 +++ b/contrib/average_field_in_space/average_field_in_space.f90 @@ -2,7 +2,6 @@ !! Martin Karp 17/02-23 program average_field_in_space use neko - use mean_flow implicit none character(len=NEKO_FNAME_LEN) :: inputchar, mesh_fname, field_fname, hom_dir, output_fname @@ -15,18 +14,30 @@ program average_field_in_space type(space_t) :: Xh type(mesh_t) :: msh type(gs_t) :: gs_h + type(map_1d_t) :: map_1d type(field_t), pointer :: u, avg_u, old_u, el_heights type(vector_ptr_t), allocatable :: fields(:) - integer, allocatable :: hom_dir_el(:) - integer :: argc, i, n, lx, j, e, n_levels + type(matrix_t) :: avg_matrix + type(vector_t) :: volume_per_gll_lvl + integer :: argc, i, n, lx, j, e, n_levels, dir, ierr, n_1d, tstep + logical :: avg_to_1d = .false. + real(kind=rp) :: coord argc = command_argument_count() - if ((argc .lt. 5) .or. (argc .gt. 5)) then + if ((argc .lt. 4) .or. (argc .gt. 4)) then if (pe_rank .eq. 0) then - write(*,*) 'Usage: ./average_field_in_space mesh.nmsh field.fld dir(x, y, z) n_levels outfield.fld' - write(*,*) 'Example command: ./average_field_in_space mesh.nmsh fieldblabla.fld x n_levels outfield.fld' - write(*,*) 'Computes the spatial average in x of the field fieldblabla.nek5000 and stores in outfield.fld' + write(*,*) 'Usage: ./average_field_in_space mesh.nmsh field.fld dir(x, y, z, xz, xy, yz) outfield.(fld,csv)' + write(*,*) '----' + write(*,*) 'Example command for avg in 1 direction: ./average_field_in_space mesh.nmsh fieldblabla.fld x outfield.fld' + write(*,*) 'Computes spatial average in 1 direction and saves it in outfield.fld' + write(*,*) '----' + write(*,*) 'Example command: ./average_field_in_space mesh.nmsh fieldblabla.fld xy out.csv' + write(*,*) 'Computes the spatial average in 2 directions directly of the field fieldblabla.nek5000 and stores in out.csv' + write(*,*) '----' + write(*,*) 'In out.csv the first col are the coords of the GLL points' + write(*,*) 'In columns 2-n_fields are the averages for all fields in fieldblabla.fld' + write(*,*) 'If averaging in 2 directions output must be .csv and for 1 direction .fld' end if stop end if @@ -42,8 +53,6 @@ program average_field_in_space call get_command_argument(3, inputchar) read(inputchar, *) hom_dir call get_command_argument(4, inputchar) - read(inputchar, *) n_levels - call get_command_argument(5, inputchar) read(inputchar, *) output_fname call mesh_file%read(msh) @@ -96,58 +105,108 @@ program average_field_in_space old_u => neko_field_registry%get_field('old_u') call neko_field_registry%add_field(dof, 'el_heights') el_heights => neko_field_registry%get_field('el_heights') - - !test - allocate(hom_dir_el(msh%nelv)) - - do i = 1, msh%nelv - !find height in hom-dir - !store direction which is hom - !set element to height - !we assume elements are stacked on eachother... - el_dim(1,:) = abs(msh%elements(i)%e%pts(1)%p%x-msh%elements(i)%e%pts(2)%p%x) - el_dim(2,:) = abs(msh%elements(i)%e%pts(1)%p%x-msh%elements(i)%e%pts(3)%p%x) - el_dim(3,:) = abs(msh%elements(i)%e%pts(1)%p%x-msh%elements(i)%e%pts(5)%p%x) - ! 1 corresponds to r, 2 to s, 3 to t - if (trim(hom_dir) .eq. 'x') then - hom_dir_el(i) = maxloc(el_dim(:,1),dim=1) - el_h = el_dim(1,hom_dir_el(i)) - else if (trim(hom_dir) .eq. 'y') then - hom_dir_el(i) = maxloc(el_dim(:,2),dim=1) - el_h = el_dim(2,hom_dir_el(i)) - else if (trim(hom_dir) .eq. 'z') then - hom_dir_el(i) = maxloc(el_dim(:,3),dim=1) - el_h = el_dim(3,hom_dir_el(i)) - else - call neko_error('homogenous direction not supported') - end if - el_heights%x(:,:,:,i) = el_h - end do + ! 1 corresponds to x, 2 to y, 3 to z + if (trim(hom_dir) .eq. 'x') then + dir = 1 + avg_to_1d = .false. + else if (trim(hom_dir) .eq. 'y') then + dir = 2 + avg_to_1d = .false. + else if (trim(hom_dir) .eq. 'z') then + dir = 3 + avg_to_1d = .false. + else if (trim(hom_dir) .eq. 'yz') then + dir = 1 + avg_to_1d = .true. + else if (trim(hom_dir) .eq. 'xz') then + dir = 2 + avg_to_1d = .true. + else if (trim(hom_dir) .eq. 'xy') then + dir = 3 + avg_to_1d = .true. + else + call neko_error('homogenous direction not supported') + end if + call map_1d%init(dof, gs_h, dir, 1e-7_rp) + n_levels = map_1d%n_el_lvls n = u%dof%size() - - call copy(u%x,el_heights%x,n) - call copy(old_u%x,el_heights%x,n) - call copy(avg_u%x,el_heights%x,n) - call perform_global_summation(u, avg_u, old_u, n_levels, & - hom_dir_el,gs_h, coef%mult, msh%nelv, lx) - domain_height = u%x(1,1,1,1) + !allocate array with pointers to all vectors in the file allocate(fields(field_data%size())) - - call field_data%get_list(fields,field_data%size()) - - do i = 1, field_data%size() - call copy(old_u%x,fields(i)%v%x,n) - call perform_local_summation(u,old_u, el_heights, domain_height, & - hom_dir_el, coef, msh%nelv, lx) - call copy(old_u%x,u%x,n) - call copy(avg_u%x,u%x,n) - call perform_global_summation(u, avg_u, old_u, n_levels, & - hom_dir_el,gs_h, coef%mult, msh%nelv, lx) - call copy(fields(i)%v%x,u%x,n) - end do + ! Compute average in two direction directly and store in a csv file output_file = file_t(trim(output_fname)) - call output_file%write(field_data) + do tstep = 0, field_data%meta_nsamples-1 + if (tstep .gt. 0) call field_file%read(field_data) + call field_data%get_list(fields,field_data%size()) + if (pe_rank .eq. 0) write(*,*) 'Averaging field:', tstep + if (avg_to_1d) then + n_1d = n_levels*Xh%lx + call avg_matrix%init(n_1d,field_data%size()+1) + call volume_per_gll_lvl%init(n_1d) + do i = 1, n + volume_per_gll_lvl%x(map_1d%pt_lvl(i,1,1,1)) = & + volume_per_gll_lvl%x(map_1d%pt_lvl(i,1,1,1)) + coef%B(i,1,1,1) + end do + call MPI_Allreduce(MPI_IN_PLACE,volume_per_gll_lvl%x, n_1d, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + !ugly way of getting coordinates, computes average + do i = 1, n + if (dir .eq. 1) coord = dof%x(i,1,1,1) + if (dir .eq. 2) coord = dof%y(i,1,1,1) + if (dir .eq. 3) coord = dof%z(i,1,1,1) + avg_matrix%x(map_1d%pt_lvl(i,1,1,1),1) = & + avg_matrix%x(map_1d%pt_lvl(i,1,1,1),1) + coord*coef%B(i,1,1,1) & + /volume_per_gll_lvl%x(map_1d%pt_lvl(i,1,1,1)) + end do + do j = 2, field_data%size()+1 + do i = 1, n + avg_matrix%x(map_1d%pt_lvl(i,1,1,1),j) = & + avg_matrix%x(map_1d%pt_lvl(i,1,1,1),j) + fields(j-1)%v%x(i)*coef%B(i,1,1,1) & + /volume_per_gll_lvl%x(map_1d%pt_lvl(i,1,1,1)) + end do + end do + call MPI_Allreduce(MPI_IN_PLACE,avg_matrix%x, (field_data%size()+1)*n_1d, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + + call output_file%write(avg_matrix,field_data%time) + ! Compute averages in 1 direction and store in a 3d field (lots of redundant data, sorry) + ! Should output a 2d field in principle + else + do i = 1, msh%nelv + !find height in hom-dir + !direction in local coords (r,s,t) that is hom is stored in map_1d%dir_el + !set element to height + !we assume elements are stacked on eachother... + el_dim(1,:) = abs(msh%elements(i)%e%pts(1)%p%x-msh%elements(i)%e%pts(2)%p%x) + el_dim(2,:) = abs(msh%elements(i)%e%pts(1)%p%x-msh%elements(i)%e%pts(3)%p%x) + el_dim(3,:) = abs(msh%elements(i)%e%pts(1)%p%x-msh%elements(i)%e%pts(5)%p%x) + ! 1 corresponds to x, 2 to y, 3 to z + el_h = el_dim(map_1d%dir_el(i),dir) + el_heights%x(:,:,:,i) = el_h + end do + + call copy(u%x,el_heights%x,n) + call copy(old_u%x,el_heights%x,n) + call copy(avg_u%x,el_heights%x,n) + call perform_global_summation(u, avg_u, old_u, n_levels, & + map_1d%dir_el,gs_h, coef%mult, msh%nelv, lx) + domain_height = u%x(1,1,1,1) + + + do i = 1, field_data%size() + call copy(old_u%x,fields(i)%v%x,n) + call perform_local_summation(u,old_u, el_heights, domain_height, & + map_1d%dir_el, coef, msh%nelv, lx) + call copy(old_u%x,u%x,n) + call copy(avg_u%x,u%x,n) + call perform_global_summation(u, avg_u, old_u, n_levels, & + map_1d%dir_el,gs_h, coef%mult, msh%nelv, lx) + call copy(fields(i)%v%x,u%x,n) + end do + + call output_file%write(field_data,field_data%time) + end if + end do if (pe_rank .eq. 0) write(*,*) 'Done' call neko_finalize @@ -170,10 +229,11 @@ subroutine perform_global_summation(u, avg_u, old_u, n_levels, hom_dir_el, gs_h, do i = 1, n_levels-1 !compute average if (NEKO_BCKND_DEVICE .eq. 1) & - call device_memcpy(u%x, u%x_d, n, HOST_TO_DEVICE) + call device_memcpy(u%x, u%x_d, n, & + HOST_TO_DEVICE, sync=.false.) call gs_h%op(u,GS_OP_ADD) if (NEKO_BCKND_DEVICE .eq. 1) & - call device_memcpy(u%x, u%x_d, n, DEVICE_TO_HOST, sync=.true.) + call device_memcpy(u%x, u%x_d, n, DEVICE_TO_HOST, sync=.true.) call col2(u%x,mult,n) do e = 1, nelv temp_el = 2.0*u%x(:,:,:,e)-old_u%x(:,:,:,e) diff --git a/contrib/calc_lift_from_field/Makefile.am b/contrib/calc_lift_from_field/Makefile.am new file mode 100644 index 00000000000..aaa985f14ac --- /dev/null +++ b/contrib/calc_lift_from_field/Makefile.am @@ -0,0 +1,5 @@ +bin_PROGRAMS = calc_lift_from_field +calc_lift_from_field_SOURCES = calc_lift_from_field.f90 +calc_lift_from_field_LDADD = $(top_builddir)/src/libneko.a +calc_lift_from_field_LDFLAGS = $(LDFLAGS) $(LIBS) +AM_FCFLAGS = -I@top_builddir@/src diff --git a/contrib/calc_lift_from_field/calc_lift_from_field.f90 b/contrib/calc_lift_from_field/calc_lift_from_field.f90 new file mode 100644 index 00000000000..0718b9fe7f7 --- /dev/null +++ b/contrib/calc_lift_from_field/calc_lift_from_field.f90 @@ -0,0 +1,203 @@ +!> Program to calculate the force and acting on a single boundary zone as well as +!! the torque around a point if one changes the value of center (defaults to 0,0,0). +!! Outputs the x,y,z prjections of the pressure and viscous forces and torques and +!! additionally saves the distribution of these quantities along a selected homogenous +!! direction to a csv file. +!! Martin Karp 17/01-24 +program calc_lift_from_field + use neko + use mean_flow + use matrix + implicit none + + character(len=NEKO_FNAME_LEN) :: inputchar, mesh_fname, field_fname, hom_dir, output_fname + type(file_t) :: field_file, mesh_file, output_file + real(kind=rp) :: start_time + type(fld_file_data_t) :: field_data + type(coef_t) :: coef + type(dofmap_t), target :: dof + type(space_t) :: Xh + type(mesh_t) :: msh + type(gs_t) :: gs_h + type(map_1d_t) :: map_1d + type(field_t) :: u, v, w, p + type(field_t) :: s11, s22, s33, s12, s13, s23 + real(kind=rp) :: s11_, s22_, s33_, s12_, s13_, s23_, center(3) + type(matrix_t) :: drag_torq + real(kind=rp), allocatable :: lvl_coords(:) + real(kind=rp), pointer :: line(:,:,:,:) + integer :: argc, i, j, k, n, lx, e, zone_id, dir, f, glb_n_gll_pts, ierr, mem, t + real(kind=rp) :: visc, nv(3), dgtq(12) + + argc = command_argument_count() + + if ((argc .lt. 6) .or. (argc .gt. 6)) then + if (pe_rank .eq. 0) then + write(*,*) 'Usage: ./calc_lift_from_field mesh.nmsh field.fld zone_number viscosity function_of_coord output.csv' + write(*,*) 'Example command: ./calc_lift_from_field mesh.nmsh fieldblabla.fld 5 0.04 y out.csv' + write(*,*) 'Outputs the total force and torque on zone 5 using velocity values from fieldblabla.fld' + write(*,*) 'as well as writes the distribution of the force and torque across y to output.csv' + end if + stop + end if + + call neko_init + + call get_command_argument(1, inputchar) + read(inputchar, *) mesh_fname + mesh_file = file_t(trim(mesh_fname)) + call get_command_argument(2, inputchar) + read(inputchar, *) field_fname + field_file = file_t(trim(field_fname)) + call get_command_argument(3, inputchar) + read(inputchar, *) zone_id + call get_command_argument(4, inputchar) + read(inputchar, *) visc + call get_command_argument(5, inputchar) + read(inputchar, *) hom_dir + call get_command_argument(6, inputchar) + read(inputchar, *) output_fname + output_file = file_t(trim(output_fname)) + + call mesh_file%read(msh) + + call field_data%init(msh%nelv,msh%offset_el) + call field_file%read(field_data) + + lx = field_data%lx + !To make sure any deformation made in the user file is passed onto here as well + do i = 1,msh%nelv + msh%elements(i)%e%pts(1)%p%x(1) = field_data%x%x(linear_index(1,1,1,i,lx,lx,lx)) + msh%elements(i)%e%pts(2)%p%x(1) = field_data%x%x(linear_index(lx,1,1,i,lx,lx,lx)) + msh%elements(i)%e%pts(3)%p%x(1) = field_data%x%x(linear_index(1,lx,1,i,lx,lx,lx)) + msh%elements(i)%e%pts(4)%p%x(1) = field_data%x%x(linear_index(lx,lx,1,i,lx,lx,lx)) + msh%elements(i)%e%pts(5)%p%x(1) = field_data%x%x(linear_index(1,1,lx,i,lx,lx,lx)) + msh%elements(i)%e%pts(6)%p%x(1) = field_data%x%x(linear_index(lx,1,lx,i,lx,lx,lx)) + msh%elements(i)%e%pts(7)%p%x(1) = field_data%x%x(linear_index(1,lx,lx,i,lx,lx,lx)) + msh%elements(i)%e%pts(8)%p%x(1) = field_data%x%x(linear_index(lx,lx,lx,i,lx,lx,lx)) + + msh%elements(i)%e%pts(1)%p%x(2) = field_data%y%x(linear_index(1,1,1,i,lx,lx,lx)) + msh%elements(i)%e%pts(2)%p%x(2) = field_data%y%x(linear_index(lx,1,1,i,lx,lx,lx)) + msh%elements(i)%e%pts(3)%p%x(2) = field_data%y%x(linear_index(1,lx,1,i,lx,lx,lx)) + msh%elements(i)%e%pts(4)%p%x(2) = field_data%y%x(linear_index(lx,lx,1,i,lx,lx,lx)) + msh%elements(i)%e%pts(5)%p%x(2) = field_data%y%x(linear_index(1,1,lx,i,lx,lx,lx)) + msh%elements(i)%e%pts(6)%p%x(2) = field_data%y%x(linear_index(lx,1,lx,i,lx,lx,lx)) + msh%elements(i)%e%pts(7)%p%x(2) = field_data%y%x(linear_index(1,lx,lx,i,lx,lx,lx)) + msh%elements(i)%e%pts(8)%p%x(2) = field_data%y%x(linear_index(lx,lx,lx,i,lx,lx,lx)) + + msh%elements(i)%e%pts(1)%p%x(3) = field_data%z%x(linear_index(1,1,1,i,lx,lx,lx)) + msh%elements(i)%e%pts(2)%p%x(3) = field_data%z%x(linear_index(lx,1,1,i,lx,lx,lx)) + msh%elements(i)%e%pts(3)%p%x(3) = field_data%z%x(linear_index(1,lx,1,i,lx,lx,lx)) + msh%elements(i)%e%pts(4)%p%x(3) = field_data%z%x(linear_index(lx,lx,1,i,lx,lx,lx)) + msh%elements(i)%e%pts(5)%p%x(3) = field_data%z%x(linear_index(1,1,lx,i,lx,lx,lx)) + msh%elements(i)%e%pts(6)%p%x(3) = field_data%z%x(linear_index(lx,1,lx,i,lx,lx,lx)) + msh%elements(i)%e%pts(7)%p%x(3) = field_data%z%x(linear_index(1,lx,lx,i,lx,lx,lx)) + msh%elements(i)%e%pts(8)%p%x(3) = field_data%z%x(linear_index(lx,lx,lx,i,lx,lx,lx)) + end do + + call Xh%init(GLL, field_data%lx, field_data%ly, field_data%lz) + + dof = dofmap_t(msh, Xh) + call gs_h%init(dof) + call coef%init(gs_h) + ! Center around which we calculate the torque + center = 0.0_rp + + ! 1 corresponds to r, 2 to s, 3 to t + if (trim(hom_dir) .eq. 'x') then + dir = 1 + line => dof%x + else if (trim(hom_dir) .eq. 'y') then + dir = 2 + line => dof%y + else if (trim(hom_dir) .eq. 'z') then + dir = 3 + line => dof%z + else + call neko_error('The homogeneous direction should be "x", "y"or "z"') + end if + call map_1d%init(dof, gs_h, dir, 1e-7_rp) + + + call u%init(dof) + call v%init(dof) + call w%init(dof) + call p%init(dof) + call s11%init(dof) + call s22%init(dof) + call s33%init(dof) + call s12%init(dof) + call s13%init(dof) + call s23%init(dof) + glb_n_gll_pts = map_1d%n_el_lvls*lx + call drag_torq%init(glb_n_gll_pts, 14) + if (pe_rank .eq. 0) call output_file%set_header('time, coord, forcepx, forcepy, & + & forcepz, forcevx, forcevy, forcevz, torqpx, torqpy, & + & torqpz, torqvx, torqvy, torqvz') + + do t = 1, field_data%meta_nsamples + if (t .ne. 1) call field_file%read(field_data) + if(pe_rank .eq. 0) write(*,*) 'Calc drag/lift at t= ', field_data%time + + call copy(u%x,field_data%u%x,dof%size()) + call copy(v%x,field_data%v%x,dof%size()) + call copy(w%x,field_data%w%x,dof%size()) + call copy(p%x,field_data%p%x,dof%size()) + drag_torq = 0.0_rp + !set coords to somoething big + drag_torq%x(:,2) = 1e15 + + + if(pe_rank .eq. 0) write(*,*) 'Total drag' + + call strain_rate(s11%x, s22%x, s33%x, s12%x, s13%x, s23%x, u, v, w, coef) + call drag_torque_zone(dgtq,field_data%t_counter, msh%labeled_zones(zone_id), center,& + s11%x, s22%x, s33%x, s12%x, s13%x, s23%x,& + p, coef, visc) + if (pe_rank .eq. 0) then + write(*,*) field_data%t_counter,dgtq(1)+dgtq(4),dgtq(1),dgtq(4),'dragx' + write(*,*) field_data%t_counter,dgtq(2)+dgtq(5),dgtq(2),dgtq(5),'dragy' + write(*,*) field_data%t_counter,dgtq(3)+dgtq(6),dgtq(3),dgtq(6),'dragz' + end if + + + do mem = 1,msh%labeled_zones(zone_id)%size + e = msh%labeled_zones(zone_id)%facet_el(mem)%x(2) + f = msh%labeled_zones(zone_id)%facet_el(mem)%x(1) + do k = 1, lx + do j = 1, lx + do i = 1, lx + if (index_is_on_facet(i,j,k,lx,lx,lx,f)) then + nv = coef%get_normal(i,j,k,e,f)*coef%get_area(i,j,k,e,f) + s11_ = s11%x(i,j,k,e) + s12_ = s12%x(i,j,k,e) + s22_ = s22%x(i,j,k,e) + s13_ = s13%x(i,j,k,e) + s23_ = s23%x(i,j,k,e) + s33_ = s33%x(i,j,k,e) + call drag_torque_pt(dgtq,dof%x(i,j,k,e), dof%y(i,j,k,e),dof%z(i,j,k,e),& + center, & + s11_, s22_, s33_, s12_, s13_, s23_,& + p%x(i,j,k,e), nv(1), nv(2), nv(3), visc) + drag_torq%x(map_1d%pt_lvl(i,j,k,e),3:14) = & + drag_torq%x(map_1d%pt_lvl(i,j,k,e),3:14) + dgtq + drag_torq%x(map_1d%pt_lvl(i,j,k,e),2) = line(i,j,k,e) + end if + end do + end do + end do + end do + + call MPI_Allreduce(MPI_IN_PLACE,drag_torq%x(1,3), 12*glb_n_gll_pts, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + call MPI_Allreduce(MPI_IN_PLACE,drag_torq%x(1,2), glb_n_gll_pts, & + MPI_REAL_PRECISION, MPI_MIN, NEKO_COMM, ierr) + drag_torq%x(:,1) = field_data%time + if (pe_rank .eq. 0) then + call output_file%write(drag_torq) + end if + end do + + call neko_finalize + +end program calc_lift_from_field diff --git a/contrib/map_to_equidistant_1d/Makefile.am b/contrib/map_to_equidistant_1d/Makefile.am new file mode 100644 index 00000000000..c33d59c9fe7 --- /dev/null +++ b/contrib/map_to_equidistant_1d/Makefile.am @@ -0,0 +1,5 @@ +bin_PROGRAMS = map_to_equidistant_1d +map_to_equidistant_1d_SOURCES = map_to_equidistant_1d.f90 +map_to_equidistant_1d_LDADD = $(top_builddir)/src/libneko.a +map_to_equidistant_1dLDFLAGS = $(LDFLAGS) $(LIBS) +AM_FCFLAGS = -I@top_builddir@/src diff --git a/contrib/map_to_equidistant_1d/map_to_equidistant_1d.f90 b/contrib/map_to_equidistant_1d/map_to_equidistant_1d.f90 new file mode 100644 index 00000000000..11f5ad9ebcf --- /dev/null +++ b/contrib/map_to_equidistant_1d/map_to_equidistant_1d.f90 @@ -0,0 +1,126 @@ +!> Program to interpolate the field from GLL points onto equidistant points in elements in a homogenous direction +!> The number of points inside an element of the input and output field file is supposed to be the same +!> Shiyu Du 16/01-24 note: the program is designed for Cartesian Box meshes with equidistant elements on the hom-dir, +! the version for rotational coordinates is not yet been thought about. +!> Shiyu Du 16/01-24 note: the program is currently tested on CPUs, GPU is not tested yet. +program map_to_equidistant_1d + use neko + use fast3d + use tensor + implicit none + + character(len=NEKO_FNAME_LEN) :: inputchar, field_fname, hom_dir, output_fname + type(file_t) :: field_file, output_file + real(kind=rp) :: x_equid + real(kind=rp), allocatable :: wt(:,:), wtt(:,:), ident(:,:) + type(fld_file_data_t) :: field_data + type(space_t) :: Xh + type(vector_ptr_t), allocatable :: fields(:) + integer :: argc, i, lx, j, file_precision + logical :: dp_precision + + argc = command_argument_count() + + if ((argc .lt. 4) .or. (argc .gt. 4)) then + if (pe_rank .eq. 0) then + write(*,*) 'Usage: ./map_to_equidistant_1d field.fld dir(x, y, z) outfield.fld precision' + write(*,*) 'Example command: ./map_to_equidistant_1d fieldblabla.fld x outfield.fld .true.' + write(*,*) 'Redistribute the points to be equidistant in elements ' + write(*,*) 'in x of the field fieldblabla.nek5000 and stores in outfield.fld, both are double precision' + end if + stop + end if + + call neko_init + + call get_command_argument(1, inputchar) + read(inputchar, fmt='(A)') field_fname + call get_command_argument(2, inputchar) + read(inputchar, *) hom_dir + call get_command_argument(3, inputchar) + read(inputchar, fmt='(A)') output_fname + call get_command_argument(4, inputchar) + read(inputchar, *) dp_precision + + if (dp_precision) then + file_precision = dp + else + file_precision = sp + end if + + field_file = file_t(trim(field_fname),precision=file_precision) + + if (trim(hom_dir) .ne. 'x' .and. trim(hom_dir) .ne. 'y' .and. trim(hom_dir) .ne. 'z') then + call neko_error('The homogenous direction should be "x", "y" or "z"') + end if + + call field_data%init() + if (pe_rank .eq. 0) write(*,*) 'Reading file:', 1 + call field_file%read(field_data) + + lx = field_data%lx + + call Xh%init(GLL, field_data%lx, field_data%ly, field_data%lz) + + ! construct the weight matrix + allocate(wt(lx,lx)) + allocate(wtt(lx,lx)) + allocate(ident(lx,lx)) + ident = 0.0_rp + do i = 1, lx + x_equid = -1.0_rp + (i-1) * 2.0_rp/(lx-1) + call fd_weights_full(x_equid, Xh%zg(:,1), lx-1, 0, wtt(:,i)) + wt(i,:) = wtt(:,i) + ident(i,i) = 1.0_rp + end do + + ! redistribute the coordinates to be equidistant within elements + if (trim(hom_dir) .eq. 'x') then + call tnsr1_3d(field_data%x%x, lx, lx, wt, ident, ident, field_data%nelv) + else if (trim(hom_dir) .eq. 'y') then + call tnsr1_3d(field_data%y%x, lx, lx, ident, wtt, ident, field_data%nelv) + else if (trim(hom_dir) .eq. 'z') then + call tnsr1_3d(field_data%z%x, lx, lx, ident, ident, wtt, field_data%nelv) + end if + + ! interpolate the field at t=0 + allocate(fields(field_data%size())) + call field_data%get_list(fields,field_data%size()) + do i = 1, field_data%size() + if (trim(hom_dir) .eq. 'x') then + call tnsr1_3d(fields(i)%v%x, lx, lx, wt, ident, ident, field_data%nelv) + else if (trim(hom_dir) .eq. 'y') then + call tnsr1_3d(fields(i)%v%x, lx, lx, ident, wtt, ident, field_data%nelv) + else if (trim(hom_dir) .eq. 'z') then + call tnsr1_3d(fields(i)%v%x, lx, lx, ident, ident, wtt, field_data%nelv) + end if + end do + + ! output at t=0 + output_file = file_t(trim(output_fname),precision=file_precision) + call output_file%write(field_data, field_data%time) + + ! interpolate field for t>0 + do i = 1, field_data%meta_nsamples-1 + if (pe_rank .eq. 0) write(*,*) 'Reading file:', i+1 + call field_file%read(field_data) + call field_data%get_list(fields,field_data%size()) + do j = 1, field_data%size() + if (trim(hom_dir) .eq. 'x') then + call tnsr1_3d(fields(j)%v%x, lx, lx, wt, ident, ident, field_data%nelv) + else if (trim(hom_dir) .eq. 'y') then + call tnsr1_3d(fields(j)%v%x, lx, lx, ident, wtt, ident, field_data%nelv) + else if (trim(hom_dir) .eq. 'z') then + call tnsr1_3d(fields(j)%v%x, lx, lx, ident, ident, wtt, field_data%nelv) + end if + end do + ! output for t>0 + call output_file%write(field_data, field_data%time) + end do + + + if (pe_rank .eq. 0) write(*,*) 'Done' + + call neko_finalize + +end program map_to_equidistant_1d diff --git a/contrib/pyneko/Makefile.am b/contrib/pyneko/Makefile.am index f2607f00531..88c66db51cc 100644 --- a/contrib/pyneko/Makefile.am +++ b/contrib/pyneko/Makefile.am @@ -3,5 +3,4 @@ ACLOCAL_AMFLAGS = -I m4 --install SUBDIRS = src nobase_python_PYTHON = pyNeko/__init__.py\ - pyNeko/case.py\ pyNeko/intf.py diff --git a/contrib/pyneko/examples/hemi/hemi.py b/contrib/pyneko/examples/hemi/hemi.py index a918c0d2702..92614845089 100644 --- a/contrib/pyneko/examples/hemi/hemi.py +++ b/contrib/pyneko/examples/hemi/hemi.py @@ -1,14 +1,19 @@ import pyNeko +import json # Initialize Neko pyNeko.init() # Create a json case -hemi_case = pyNeko.create_case(mesh_file="hemi.nmsh", lx=6, T_end=1e-3) +hemi_case = json.load(open('hemi.case')) # Solve the case pyNeko.solve(hemi_case) -# Increase sample rate and rerun to get some data -hemi_case['parameters']['nsamples'] = 1 +# Increase sample rate and rerun to get more data +hemi_case['case']['nsamples'] = 2 pyNeko.solve(hemi_case) + + +# Finalize Neko +pyNeko.finalize() diff --git a/contrib/pyneko/pyNeko/__init__.py b/contrib/pyneko/pyNeko/__init__.py index 847049f7c69..a4c85f305ed 100644 --- a/contrib/pyneko/pyNeko/__init__.py +++ b/contrib/pyneko/pyNeko/__init__.py @@ -1,4 +1,3 @@ """ A portable framework for high-order spectral element flow simulations """ -from pyNeko.case import create_case from pyNeko.intf import init, finalize, solve diff --git a/contrib/pyneko/pyNeko/case.py b/contrib/pyneko/pyNeko/case.py deleted file mode 100644 index 3a54bc3cb3b..00000000000 --- a/contrib/pyneko/pyNeko/case.py +++ /dev/null @@ -1,39 +0,0 @@ -import json - -def create_case(mesh_file="", - lx=4, - source_term="noforce", - initial_condition="uniform", - nsamples=0, - dt=0.001, - T_end=0.0, - uinf=[1.0,0.0,0.0]) : - default = { - "case" : { - "mesh_file" : mesh_file, - "fluid_scheme" : "pnpn", - "lx" : lx, - "source_term" : source_term, - "initial_condition" : initial_condition - }, - "parameters" : { - "nsamples" : nsamples, - "dt" : dt, - "T_end" : T_end, - "uinf" : uinf, - "ksp_vel" : { - "type" : "cg", - "pc" : "jacobi", - "abstol" : 1e-09 - - }, - "ksp_prs": { - "type" : "gmres", - "pc" : "hsmg", - "abstol" : 1e-09 - } - } - } - return json.loads(json.dumps(default)) - - diff --git a/contrib/pyneko/src/Makefile.am b/contrib/pyneko/src/Makefile.am index f8e3f2ec53f..a3c0a6dbd72 100644 --- a/contrib/pyneko/src/Makefile.am +++ b/contrib/pyneko/src/Makefile.am @@ -1,4 +1,3 @@ lib_LTLIBRARIES = libnekointf.la -libnekointf_la_SOURCES = json_case.f90\ - neko_intf.f90 +libnekointf_la_SOURCES = neko_intf.f90 diff --git a/contrib/pyneko/src/json_case.f90 b/contrib/pyneko/src/json_case.f90 deleted file mode 100644 index d2ae3bf9442..00000000000 --- a/contrib/pyneko/src/json_case.f90 +++ /dev/null @@ -1,263 +0,0 @@ -! Copyright (c) 2022, The Neko Authors -! All rights reserved. -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! -! * Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! -! * Redistributions in binary form must reproduce the above -! copyright notice, this list of conditions and the following -! disclaimer in the documentation and/or other materials provided -! with the distribution. -! -! * Neither the name of the authors nor the names of its -! contributors may be used to endorse or promote products derived -! from this software without specific prior written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -module json_case - use neko - use json_module - implicit none - -contains - - subroutine json_case_create_neko_case(neko_case, json_case) - type(case_t), intent(inout) :: neko_case - type(json_file) :: json_case - type(json_core) :: jcore - type(json_value), pointer :: json_param, p - character(len=:), allocatable :: mesh_file - character(len=:), allocatable :: fluid_scheme - character(len=:), allocatable :: source_term - character(len=:), allocatable :: initial_condition - integer :: lx - logical found_key - type(file_t) :: msh_file - - - - call neko_log%section('Case') - call neko_log%message('Reading case from pyNeko json file') - - ! Fill default parameters - call param_default(neko_case%params) - call json_case%get_core(jcore) - call json_case%get('parameters', json_param, found_key) - if (found_key) then - call json_case_create_neko_params(neko_case%params, json_param, jcore) - else - call neko_warning('No parameter block found') - end if - - ! Read case description - - call json_case%get('case.mesh_file', mesh_file, found_key) - if (.not. found_key) then - call neko_error('Key mesh_file not found') - else - msh_file = file_t(trim(mesh_file)) - call msh_file%read(neko_case%msh) - end if - - call neko_case%usr%init() - call neko_case%usr%usr_msh_setup(neko_case%msh) - - call json_case%get('case.lx', lx, found_key) - if (.not. found_key) then - call neko_error('lx not defined') - end if - - call json_case%get('case.fluid_scheme', fluid_scheme, found_key) - if (.not. found_key) then - call neko_error('Fluid scheme not defined') - else - call fluid_scheme_factory(neko_case%fluid, trim(fluid_scheme)) - call neko_case%fluid%init(neko_case%msh, lx, neko_case%params) - end if - - call json_case%get('case.source_term', source_term, found_key) - if (.not. found_key) then - call neko_error('Source term not defined') - else - call neko_case%fluid%set_source(trim(source_term)) - end if - - call json_case%get('case.initial_condition', initial_condition, found_key) - if (.not. found_key) then - call neko_error('Intiail condition not defined') - else - call set_flow_ic(neko_case%fluid%u, neko_case%fluid%v, neko_case%fluid%w,& - neko_case%fluid%p, neko_case%fluid%c_Xh, neko_case%fluid%gs_Xh,& - initial_condition, neko_case%params) - end if - - - select type(f => neko_case%fluid) - type is(fluid_pnpn_t) - call f%ulag%set(f%u) - call f%vlag%set(f%v) - call f%wlag%set(f%w) - end select - - call neko_case%fluid%validate() - - call neko_case%ab_bdf%set_time_order(neko_case%params%time_order) - - call neko_case%s%init(neko_case%params%nsamples, neko_case%params%T_end) - neko_case%f_out = fluid_output_t(neko_case%fluid, & - path=neko_case%params%output_dir) - call neko_case%s%add(neko_case%f_out) - - call neko_case%q%init(neko_case%params%stats_begin) - if (neko_case%params%stats_mean_flow) then - call neko_case%q%add(neko_case%fluid%mean%u) - call neko_case%q%add(neko_case%fluid%mean%v) - call neko_case%q%add(neko_case%fluid%mean%w) - call neko_case%q%add(neko_case%fluid%mean%p) - - if (neko_case%params%output_mean_flow) then - neko_case%f_mf = mean_flow_output_t(neko_case%fluid%mean, & - neko_case%params%stats_begin, path=neko_case%params%output_dir) - call neko_case%s%add(neko_case%f_mf) - end if - end if - - if (neko_case%params%stats_mean_sqr_flow) then - call neko_case%q%add(neko_case%fluid%mean_sqr%uu) - call neko_case%q%add(neko_case%fluid%mean_sqr%vv) - call neko_case%q%add(neko_case%fluid%mean_sqr%ww) - call neko_case%q%add(neko_case%fluid%mean_sqr%pp) - - if (neko_case%params%output_mean_sqr_flow) then - neko_case%f_msqrf = mean_sqr_flow_output_t(neko_case%fluid%mean_sqr, & - neko_case%params%stats_begin, & - path=neko_case%params%output_dir) - call neko_case%s%add(neko_case%f_msqrf) - end if - end if - - call jobctrl_set_time_limit(neko_case%params%jlimit) - - call neko_log%end_section() - - end subroutine json_case_create_neko_case - - subroutine json_case_create_neko_params(params, json_param, jcore) - type(param_t), intent(inout) :: params - type(json_value), pointer :: json_param - type(json_core) :: jcore - type(json_value), pointer :: p - logical :: found_key - real(kind=rp), allocatable :: uinf(:) - character(len=:), allocatable :: str - - call jcore%get_child(json_param, 'nsamples', p, found_key) - if (found_key) call jcore%get(p, params%nsamples) - - call jcore%get_child(json_param, 'output_bdry', p, found_key) - if (found_key) call jcore%get(p, params%output_bdry) - call jcore%get_child(json_param, 'output_part', p, found_key) - if (found_key) call jcore%get(p, params%output_part) - call jcore%get_child(json_param, 'output_chkp', p, found_key) - if (found_key) call jcore%get(p, params%output_chkp) - - call jcore%get_child(json_param, 'dt', p, found_key) - if (found_key) call jcore%get(p, params%dt) - call jcore%get_child(json_param, 'T_end', p, found_key) - if (found_key) call jcore%get(p, params%T_end) - call jcore%get_child(json_param, 'rho', p, found_key) - if (found_key) call jcore%get(p, params%rho) - call jcore%get_child(json_param, 'mu', p, found_key) - if (found_key) call jcore%get(p, params%mu) - call jcore%get_child(json_param, 'Re', p, found_key) - if (found_key) call jcore%get(p, params%Re) - call jcore%get_child(json_param, 'uinf', p, found_key) - if (found_key) then - call jcore%get(p, uinf) - params%uinf = uinf - end if - - - call jcore%get_child(json_param, 'ksp_prs.type', p, found_key) - if (found_key) then - call jcore%get(p, str) - params%ksp_prs = str - deallocate(str) - end if - - call jcore%get_child(json_param, 'ksp_prs.pc', p, found_key) - if (found_key) then - call jcore%get(p, str) - params%pc_prs = str - deallocate(str) - end if - call jcore%get_child(json_param, 'ksp_prs.abs_tol', p, found_key) - if (found_key) call jcore%get(p, params%abstol_prs) - - call jcore%get_child(json_param, 'ksp_vel.type', p, found_key) - if (found_key) then - call jcore%get(p, str) - params%ksp_vel = str - deallocate(str) - end if - - call jcore%get_child(json_param, 'ksp_vel.pc', p, found_key) - if (found_key) then - call jcore%get(p, str) - params%pc_vel = str - deallocate(str) - end if - call jcore%get_child(json_param, 'ksp_vel.abs_tol', p, found_key) - if (found_key) call jcore%get(p, params%abstol_vel) - - - call jcore%get_child(json_param, 'fluid_inflow', p, found_key) - if (found_key) then - call jcore%get(p, str) - params%fluid_inflow = str - deallocate(str) - end if - - call jcore%get_child(json_param, 'vol_flow_dir', p, found_key) - if (found_key) call jcore%get(p, params%vol_flow_dir) - - call jcore%get_child(json_param, 'proj_prs_dim', p, found_key) - if (found_key) call jcore%get(p, params%proj_prs_dim) - call jcore%get_child(json_param, 'proj_vel_dim', p, found_key) - if (found_key) call jcore%get(p, params%proj_vel_dim) - - call jcore%get_child(json_param, 'stats.begin', p, found_key) - if (found_key) call jcore%get(p, params%stats_begin) - call jcore%get_child(json_param, 'stats.mean_flow', p, found_key) - if (found_key) call jcore%get(p, params%stats_mean_flow) - call jcore%get_child(json_param, 'stats.output_mean_flow', p, found_key) - if (found_key) call jcore%get(p, params%output_mean_flow) - call jcore%get_child(json_param, 'stats.mean_sqr_flow', p, found_key) - if (found_key) call jcore%get(p, params%stats_mean_flow) - call jcore%get_child(json_param, 'stats.output_mean_sqr_flow', p, found_key) - if (found_key) call jcore%get(p, params%output_mean_flow) - - call jcore%get_child(json_param, 'dealias', p, found_key) - if (found_key) call jcore%get(p, params%dealias) - call jcore%get_child(json_param, 'lxd', p, found_key) - if (found_key) call jcore%get(p, params%lxd) - - end subroutine json_case_create_neko_params - -end module json_case diff --git a/contrib/pyneko/src/neko_intf.f90 b/contrib/pyneko/src/neko_intf.f90 index 647d26af0cf..b5e5bb7186d 100644 --- a/contrib/pyneko/src/neko_intf.f90 +++ b/contrib/pyneko/src/neko_intf.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2022, The Neko Authors +! Copyright (c) 2022-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -32,7 +32,6 @@ ! module neko_intf use neko - use json_case use json_module use, intrinsic :: iso_c_binding implicit none @@ -44,34 +43,70 @@ subroutine neko_intf_init() bind(c, name='init') character(len=LOG_SIZE) :: log_buf character(10) :: time character(8) :: date + integer :: nthrds, rw, sw call neko_init() - call date_and_time(time=time, date=date) - call neko_log%section("Session Information") + call date_and_time(time=time, date=date) + call neko_log%section("Session Information") write(log_buf, '(A,A,A,A,1x,A,1x,A,A,A,A,A)') 'Start time: ',& time(1:2),':',time(3:4), '/', date(1:4),'-', date(5:6),'-',date(7:8) - call neko_log%message(log_buf) + call neko_log%message(log_buf, NEKO_LOG_QUIET) write(log_buf, '(a)') 'Running on: ' + sw = 10 if (pe_size .lt. 1e1) then write(log_buf(13:), '(i1,a)') pe_size, ' MPI ' if (pe_size .eq. 1) then write(log_buf(19:), '(a)') 'rank' + sw = 9 else write(log_buf(19:), '(a)') 'ranks' end if + rw = 1 else if (pe_size .lt. 1e2) then write(log_buf(13:), '(i2,a)') pe_size, ' MPI ranks' + rw = 2 else if (pe_size .lt. 1e3) then write(log_buf(13:), '(i3,a)') pe_size, ' MPI ranks' + rw = 3 else if (pe_size .lt. 1e4) then write(log_buf(13:), '(i4,a)') pe_size, ' MPI ranks' + rw = 4 else if (pe_size .lt. 1e5) then write(log_buf(13:), '(i5,a)') pe_size, ' MPI ranks' + rw = 5 else write(log_buf(13:), '(i6,a)') pe_size, ' MPI ranks' + rw = 6 end if - call neko_log%message(log_buf) + + nthrds = 1 + !$omp parallel + !$omp master + !$ nthrds = omp_get_num_threads() + !$omp end master + !$omp end parallel + + if (nthrds .gt. 1) then + if (nthrds .lt. 1e1) then + write(log_buf(13 + rw + sw:), '(a,i1,a)') ', using ', & + nthrds, ' thrds each' + else if (nthrds .lt. 1e2) then + write(log_buf(13 + rw + sw:), '(a,i2,a)') ', using ', & + nthrds, ' thrds each' + else if (nthrds .lt. 1e3) then + write(log_buf(13 + rw + sw:), '(a,i3,a)') ', using ', & + nthrds, ' thrds each' + else if (nthrds .lt. 1e4) then + write(log_buf(13 + rw + sw:), '(a,i4,a)') ', using ', & + nthrds, ' thrds each' + end if + end if + call neko_log%message(log_buf, NEKO_LOG_QUIET) + + write(log_buf, '(a)') 'CPU type : ' + call system_cpu_name(log_buf(13:)) + call neko_log%message(log_buf, NEKO_LOG_QUIET) write(log_buf, '(a)') 'Bcknd type: ' if (NEKO_BCKND_SX .eq. 1) then @@ -87,13 +122,13 @@ subroutine neko_intf_init() bind(c, name='init') else write(log_buf(13:), '(a)') 'CPU' end if - call neko_log%message(log_buf) + call neko_log%message(log_buf, NEKO_LOG_QUIET) if (NEKO_BCKND_HIP .eq. 1 .or. NEKO_BCKND_CUDA .eq. 1 .or. & NEKO_BCKND_OPENCL .eq. 1) then write(log_buf, '(a)') 'Dev. name : ' call device_name(log_buf(13:)) - call neko_log%message(log_buf) + call neko_log%message(log_buf, NEKO_LOG_QUIET) end if write(log_buf, '(a)') 'Real type : ' @@ -105,10 +140,12 @@ subroutine neko_intf_init() bind(c, name='init') case (real128) write(log_buf(13:), '(a)') 'quad precision' end select - call neko_log%message(log_buf) + call neko_log%message(log_buf, NEKO_LOG_QUIET) + call neko_log%end() - call neko_log%newline + call neko_log%newline + end subroutine neko_intf_init subroutine neko_intf_finalize() bind(c, name="finalize") @@ -120,12 +157,10 @@ subroutine neko_intf_solve(pyneko_case, ilen) bind(c, name="solve") integer(c_int), value :: ilen character(len=:), allocatable :: fpyneko_case type(json_file) :: json_case - type(case_t) :: neko_case - - + type(case_t), target :: neko_case if (c_associated(pyneko_case)) then - block + block character(kind=c_char,len=ilen+1),pointer :: s call c_f_pointer(pyneko_case, s) fpyneko_case = s(1:ilen) @@ -134,14 +169,19 @@ subroutine neko_intf_solve(pyneko_case, ilen) bind(c, name="solve") nullify(s) end block end if - - call json_case_create_neko_case(neko_case, json_case) - call json_case%destroy() + call case_init(neko_case, json_case) + call json_case%destroy() + call neko_solve(neko_case) call case_free(neko_case) + !> @todo Add a clean method + call neko_field_registry%free() + call neko_field_registry%init() + + end subroutine neko_intf_solve end module neko_intf diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index c5c0ed8c98a..b2a79d568c5 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -758,19 +758,7 @@ WARN_LOGFILE = # spaces. # Note: If this tag is empty the current directory is searched. -INPUT = @top_srcdir@/doc/pages/index.md\ - @top_srcdir@/doc/pages/code-style.md\ - @top_srcdir@/doc/pages/governing-equations.md\ - @top_srcdir@/doc/pages/case-file.md\ - @top_srcdir@/doc/pages/point-zones.md\ - @top_srcdir@/doc/pages/simcomps.md\ - @top_srcdir@/doc/pages/accelerators.md\ - @top_srcdir@/doc/pages/testing.md\ - @top_srcdir@/doc/pages/publication-list.md\ - @top_srcdir@/doc/pages/appendix.md\ - @top_srcdir@/doc/pages/statistics.md\ - @top_srcdir@/doc/pages/installation.md\ - @top_srcdir@/doc/pages/dev_patterns.md\ +INPUT = @top_srcdir@/doc/pages\ @top_srcdir@/src\ @top_srcdir@/contrib/rea2nbin\ @top_srcdir@/contrib/average_fields_in_time\ diff --git a/doc/Makefile.am b/doc/Makefile.am index 00281459a96..14a4352ce09 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -11,19 +11,27 @@ doxygen.stamp: Doxyfile $(INPUT) CLEANFILES = doxygen.stamp clean-local: - rm -fr html + rm -fr html html-local: doxygen.stamp -EXTRA_DIST = pages/code-style.md\ - pages/publication-list.md\ - pages/accelerators.md\ - pages/case-file.md\ - pages/statistics.md\ - pages/simcomps.md\ - pages/point-zones.md\ - pages/testing.md\ - pages/appendix.md\ - pages/installation.md\ - pages/dev_patterns.md\ - pages/index.md +EXTRA_DIST = pages/appendices/governing-equations.md \ + pages/appendices/publications.md \ + pages/developer-guide \ + pages/developer-guide/accelerators.md \ + pages/developer-guide/code-style.md \ + pages/developer-guide/contributing.md \ + pages/developer-guide/dev_patterns.md \ + pages/developer-guide/testing.md \ + pages/user-guide \ + pages/user-guide/case-file.md \ + pages/user-guide/installation.md \ + pages/user-guide/io.md \ + pages/user-guide/point-zones.md \ + pages/user-guide/simcomps.md \ + pages/user-guide/statistics-guide.md \ + pages/user-guide/user-file.md \ + pages/appendices.md \ + pages/developer-guide.md \ + pages/index.md \ + pages/user-guide.md diff --git a/doc/pages/appendices.md b/doc/pages/appendices.md new file mode 100644 index 00000000000..25ae8e7a50c --- /dev/null +++ b/doc/pages/appendices.md @@ -0,0 +1,27 @@ +# Appendices {#appendices} + +\tableofcontents + +The appendices contain a few extra pages that are not directly related to the usage +of the code. But can be useful for users and developers alike. + +- [Environmental variable reference](@ref appendices_env-var) +- \subpage governing-equations +- \subpage publications + +## Environmental variable reference {#appendices_env-var} + +| Name | Description | Default value | +| ---------------- | ----------------------------------------------------------- | ------------- | +| `NEKO_AUTOTUNE` | Force Ax auto-tuning strategy (``'1D'``,``'KSTEP'``) | Unset | +| `NEKO_LOG_LEVEL` | Log verbosity level (integer > 0, default: 1) | Unset | +| `NEKO_GS_STRTGY` | Gather-scatter device MPI sync. strategy (0 < integer < 5 ) | Unset | + +### Logging level details + +A number of logging levels are supported. + +- `NEKO_LOG_LEVEL=0` : Quiet mode, minimal logging during execution. +- `NEKO_LOG_LEVEL=1` : Default information mode, adding step informations. +- `NEKO_LOG_LEVEL=2` : Verbose mode, logging extra details. +- `NEKO_LOG_LEVEL=10` : Debug mode. diff --git a/doc/pages/governing-equations.md b/doc/pages/appendices/governing-equations.md similarity index 98% rename from doc/pages/governing-equations.md rename to doc/pages/appendices/governing-equations.md index 3b9093a643d..a35676a1a80 100644 --- a/doc/pages/governing-equations.md +++ b/doc/pages/appendices/governing-equations.md @@ -1,5 +1,7 @@ # Governing equations {#governing-equations} +\tableofcontents + ## Fluid Neko's solves the Navier-Stokes equations formulated as follows. diff --git a/doc/pages/publication-list.md b/doc/pages/appendices/publications.md similarity index 74% rename from doc/pages/publication-list.md rename to doc/pages/appendices/publications.md index b4b53254ab7..490a21ebabc 100644 --- a/doc/pages/publication-list.md +++ b/doc/pages/appendices/publications.md @@ -1,8 +1,8 @@ -# Publication List {#publication-list} +# Publications {#publications} -* Jansson, N., Karp, M., Podobas, A., Markidis, S. and Schlatter, P., 2021. *Neko: A modern, portable, and scalable framework for high-fidelity computational fluid dynamics*. arXiv preprint arXiv:2107.01243. * Jansson, N., 2021. *Spectral Element Simulations on the NEC SX-Aurora TSUBASA*. In proc. HPCAsia 2021. * Karp, M., Podobas, A., Kenter, T., Jansson, N., Plessl, C., Schlatter, P. and Markidis, S., 2022. *A high-fidelity flow solver for unstructured meshes on field-programmable gate arrays: Design, evaluation, and future challenges*. In proc. HPCAsia 2022. * Karp, M., Jansson, N., Podobas, A., Schlatter, P., and Markidis, S., 2022. *Reducing Communication in the Conjugate Gradient Method: A Case Study on High-Order Finite Elements*. In proc. PASC 2022. -* Karp, M., Massaro, D., Jansson, N., Hart, A., Wahlgren, J., Schlatter, P., and Markidis, S., 2022. *Large-Scale Direct Numerical Simulations of Turbulence Using GPUs and Modern Fortran*. arXiv preprint arXiv:2207:07098. +* Karp, M., Massaro, D., Jansson, N., Hart, A., Wahlgren, J., Schlatter, P., and Markidis, S., 2023. *Large-Scale Direct Numerical Simulations of Turbulence Using GPUs and Modern Fortran*. The International Journal of High Performance Computing Applications, 37, 5. * Jansson, N., Karp, M., Perez, A., Mukha, T., Ju, Y., Liu, J., Páll, S., Laure, E., Weinkauf, T., Schumacher, J., Schlatter, P., Markidis, S., 2023. *Exploring the Ultimate Regime of Turbulent Rayleigh–Bénard Convection Through Unprecedented Spectral-Element Simulations*. SC '23: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. +* Jansson, N., Karp, M., Podobas, A., Markidis, S. and Schlatter, P., 2024. *Neko: A modern, portable, and scalable framework for high-fidelity computational fluid dynamics*. Computer & Fluids, 275. diff --git a/doc/pages/appendix.md b/doc/pages/appendix.md deleted file mode 100644 index cfb9a5fcf3e..00000000000 --- a/doc/pages/appendix.md +++ /dev/null @@ -1,9 +0,0 @@ -# Appendix {#appendix} - -## Environmental variable reference {#env-var} - -Name | Description | Default value ----- | ----------- | ------------- -`NEKO_AUTOTUNE` | Force Ax auto-tuning strategy (``'1D'``,``'KSTEP'``) | Unset -`NEKO_LOG_LEVEL` | Log verbosity level (integer > 0, default: 1) | Unset -`NEKO_GS_STRTGY` | Gather-scatter device MPI sync. strategy (0 < integer < 5 ) | Unset diff --git a/doc/pages/case-file.md b/doc/pages/case-file.md deleted file mode 100644 index 59b6ab19e0d..00000000000 --- a/doc/pages/case-file.md +++ /dev/null @@ -1,338 +0,0 @@ -# Case File {#case-file} - -The case file defines all the parameters of a simulation. -The format of the file is JSON, making it easy to read and write case files -using the majority of the popular programming languages. -JSON is heirarchical and, and consists of parameter blocks enclosed in curly -braces. -These blocks are referred to as objects. -The case file makes use objects to separate the configuration of different parts - of the solver. -We refer the reader to the examples shipped with the code to get a good -idea of how a case file looks. -The table below provides a complete reference for all possible configuration -choices. - -## High-level structure -The current high-level structure of the case file is shown below. - -~~~~~~~~~~~~~~~{.json} -{ - "version": 1.0 - "case": { - "numerics": {} - "fluid": {} - "scalar": {} - "statistics": {} - "simulation_components" : [] - "point_zones" : [] - } -} -~~~~~~~~~~~~~~~ -The `version` keywords is reserved to track changes in the format of the file. -The the subsections below we list all the configuration options for each of the high-level objects. -Some parameters will have default values, and are therefore optional. - -## Output frequency control -A common scheme for controlling the output frequency is applied for various -outputs. -It is described already now in order to clarify the meaning of several -parameters found in the tables below. - -The frequency is controlled by two paramters, ending with `_control` and -`_value`, respectively. -The latter name is perhaps not ideal, but it is somewhat difficult to come up -with a good one, suggestions are welcome. - -The `_value` parameter is a number, that defines the output frequency, but the -interpretation of that number depends on the choice of `_control`. -The three following options are possible. -1. `simulationtime`, then `_value` is the time interval between the outputs. -2. `tsteps`, then `_value` is the number of time steps between the outputs. -3. `nsamples`, then `_value` is the total number of outputs that will be - performed in the course of the simulation. -4. `never`, then `_value` is ignored and output is never performed. - - -## The case object - -This object is mostly used as a high-level container for all the other objects, -but also defines several parameters that pertain to the simulation as a whole. - -Name | Description | Admissable values | Default value ------ | ----------- | ----------------- | ------------- -`mesh_file` | The name of the mesh file. | Strings ending with `.nmsh` | - -`output_boundary` | Whether to write a `bdry0.f0000` file with boundary labels. Can be used to check boundary conditions. | `true` or `false` | `false` -`output_directory` | Folder for redirecting solver output. Note that the folder has to exist! | Path to an existing directory | `.` -`load_balancing` | Whether to apply load balancing. | `true` or `false` | `false` -`output_partitions` | Whether to write a `partitions.vtk` file with domain partitioning. | `true` or `false` | `false` -`output_checkpoints` | Whether to output checkpoints, i.e. restart files. | `true` or `false` | `false` -`checkpoint_control` | Defines the interpretation of `checkpoint_value` to define the frequency of writing checkpoint files. | `nsamples`, `simulationtime`, `tsteps`, `never` | - -`checkpoint_value` | The frequency of sampling in terms of `checkpoint_control`. | Positive real or integer | - -`restart_file` | checkpoint to use for a restart from previous data | Strings ending with `.chkp` | - -`time_step` | Time-step size. | Positive reals | - -`end_time` | Final time at which the simulation is stopped. | Positive reals | - -`job_timelimit` | The maximum wall clock duration of the simulation. | String formatted as HH:MM:SS | No limit - -## Numerics -Used to define the properties of the numerical discretization. - -Name | Description | Admissable values | Default value ----- | ----------- | ----------------- | ------------- -`polynomial_order` | The oder of the polynomial basis. | Integers, typically 5 to 9 | - | -`time_order` | The order of the time integration scheme. Refer to the `time_scheme_controller` type documention for details. | 1,2, 3 | - -`dealias` | Whether to apply dealiasing to advection terms. | `true` or `false` | `false` -`dealiased_polynomial order` | The polynomial order in the higher-order space used in the dealising. | Integer | `3/2(polynomial_order + 1) - 1` - -## Fluid - -The configuration of the fluid solver and the flow problem. -Contains multiple subobjects for various parts of the setup. - -### Material properties -As per the governing equations, Neko requires the value of the density and -dynamic viscosity to define the flow problem. These can be provided as `rho` and -`mu` in the case file. - -Alternatively, one may opt to provide the Reynolds number, `Re`, which -corresponds to a non-dimensional formulation of the Navier-Stokes equations. -This formulation can effectively be obtained by setting \f$ \rho = 1 \f$ and \f$ -\mu = 1/Re \f$. This is exactly what Neko does under the hood, when `Re` is -provided in the case file. - -Note that if both `Re` and any of the dimensional material properties are -provided, the simulation will issue an error. - -As an alternative to providing material properties in the case file, it is -possible to do that in a special routine in the user file. This is demonstrated -in the `rayleigh-benard-cylinder` example. Ultimately, both `rho` and `mu` have -to be set in the subroutine, but it can be based on arbitrary computations and -arbitrary parameters read from the case file. Additionally, this allows to -change the material properties in time. - - -### Inflow boundary conditions -The object `inflow_condition` is used to specify velocity values at a Dirichlet -boundary. -This does not necessarily have to be an inflow boundary, so the name is not so -good, and will most likely be changed along with type changes in the code. -Since not all cases have Dirichlet boundaries (note, the special case of a -no-slip boundary is treated separately in the configuration), this object -is not obligatory. -The means of prescribing the values are controlled via the `type` keyword: - -1. `user`, the values are set inside the compiled user file. -2. `uniform`, the value is a constant vector, looked up under the `value` - keyword. -3. `blasius`, a Blasius profile is prescribed. Its properties are looked up - in the `case.fluid.blasius` object, see below. - -### Initial conditions -The object `initial_condition` is used to provide initial conditions. -It is mandatory. -Note that this currently pertains to both the fluid, but also scalars. -The means of prescribing the values are controlled via the `type` keyword: - -1. `user`, the values are set inside the compiled user file. The only way to - initialize scalars. -2. `uniform`, the value is a constant vector, looked up under the `value` - keyword. -3. `blasius`, a Blasius profile is prescribed. Its properties are looked up - in the `case.fluid.blasius` object, see below. - -### Blasius profile -The `blasius` object is used to specify the Blasius profile that can be used for the -initial and inflow condition. -The boundary cannot be tilted with respect to the coordinate axes. -It requires the following parameters: - -1. `delta`, the thickness of the boundary layer. -2. `freestream_velocity`, the velocity value in the free stream. -3. `approximation`, the numerical approximation to the Blasius profile. - - `linear`, linear approximation. - - `quadratic`, quadratic approximation. - - `cubic`, cubic approximation. - - `quartic`, quartic approximation. - - `sin`, sine function approximation. - -### Source terms -The `source_terms` object should be used to specify the source terms in the -momentum equation. The object is not mandatory, by default no forcing term is -present. Each source term, is itself a JSON object, so `source_terms` is just an -array of them. Note that with respect to the governing equations, the source -terms define \f$ f^u \f$, meaning that the values are then multiplied by the -density. - -For each source, the `type` keyword defines the kind of forcing that will be -introduced. The following types are currently implemented. - -1. `constant`, constant forcing. Strength defined by the `values` array with 3 - reals corresponding to the 3 components of the forcing. -2. `user_pointwise`, the values are set inside the compiled user file, using the - pointwise user file subroutine. Only works on CPUs! -2. `user_vector`, the values are set inside the compiled user file, using the - non-pointwise user file subroutine. Should be used when running on the GPU. - - -### Boundary types -The optional `boundary_types` keyword can be used to specify boundary conditions. -The reason for it being optional, is that some conditions can be specified -directly inside the mesh file. -In particular, this happens when Nek5000 `.re2` files are converted to `.nmsh`. -Periodic boundary conditions are *always* defined inside the mesh file. - -The value of the keyword is an array of strings, with the following possible -values: -* `w`, a no-slip wall. -* `v`, a Dirichlet boundary. -* `sym`, a symmetry boundary. -* `on`, Dirichlet for the boundary-parallel velocity and homogeneous Neumann for - the wall-normal. The wall-parallel velocity is defined by the initial - condition. -* `o`, outlet boundary. -* `o+dong`, outlet boundary using the Dong condition. -* `on+dong`, an `on` boundary using the Dong condition, ensuring that the - wall-normal velocity is directed outwards. - -In some cases, only some boundary types have to be provided. -For example, when one has periodic boundaries, like in the channel flow example. -In this case, to put the specification of the boundary at the right index, -preceding boundary types can be marked with an empty string. -For example, if boundaries with index 1 and 2 are periodic, and the third one is -a wall, we can set. -``` -"boundary_types": ["", "", "w"] -``` - -## Linear solver configuration -The mandatory `velocity_solver` and `pressure_solver` objects are used to -configure the solvers for the momentum and pressure-Poisson equation. -The following keywords are used, with the corresponding options. - -* `type`, solver type. - - `cg`, a conjugate gradient solver. - - `pipecg`, a pipelined conjugate gradient solver. - - `bicgstab`, a bi-conjugate gradient stabilized solver. - - `cacg`, a communication-avoiding conjugate gradient solver. - - `gmres`, a GMRES solver. Typically used for pressure. -* `preconditioner`, preconditioner type. - - `jacobi`, a Jacobi preconditioner. Typically used for velocity. - - `hsmg`, a hybrid-Schwarz multigrid preconditioner. Typically used for pressure. - - `ident`, an identity matrix (no preconditioner). -* `absolute_tolerance`, tolerance criterion for convergence. -* `max_iterations`, maximum number of iterations before giving up. -* `projection_space_size`, size of the vector space used for accelerating the - solution procedure. If 0, then the projection space is not used. - More important for the pressure equation. - -### Flow rate forcing -The optional `flow_rate_force` object can be used to force a particular flow -rate through the domain. -Useful for channel and pipe flows. -The configuration uses the following parameters: - -* `direction`, the direction of the flow, defined as 0, 1, or 2, corresponding - to x, y or z, respectively. -* `value`, the desired flow rate. -* `use_averaged_flow`, whether `value` specifies the domain-averaged (bulk) - velocity or the volume flow rate. - - -### Full parameter table -All the parameters are summarized in the table below. -This includes all the subobjects discussed above, as well as keyword parameters -that can be described concisely directly in the table. - -Name | Description | Admissable values | Default value -----------------------------------------|----------------------------------------------------------------------------------|--------------------------------------------------|-------------- -`scheme` | The fluid solve type. | `pnpn` | - -`Re` | The Reynolds number. | Positive real | - -`rho` | The density of the fluid. | Positive real | - -`mu` | The dynamic viscosity of the fluid. | Positive real | - -`output_control` | Defines the interpretation of `output_value` to define the frequency of writing checkpoint files. | `nsamples`, `simulationtime`, `tsteps`, `never` | - -`output_value` | The frequency of sampling in terms of `output_control`. | Positive real or integer | - -`inflow_condition.type` | Velocity inflow condition type. | `user`, `uniform`, `blasius` | - -`inflow_condition.value` | Value of the inflow velocity. | Vector of 3 reals | - -`initial_condition.type` | Initial condition type. | `user`, `uniform`, `blasius` | - -`initial_condition.value` | Value of the velocity initial condition. | Vector of 3 reals | - -`blasius.delta` | Boundary layer thickness in the Blasius profile. | Positive real | - -`blasius.freestream_velocity` | Freestream velocity in the Blasius profile. | Vector of 3 reals | - -`blasius.approximation` | Numerical approximation of the Blasius profile. | `linear`, `quadratic`, `cubic`, `quartic`, `sin` | - -`source_term.type` | Source term in the momentum equation. | `noforce`, `user`, `user_vector` | - -`boundary_types` | Boundary types/conditions labels. | Array of strings | - -`velocity_solver.type` | Linear solver for the momentum equation. | `cg`, `pipecg`, `bicgstab`, `cacg`, `gmres` | - -`velocity_solver.preconditioner` | Linear solver preconditioner for the momentum equation. | `ident`, `hsmg`, `jacobi` | - -`velocity_solver.absolute_tolerance` | Linear solver convergence criterion for the momentum equation. | Positive real | - -`velocity_solver.maxiter` | Linear solver max iteration count for the momentum equation. | Positive real | 800 -`velocity_solver.projection_space_size` | Projection space size for the momentum equation. | Positive integer | 0 -`pressure_solver.type` | Linear solver for the momentum equation. | `cg`, `pipecg`, `bicgstab`, `cacg`, `gmres` | - -`pressure_solver.preconditioner` | Linear solver preconditioner for the momentum equation. | `ident`, `hsmg`, `jacobi` | - -`pressure_solver.absolute_tolerance` | Linear solver convergence criterion for the momentum equation. | Positive real | - -`pressure_solver.maxiter` | Linear solver max iteration count for the momentum equation. | Positive real | 800 -`pressure_solver.projection_space_size` | Projection space size for the momentum equation. | Positive integer | 0 -`flow_rate_force.direction` | Direction of the forced flow. | 0, 1, 2 | - -`flow_rate_force.value` | Bulk velocity or volumetric flow rate. | Positive real | - -`flow_rate_force.use_averaged_flow` | Whether bulk velocity or volumetric flow rate is given by the `value` parameter. | `true` or `false` | - -`freeze` | Whether to fix the velocity field at initial conditions. | `true` or `false` | `false` - -## Scalar -The scalar object allows to add a scalar transport equation to the solution. -The solution variable is called `s`, but saved as `temperature` in the fld - files. -Some properties of the object are inherited from `fluid`: the properties of the -linear solver, the value of the density, and the output -control. - -The scalar equation requires defining additional material properties: the -specific heat capacity and thermal conductivity. These are provided as `cp` and -`lambda`. Similarly to the fluid, one can provide the Peclet number, `Pe`, as an -alternative. In this case, `cp` is set to 1 and `lambda` to the inverse of `Pe`. - -The boundary conditions for the scalar are specified through the -`boundary_types` keyword. -It is possible to directly specify a uniform value for a Dirichlet boundary. -The syntax is, e.g. `d=1`, to set the value to 1, see the Ryleigh-Benard -example case. - -Note that the source term configuration for the scalar currently differs from -that of the fluid. This will be addressed in a future release. For now, the -`source_term` keyword should be used, set to either `noforce` (no forcing), -`user` (same as `user_pointwise` for the fluid), and `user_vector` (same as for -the fluid). - -Name | Description | Admissable values | Default value --------------------|-------------------------------------------|----------------------------------|-------------- -`enabled` | Whether to enable the scalar computation. | `true` or `false` | `true` -`Pe` | The Peclet number. | Positive real | - -`cp` | Specific heat cpacity. | Positive real | - -`lambda` | Thermal conductivity. | Positive real | - -`source_term.type` | Source term in the momentum equation. | `noforce`, `user`, `user_vector` | - -`boundary_types` | Boundary types/conditions labels. | Array of strings | - - -## Statistics - -This object adds the collection of statistics for the flud fields. -For additional details on the workflow, see the corresponding page in the -user manual. - -Name | Description | Admissable values | Default value ---------------------|----------------------------------------------------------------------|-------------------|-------------- -`enabled` | Whether to enable the statistics computation. | `true` or `false` | `true` -`start_time` | Time at which to start gathering statistics. | Positive real | 0 -`sampling_interval` | Interval, in timesteps, for sampling the flow fields for statistics. | Positive integer | 10 - -## Simulation components -Simulation components enable the user to perform various additional operations, -which are not strictly necessary to run the solver. An example could be -computing and output of additional fields, e.g. vorticity. - -A more detailed description as well as a full list of available components and - their setup is provided in a [separate page of the manual](simcomps.md). - -## Point zones -Point zones enable the user to select GLL points in the computational domain according to some geometric criterion. Two predefined geometric shapes are selectable from the case file, boxes and spheres. - -A point zone object defined in the case file can be retrieved from the point zone registry, `neko_point_zone_registry`, and can be used to perform any zone-specific operations (e.g. localized source term, probing...). User-specific point zones can also be added manually to the point zone registry from the user file. - -A more detailed description as well as a full list of available components and - their setup is provided in a [separate page of the manual](point-zones.md). diff --git a/doc/pages/developer-guide.md b/doc/pages/developer-guide.md new file mode 100644 index 00000000000..5d71bf416db --- /dev/null +++ b/doc/pages/developer-guide.md @@ -0,0 +1,11 @@ +# Developer guide {#developer-guide} + +These pages will provide any developer with the information needed to contribute +to Neko. The pages cover topics such as contributing, development patterns, code +style, testing and usage of accelerators. + +- \subpage contributing +- \subpage dev_patterns +- \subpage code-style +- \subpage testing +- \subpage accelerators diff --git a/doc/pages/accelerators.md b/doc/pages/developer-guide/accelerators.md similarity index 89% rename from doc/pages/accelerators.md rename to doc/pages/developer-guide/accelerators.md index b12cb22ee08..48e881bd09f 100644 --- a/doc/pages/accelerators.md +++ b/doc/pages/developer-guide/accelerators.md @@ -1,5 +1,7 @@ # Accelerators {#accelerators} +\tableofcontents + ## Device abstraction layer Neko has a device abstraction layer (device.F90) to manage device memory, data transfer and kernel invocations directly from a familiar Fortran interface, targeting all supported accelerator backends. @@ -57,8 +59,8 @@ Since allocation and association is such an ordinary operation, Neko provides a call device_map(x, x_d, n) ~~~~~~~~~~~~~~~ -#### Data transfer -To copy data between host and device (and device to use) use the routine device::device_memcpy which takes a Fortran array `x` of type `integer`, `integer(i8)`, `real(kind=sp)` or `real(kind=dp)`, its size `n` and the direction as the third argument which can either be `HOST_TO_DEVICE` to copy data to the device, or `DEVICE_TO_HOST` to retrieve data from the device. +#### Data transfer {#accelerators_data-transfer} +To copy data between host and device (and device to use) use the routine device::device_memcpy which takes a Fortran array `x` of type `integer`, `integer(i8)`, `real(kind=sp)` or `real(kind=dp)`, its size `n` and the direction as the third argument which can either be `HOST_TO_DEVICE` to copy data to the device, or `DEVICE_TO_HOST` to retrieve data from the device. The fourth boolean argument, `sync`, controls whether the transfer is synchronous (`.true.`) or asynchronous (`.false.`). ~~~~~~~~~~~~~~~{.f90} integer, allocatable :: x(:) type(c_ptr) :: x_d @@ -67,14 +69,12 @@ To copy data between host and device (and device to use) use the routine device: allocate(x(n)) ... ! Fill the device with data - call device_memcpy(x, x_d, n, HOST_TO_DEVICE) + call device_memcpy(x, x_d, n, HOST_TO_DEVICE, sync=.false.) ... ! Retrieve data from device - call device_memcpy(x, x_d, n, DEVICE_TO_HOST) + call device_memcpy(x, x_d, n, DEVICE_TO_HOST, sync=.true.) ~~~~~~~~~~~~~~~ -@attention device::device_memcpy defaults to asynchronous data transfer. The optional boolean argument `sync` must be true if synchronous transfers are needed. - @note that there's a special device::device_memcpy_cptr routine which only works with pointers, either on the host or the device, and the size needs to be given in bytes. This routine can be used to copy data between two arrays on a device with the direction `DEVICE_TO_DEVICE`. ~~~~~~~~~~~~~~~{.f90} type(c_ptr) :: x_d, y_d @@ -84,9 +84,10 @@ To copy data between host and device (and device to use) use the routine device: call device_memcpy_cptr(y_d, x_d, s, DEVICE_TO_DEVICE) ~~~~~~~~~~~~~~~ +@attention device::device_memcpy_cptr defaults to asynchronous data transfer. The optional boolean argument `sync` must be true if synchronous transfers are needed. @attention It is the programmers' responsibility to make sure that device arrays are kept in sync with the associated host array. Neko does not perform any implicit data movement. -### Offload work +### Offload work {#accelerators_offload-work} To offload work to a device, most routines in Neko have a device version prefixed with `device_`. These routines have the same arguments as the host equivalent, but one must pass device pointers instead of Fortran arrays. For example, we call the `math::add2` routine to add two arrays together on the host. diff --git a/doc/pages/code-style.md b/doc/pages/developer-guide/code-style.md similarity index 77% rename from doc/pages/code-style.md rename to doc/pages/developer-guide/code-style.md index 2e78d781027..11afa102351 100644 --- a/doc/pages/code-style.md +++ b/doc/pages/developer-guide/code-style.md @@ -1,4 +1,7 @@ # Code style {#code-style} + +\tableofcontents + Fortran code should conform to the Fortran 2008 standard and should use an indentation level of 2, except for the extra indentation within `do` `if`, `select` or `where` statements and for each level inside a structure e.g. `type`, `interface`, where the indentation level is 3, and a 0 indentation is used for `module` or `contains` (except for `contains` inside a derived type, where a single indentation level is used). These are the default rules in Emacs' Fortran mode, an example is given below, @@ -36,6 +39,17 @@ end module example Please note that the maximum line length in Neko should not exceed 80 columns. +Additionally the [findent](https://github.com/wvermin/findent) tool can be used +to enforce these rules by assigning the following options. The documentation of +`findent` provide details for emacs, vim and gedit. For VSCode, the +[Modern Fortran](https://marketplace.visualstudio.com/items?itemName=fortran-lang.linter-gfortran) +extension provide an integration. + +```sh +findent -i2 -d3 -f3 -s3 -w3 -t3 -j3 -k- -Rr -c3 < input.f90 > formatted.f90 +``` + + ## Data types For portability reasons, it is essential to only use data type kinds defined in num_types.f90 and avoid legacy constructs like `real*8` or `integer(kind=8)` diff --git a/CONTRIBUTING.md b/doc/pages/developer-guide/contributing.md similarity index 98% rename from CONTRIBUTING.md rename to doc/pages/developer-guide/contributing.md index 4717a9e7e0e..2d641c1fa1f 100644 --- a/CONTRIBUTING.md +++ b/doc/pages/developer-guide/contributing.md @@ -1,4 +1,7 @@ -# Contributing to Neko +# Contributing to Neko {#contributing} + +\tableofcontents + Please read the following guide before contributing new code or a bug fix to Neko. All contributions to Neko must be made under the 3-Clause BSD license. Please refer to the `COPYING` file. @@ -51,7 +54,7 @@ This section contains information on how to add new source files to the build sy Neko uses Autotools for building all sources. You will need to have at least `autoconf` and `automake` installed for development work. It is also highly recommended to have `makedepf90` installed to avoid error-prone manual dependency tracking. -The following steps describe how to add a new Fortran file to Neko`s build system +The following steps describe how to add a new Fortran file to Neko's build system 1. Place the file in an appropriate subdirectory under `src/`. Either create a new subdirectory or place the file in `common` if none of the existing directories is a good match. Avoid placing the file directly under `src` 2. Add the file to the `neko_fortran_SOURCES` list in `src/Makefile.am`, following the pattern of `/newfile.f90`. 3. Ensure correct dependency tracking diff --git a/doc/pages/dev_patterns.md b/doc/pages/developer-guide/dev_patterns.md similarity index 99% rename from doc/pages/dev_patterns.md rename to doc/pages/developer-guide/dev_patterns.md index 15f52784440..706c1a37948 100644 --- a/doc/pages/dev_patterns.md +++ b/doc/pages/developer-guide/dev_patterns.md @@ -1,5 +1,7 @@ # Programming patterns and conventions {#dev_patterns} +\tableofcontents + This section aims to summarize the programming conventions in Neko to guide all developers into writing code with good style and facilitate the reuse of the same programming patterns throughout the code base. It is a good idea to check diff --git a/doc/pages/testing.md b/doc/pages/developer-guide/testing.md similarity index 99% rename from doc/pages/testing.md rename to doc/pages/developer-guide/testing.md index 847e343045b..ce2f1f38e60 100644 --- a/doc/pages/testing.md +++ b/doc/pages/developer-guide/testing.md @@ -1,5 +1,7 @@ # Testing {#testing} +\tableofcontents + ## pFUnit Neko uses the software pFUnit for unit testing. To install the software you can use the following commands, in which you should just set the desired installation path @@ -70,4 +72,4 @@ The instructions will differ somewhat depending on whether you test uses MPI or Find `# Config tests` line and the `AC_CONFIG_FILES` list below it. Add the path to a `Makefile` (note, no `.in`!!) in your test folder. 8. Please also add the compilation products of your test to the `tests/.gitignore` file. - This helps keeps the version controlled file list unpolluted. \ No newline at end of file + This helps keeps the version controlled file list unpolluted. diff --git a/doc/pages/index.md b/doc/pages/index.md index 9f5c6c764c0..332bf6f67a8 100644 --- a/doc/pages/index.md +++ b/doc/pages/index.md @@ -8,31 +8,41 @@ accelerators to SX-Aurora vector processors. Neko has its roots in the spectral element code Nek5000 from UChicago/ANL, from where many of the namings, code structure and numerical methods are adopted. -Neko is currently maintained and developed at KTH Royal Institute of -Technology. - - -\tableofcontents - -\subpage installation - -\subpage governing-equations - -\subpage case-file - -\subpage simcomps - -\subpage point-zones - -\subpage accelerators - -\subpage statistics-guide - -\subpage code-style - -\subpage dev_patterns - -\subpage testing - -\subpage appendix - +## Structure of the Manual + +In order to facilitate reading of the documentation. The manual is divided into +several sections. The [user guides](@ref user-guide) are intended for users of the code, while the +[developer guides](@ref developer-guide) are intended for developers of the code. The [appendix](@ref appendices) contains +additional information that is not directly related to the usage of the code. + +- \subpage user-guide + - [Installation](@ref installation) explains how to download and compile Neko + on your platform. + - [Case File](@ref case-file) discusses the various parameters and options + for your case setup such as boundary conditions, output control, etc. + - [User File](@ref user-file) explains all the user functions and how they can + be used to run more advanced simulations. + - [Simulation Components](@ref simcomps) presents some extra functionalities + and tools such as computation and output of additional fields, in-situ +post-processing operations, data sampling, etc. + - [Point Zones](@ref point-zones) allow you to select zones in the mesh for +application of source terms, initial conditions, etc. + - [Statistics Guide](@ref statistics-guide) outlines the steps to generate +3D and 2D field statistics. + - [Input/Output](@ref io) explains how to read and write various types of data +using the Neko framework. +- \subpage developer-guide + - [Contributing](@ref contributing) presents basic instructions to add +your contributions to Neko. + - [Development Patterns](@ref dev_patterns) outlines the standards to be used +when developing code in the Neko framework, such as naming conventions and +documentation. + - [Code Style](@ref code-style) introduces some extra programming conventions +related to coding style and IDEs. + - [Testing](@ref testing) outlines the steps to run and add unit tests to the +code base with pFUnit. + - [Accelerators](@ref accelerators) discusses important concepts and +conventions related to GPU programming in the Neko framework. +- \subpage appendices + - [Governing Equations](@ref governing-equations) used in our solvers. + - [Publications](@ref publications) diff --git a/doc/pages/simcomps.md b/doc/pages/simcomps.md deleted file mode 100644 index 4f8e91b6c3d..00000000000 --- a/doc/pages/simcomps.md +++ /dev/null @@ -1,70 +0,0 @@ - -# Simulation components {#simcomps} -## What are simulation components? -Simulation components, or simcomps fo short, incapsulate additional -functionality that may be useful for certain cases but not necessary to run the -solver. -This can include computation and output of additional fields, in-situ -post-processing operations, data sampling, etc. - -By design, simulation components can tap into every aspect of the simulation, -so they can be quite powerful. -As the code grows, we expect to add more and more simcomps to the code. - -## Adding simulation components to the case -Each simcomp is defined as a single JSON object at are added to an array -of objects called `simulation_components`, which resides directly under the -`case` object. - -## Controling execution and file output -Each simulation component is, by default, executed once per time step to -perform associated computations and output. -However, this can be modified by using the `compute_control` and `compute_value` -parameters for the computation and the `output_control and` and -`output_value` for the output to disk. -The paramters for the `_control` values are the same as for the fluid and -checkpointing. - -For example, in the `tgv` example case the `vorticity` component is executed -once per 50 time steps. -~~~~~~~~~~~~~~~{.json} -{ - "type": "vorticity", - "compute_control": "tsteps", - "compute_value": 50 -} -~~~~~~~~~~~~~~~ -If no parameters for the `output_` parametersare provided, they areset to be the - same as for `compute_`. - - ## List of simulation components - - ### vorticity - Computes the vorticity field an stores in the field registry as `omega_x`, - `omega_y` and `omega_z`. - Currently produces no output. - - ### lambda2 - Computes \f$ \lambda_2 \f$ for the velocity field and stores it in the normal output files as the first unused field. - This means that \f$ \lambda_2 \f$ can be found in the temeprature field in then fld files if running without a scalar - and s1 if neko is run with one scalar. - - ### probes - Probes selected solution fields at the points given inside an input file. Example usage: - ~~~~~~~~~~~~~~~{.json} - { - "type": "probes", - "compute_control": "simulationtime", - "compute_value" : 1, - "points_file": "probes.csv", - "output_file": "output.csv", - "fields": ["w","s"] - } - ~~~~~~~~~~~~~~~ -This probes the fields 'w', and 's' in the points described by points.csv and outputs into output.csv every 1 time units. - - - - - - diff --git a/doc/pages/statistics.md b/doc/pages/statistics.md deleted file mode 100644 index f527b79847d..00000000000 --- a/doc/pages/statistics.md +++ /dev/null @@ -1,45 +0,0 @@ - -# Statistics guide {#statistics-guide} - -Under development, updated incrementally -## Notes on the statistics calculation in Neko -***Daniele Massaro, Martin Karp (KTH)*** - -1) Run your simulations and collect mean_field* and stats* files by having the statistics object added to the case file, and specifying the write interval to something suitable. - -2) For each RUN_i, you get a set of mean_field* and stats* files. You can average them for each single RUN_i, or average all of them only once (after re-ordering them properly). If you follow the second approach, go to step 4. -Here, for each RUN_i, we compute the averaged means with "average_fields_in_time": ---mean -`srun --unbuffered /your/location/neko/bin/average_fields_in_time meanXX.fld T0 mean_p.fld` -where T0 is the initial time. To get some hints on the input for the script one can simply run `./average_fields_in_time` without any arguments. For RUN_1 the time T0 can be taken from the log of the first simulation, or from the header of the first mean_field* file; in this way you discard that file. For RUN_i, with i>1, it can be taken from header of the last file mean_field* of the previous simulation RUN_{i-1}. -In the command line, for the name "meanXX.fld", XX indicates the number of the nek5000 file. In mean_fieldXX.nek5000 you set the number of the first mean0* file to read and the number of steps corresponding to the number of files. In this way, the code generates a mean_p0.f00000 and mean_post0.nek5000. It is suggested to rename mean_p0.f00000 as mean_p0.f0000i and move it to a separate folder where you take the average with all the others. ---stats -`srun --unbuffered /your/location/neko/bin/average_fields_in_time statXX.fld T0 stat_p.fld` -T0 is the same as before. In stat0.nek5000 you set the number of the first stat0* file to read and the number of steps corresponds to the number of files. It is suggested to rename stat_p0.f00000 as stat_p0.f0000i and move it to a separate folder where you take the average with all the others. -Repeat this for each RUN_i folder. Eventually, given n RUN_i folders, you will get n mean_p* and stat_p* files. - -3) Take the average of the averaged runs. Now, the time average over all the n simulations is taken. The procedure is similar, but changing the output name is recommended to avoid over-writing. --- mean -`srun --unbuffered /your/location/neko/bin/average_fields mean_p0.fld T0 mean_post.fld` -where T0 is the initial time which has been used to compute mean_p* for RUN_1. --- stats -`srun --unbuffered /your/location/neko/bin/average_fields stat_p0.fld T0 stat_post.fld` -where T0 is the initial time which has been used to compute mean_p* for RUN_1. - - - - - -4) Compute Reynolds stress tensors and other statistical moments (see the list). -`srun --unbuffered /your/location/neko/bin/postprocess_fluid_stats mesh_file.nmsh mean_post0.fld stat_post0.fld` - -5) We also provide a tool to average the resulting field in a homogenous direction in `bin/average_field_in_space`. The required arguments are shown if one runs the program without any input. Currently it requires the number of elements in the homogenous direction as an input argument, e.g. -`./average_field_in_space mesh.nmsh field.fld x 18 outfield.fld` -if we want to average a field in the x direction on a mesh with 18 elements in x and output the averaged field in outfield0.nek5000. - - - - - - - diff --git a/doc/pages/user-guide.md b/doc/pages/user-guide.md new file mode 100644 index 00000000000..e2c35ee3e27 --- /dev/null +++ b/doc/pages/user-guide.md @@ -0,0 +1,13 @@ +# User guide {#user-guide} + +These pages will provide any user with the information needed to run Neko. Both +for simple examples where one just wish to configure a case and run it, to more +advanced topics such as statistics and user defined extensions. + +- \subpage installation +- \subpage case-file +- \subpage user-file +- \subpage simcomps +- \subpage point-zones +- \subpage statistics-guide +- \subpage io diff --git a/doc/pages/user-guide/case-file.md b/doc/pages/user-guide/case-file.md new file mode 100644 index 00000000000..98cd8e04fb6 --- /dev/null +++ b/doc/pages/user-guide/case-file.md @@ -0,0 +1,508 @@ +# Case File {#case-file} + +\tableofcontents + +The case file defines all the parameters of a simulation. +The format of the file is JSON, making it easy to read and write case files +using the majority of the popular programming languages. +JSON is heirarchical and, and consists of parameter blocks enclosed in curly +braces. +These blocks are referred to as objects. +The case file makes use objects to separate the configuration of different parts + of the solver. +We refer the reader to the examples shipped with the code to get a good +idea of how a case file looks. +The table below provides a complete reference for all possible configuration +choices. + +## High-level structure +The current high-level structure of the case file is shown below. + +~~~~~~~~~~~~~~~{.json} +{ + "version": 1.0 + "case": { + "numerics": {} + "fluid": {} + "scalar": {} + "statistics": {} + "simulation_components" : [] + "point_zones" : [] + } +} +~~~~~~~~~~~~~~~ +The `version` keywords is reserved to track changes in the format of the file. +The the subsections below we list all the configuration options for each of the high-level objects. +Some parameters will have default values, and are therefore optional. + +## Output frequency control +A common scheme for controlling the output frequency is applied for various +outputs. +It is described already now in order to clarify the meaning of several +parameters found in the tables below. + +The frequency is controlled by two paramters, ending with `_control` and +`_value`, respectively. +The latter name is perhaps not ideal, but it is somewhat difficult to come up +with a good one, suggestions are welcome. + +The `_value` parameter is a number, that defines the output frequency, but the +interpretation of that number depends on the choice of `_control`. +The three following options are possible. +1. `simulationtime`, then `_value` is the time interval between the outputs. +2. `tsteps`, then `_value` is the number of time steps between the outputs. +3. `nsamples`, then `_value` is the total number of outputs that will be + performed in the course of the simulation. +4. `never`, then `_value` is ignored and output is never performed. + + +## The case object + +This object is mostly used as a high-level container for all the other objects, +but also defines several parameters that pertain to the simulation as a whole. + +| Name | Description | Admissible values | Default value | +| -------------------------- | ----------------------------------------------------------------------------------------------------- | ----------------------------------------------- | ------------- | +| `mesh_file` | The name of the mesh file. | Strings ending with `.nmsh` | - | +| `output_boundary` | Whether to write a `bdry0.f0000` file with boundary labels. Can be used to check boundary conditions. | `true` or `false` | `false` | +| `output_directory` | Folder for redirecting solver output. Note that the folder has to exist! | Path to an existing directory | `.` | +| `output_precision` | Whether to output snapshots in single or double precision | `single` or `double` | `single` | +| `load_balancing` | Whether to apply load balancing. | `true` or `false` | `false` | +| `output_partitions` | Whether to write a `partitions.vtk` file with domain partitioning. | `true` or `false` | `false` | +| `output_checkpoints` | Whether to output checkpoints, i.e. restart files. | `true` or `false` | `false` | +| `checkpoint_control` | Defines the interpretation of `checkpoint_value` to define the frequency of writing checkpoint files. | `nsamples`, `simulationtime`, `tsteps`, `never` | - | +| `checkpoint_value` | The frequency of sampling in terms of `checkpoint_control`. | Positive real or integer | - | +| `restart_file` | checkpoint to use for a restart from previous data | Strings ending with `.chkp` | - | +| `timestep` | Time-step size | Positive reals | - | +| `variable_timestep` | Whether to use variable dt | `true` or `false` | `false` | +| `max_timestep` | Maximum time-step size when variable time step is activated | Positive reals | - | +| `target_cfl` | The desired CFL number | Positive real | `0.4` | +| `cfl_max_update_frequency` | The minimum interval between two time-step-updating steps in terms of time steps | Integer | `0` | +| `cfl_running_avg_coeff` | The running average coefficient `a` where `cfl_avg_new = a * cfl_new + (1-a) * cfl_avg_old` | Positive real between `0` and `1` | `0.5` | +| `max_dt_increase_factor` | The maximum scaling factor to increase time step | Positive real greater than `1` | `1.2` | +| `min_dt_decrease_factor` | The minimum scaling factor to decrease time step | Positive real less than `1` | `0.5` | +| `end_time` | Final time at which the simulation is stopped. | Positive reals | - | +| `job_timelimit` | The maximum wall clock duration of the simulation. | String formatted as HH:MM:SS | No limit | + +

Boundary type numbering in the `output_boundary` field

+ +When the `output_boundary` setting is set to `true`, and additional `.fld` file +will be stored in the beginning of the simulation, where the recognized +boundaries will be marked with an integer number. This is a good way to debug +the simulation setup. The value of the number depends on the type of the +boundary as follows: + +1. A wall boundary, i.e. the `w` label. +2. A Dirichlet boundary, i.e. the `v` label. +3. An outlet boundary, i.e. the `o` label. +4. A symmetry boundary, i.e. the `sym` label. +5. A periodic boundary. +6. An wall-normal transpiration boundary, i.e. the `on` label. + +Note that the boundary conditions can be both prescribed via the labels in the +case file or built into the mesh via conversion from a `.re2` file. Both types +will be picked up and marked in the field produced by `output_boundary`. + + +## Numerics +Used to define the properties of the numerical discretization. + +| Name | Description | Admissible values | Default value | +| ---------------------------- | ------------------------------------------------------------------------------------------------------------- | -------------------------- | ------------------------------- | +| `polynomial_order` | The oder of the polynomial basis. | Integers, typically 5 to 9 | - | +| `time_order` | The order of the time integration scheme. Refer to the `time_scheme_controller` type documention for details. | 1,2, 3 | - | +| `dealias` | Whether to apply dealiasing to advection terms. | `true` or `false` | `false` | +| `dealiased_polynomial order` | The polynomial order in the higher-order space used in the dealising. | Integer | `3/2(polynomial_order + 1) - 1` | + +## Fluid + +The configuration of the fluid solver and the flow problem. +Contains multiple subobjects for various parts of the setup. + +### Material properties +As per the governing equations, Neko requires the value of the density and +dynamic viscosity to define the flow problem. These can be provided as `rho` and +`mu` in the case file. + +Alternatively, one may opt to provide the Reynolds number, `Re`, which +corresponds to a non-dimensional formulation of the Navier-Stokes equations. +This formulation can effectively be obtained by setting \f$ \rho = 1 \f$ and \f$ +\mu = 1/Re \f$. This is exactly what Neko does under the hood, when `Re` is +provided in the case file. + +Note that if both `Re` and any of the dimensional material properties are +provided, the simulation will issue an error. + +As an alternative to providing material properties in the case file, it is +possible to do that in a special routine in the user file. This is demonstrated +in the `rayleigh-benard-cylinder` example. Ultimately, both `rho` and `mu` have +to be set in the subroutine, but it can be based on arbitrary computations and +arbitrary parameters read from the case file. Additionally, this allows to +change the material properties in time. + +### Boundary types {#case-file_boundary-types} +The optional `boundary_types` keyword can be used to specify boundary conditions. +The reason for it being optional, is that some conditions can be specified +directly inside the mesh file. +In particular, this happens when Nek5000 `.re2` files are converted to `.nmsh`. +Periodic boundary conditions are *always* defined inside the mesh file. + +The value of the keyword is an array of strings, with the following possible +values: + +* Standard boundary conditions + * `w`, a no-slip wall. + * `v`, a velocity Dirichlet boundary. + * `sym`, a symmetry boundary. + * `o`, outlet boundary. + * `on`, Dirichlet for the boundary-parallel velocity and homogeneous Neumann for + the wall-normal. The wall-parallel velocity is defined by the initial + condition. + +* Advanced boundary conditions + * `d_vel_u`, `d_vel_v`, `d_vel_w` (or a combination of them, separated by a `"/"`), a + Dirichlet boundary for more complex velocity profiles. This boundary condition uses a + [more advanced user interface](#user-file_field-dirichlet-update). + * `d_pres`, a boundary for specified non-uniform pressure profiles, similar in + essence to `d_vel_u`,`d_vel_v` and `d_vel_w`. Can be combined with other + complex Dirichlet coonditions by specifying e.g.: `"d_vel_u/d_vel_v/d_pres"`. + * `o+dong`, outlet boundary using the Dong condition. + * `on+dong`, an `on` boundary using the Dong condition, ensuring that the + wall-normal velocity is directed outwards. + +In some cases, only some boundary types have to be provided. +For example, when one has periodic boundaries, like in the channel flow example. +In this case, to put the specification of the boundary at the right index, +preceding boundary types can be marked with an empty string. +For example, if boundaries with index 1 and 2 are periodic, and the third one is +a wall, we can set. +``` +"boundary_types": ["", "", "w"] +``` + +### Inflow boundary conditions {#case-file_fluid-if} +The object `inflow_condition` is used to specify velocity values at a Dirichlet +boundary. +This does not necessarily have to be an inflow boundary, so the name is not so +good, and will most likely be changed along with type changes in the code. +Since not all cases have Dirichlet boundaries (note, the special case of a +no-slip boundary is treated separately in the configuration), this object +is not obligatory. +The means of prescribing the values are controlled via the `type` keyword: + +1. `user`, the values are set inside the compiled user file. +2. `uniform`, the value is a constant vector, looked up under the `value` + keyword. +3. `blasius`, a Blasius profile is prescribed. Its properties are looked up + in the `case.fluid.blasius` object, see below. + +### Initial conditions {#case-file_fluid-ic} +The object `initial_condition` is used to provide initial conditions. +It is mandatory. +Note that this currently pertains to both the fluid, but also scalars. +The means of prescribing the values are controlled via the `type` keyword: + +1. `user`, the values are set inside the compiled user file. The only way to + initialize scalars. +2. `uniform`, the value is a constant vector, looked up under the `value` + keyword. +3. `blasius`, a Blasius profile is prescribed. Its properties are looked up + in the `case.fluid.blasius` object, see below. + +### Blasius profile +The `blasius` object is used to specify the Blasius profile that can be used for the +initial and inflow condition. +The boundary cannot be tilted with respect to the coordinate axes. +It requires the following parameters: + +1. `delta`, the thickness of the boundary layer. +2. `freestream_velocity`, the velocity value in the free stream. +3. `approximation`, the numerical approximation to the Blasius profile. + - `linear`, linear approximation. + - `quadratic`, quadratic approximation. + - `cubic`, cubic approximation. + - `quartic`, quartic approximation. + - `sin`, sine function approximation. + +### Source terms {#case-file_fluid-source-term} +The `source_terms` object should be used to specify the source terms in the +momentum equation. The object is not mandatory, by default no forcing term is +present. Each source term, is itself a JSON object, so `source_terms` is just an +array of them. Note that with respect to the governing equations, the source +terms define \f$ f^u \f$, meaning that the values are then multiplied by the +density. + +For each source, the `type` keyword defines the kind of forcing that will be +introduced. Furthermore, the `start_time` and `end_time` keywords can be used to +set a time frame for when the source term is active. Note, however, that these +keywords have no effect on the user-defined source terms, but their execution +can, of course, be directly controlled in the user code. By default, all source +terms are active during the entire simulation. + +The following types are currently implemented. + +1. `constant`, constant forcing. Strength defined by the `values` array with 3 + reals corresponding to the 3 components of the forcing. +2. `boussinesq`, a source term introducing boyancy based on the Boussinesq + approximation, \f$ \rho \beta (T - T_{ref} \cdot g) \f$. Here, \f$ rho \f$ is + density, \f$ \beta \f$ the thermal expansion coefficient, \f$ g \f$ the + gravity vector, and $T_{ref}$ a reference value of the scalar, typically + temperature. + + Reads the following entries: + - `scalar_field`: The name of the scalar that drives the source term, + defaults to "s". + - `reference_value`: The reference value of the scalar. + - `g`: The gravity vector. + - `beta`: The thermal expansion coefficient, defaults to the inverse of + `ref_value`. +3. `user_pointwise`, the values are set inside the compiled user file, using the + pointwise user file subroutine. Only works on CPUs! +4. `user_vector`, the values are set inside the compiled user file, using the + non-pointwise user file subroutine. Should be used when running on the GPU. +4. `brinkman`, Brinkman permeability forcing inside a pre-defined region. + +#### Brinkman +The Brinkman source term introduces regions of resistance in the fluid domain. +The volume force \f$ f_i \f$ applied in the selected regions are proportional to the +fluid velocity component \f$ u_i \f$. + +\f{eqnarray*}{ + f_i(x) &=& - B(x) u_i(x), \\ + B(x) &=& \kappa_0 + (\kappa_1 - \kappa_0) \xi(x) \frac{q + 1}{q + \xi(x)}, + \f} + +where, \f$ x \f$ is the current location in the domain, \f$ \xi: x \mapsto [0,1] \f$ +represent an indicator function for the resistance where \f$ \xi(x) = 0 \f$ is a free +flow. \f$ \kappa_i \f$ describes the limits for the force application at \f$ \xi(x)=0 \f$ +and \f$ \xi(x)=1 \f$. A penalty parameter \f$ q \f$ help us to reduce numerical problems. + +The indicator function will be defined based on the object type. The following +types are currently implemented. + +1. `boundary_mesh`, the indicator function for a boundary mesh is computed in + two steps. First, the signed distance function is computed for the boundary + mesh. Then, the indicator function is computed using the distance transform + function specified in the case file. +2. `point_zone`, the indicator function is defined as 1 inside the point zone + and 0 outside. + +Each object are added to a common indicator field by means of a point-wise max +operator. This means that the indicator field will be the union of all the +regions defined by the objects. + +To assist correct placement and scaling of objects from external sources, the +meshes can be transformed using the `mesh_transform` object. The object can be +used to apply a transformation to the boundary mesh. The following types are +currently implemented. + +1. `none`, no transformation is applied. +2. `bounding_box`, the boundary mesh is transformed to fit inside a box defined + by `box_min` and `box_max`. The box is defined by two vectors of 3 reals + each. The `keep_aspect_ratio` keyword can be used to keep the aspect ratio of + the boundary mesh. + +After the indicator field is computed, it is filtered using a filter type +specified in the case file. The filter is used to smooth the indicator field +before computing the Brinkman force. The following types are currently +implemented. + +1. `none`, no filtering is applied. + +The filtering can be defined for each object separately. Additionally, the +filter can be specified for the entire source term, in which case it will be +applied to the final indicator field, after all sources have been added. + +Additional keywords are available to modify the Brinkman force term. + +| Name | Description | Admissible values | Default value | +| ---------------------------------- | --------------------------------------------------------------------------------------------- | --------------------------------- | ------------- | +| `brinkman.limits` | Brinkman factor at free-flow (\f$ \kappa_0 \f$) and solid domain (\f$ \kappa_1 \f$). | Vector of 2 reals. | - | +| `brinkman.penalty` | Penalty parameter \f$ q \f$ when estimating Brinkman factor. | Real | \f$ 1.0 \f$ | +| `objects` | Array of JSON objects, defining the objects to be immersed. | Each object must specify a `type` | - | +| `distance_transform.type` | How to map from distance field to indicator field. | `step`, `smooth_step` | - | +| `distance_transform.value` | Values used to define the distance transform, such as cut-off distance for the step function. | Real | - | +| `filter.type` | Type of filtering applied to the indicator field either globally or for the current object. | `none` | `none` | +| `mesh_transform.type` | Apply a transformation to the boundary mesh. | `bounding_box`, `none` | `none` | +| `mesh_transform.box_min` | Lower left front corner of the box to fit inside. | Vector of 3 reals | - | +| `mesh_transform.box_max` | Upper right back corner of the box to fit inside. | Vector of 3 reals | - | +| `mesh_transform.keep_aspect_ratio` | Keep the aspect ratio of the boundary mesh. | `true` or `false` | `true` | + +Example of a Brinkman source term where a boundary mesh and a point zone are +combined to define the resistance in the fluid domain. The indicator field for +the boundary mesh is computed using a step function with a cut-off distance of +\f$ 0.1 \f$. The indicator field for the point zone is not filtered. + +~~~~~~~~~~~~~~~{.json} +"source_terms": [ + { + "type": "brinkman", + "objects": [ + { + "type": "boundary_mesh", + "name": "some_mesh.stl", + "distance_transform": { + "type": "step", + "value": 0.1 + }, + }, + { + "type": "point_zone", + "name": "cylinder_zone", + "filter": { + "type": "none" + } + } + ], + "brinkman": { + "limits": [0.0, 100.0], + "penalty": 1.0 + } + } +] +~~~~~~~~~~~~~~~ + + +## Linear solver configuration +The mandatory `velocity_solver` and `pressure_solver` objects are used to +configure the solvers for the momentum and pressure-Poisson equation. +The following keywords are used, with the corresponding options. + +* `type`, solver type. + - `cg`, a conjugate gradient solver. + - `pipecg`, a pipelined conjugate gradient solver. + - `bicgstab`, a bi-conjugate gradient stabilized solver. + - `cacg`, a communication-avoiding conjugate gradient solver. + - `gmres`, a GMRES solver. Typically used for pressure. + - `fusedcg`, a conjugate gradient solver optimised for accelerators using kernel fusion. +* `preconditioner`, preconditioner type. + - `jacobi`, a Jacobi preconditioner. Typically used for velocity. + - `hsmg`, a hybrid-Schwarz multigrid preconditioner. Typically used for pressure. + - `ident`, an identity matrix (no preconditioner). +* `absolute_tolerance`, tolerance criterion for convergence. +* `max_iterations`, maximum number of iterations before giving up. +* `projection_space_size`, size of the vector space used for accelerating the + solution procedure. If 0, then the projection space is not used. + More important for the pressure equation. +* `projection_hold_steps`, steps for which the simulation does not use projection after starting + or time step changes. E.g. if 5, then the projection space will start to update at the 6th + time step and the space will be utilized at the 7th time step. + +### Flow rate forcing +The optional `flow_rate_force` object can be used to force a particular flow +rate through the domain. +Useful for channel and pipe flows. +The configuration uses the following parameters: + +* `direction`, the direction of the flow, defined as 0, 1, or 2, corresponding + to x, y or z, respectively. +* `value`, the desired flow rate. +* `use_averaged_flow`, whether `value` specifies the domain-averaged (bulk) + velocity or the volume flow rate. + + +### Full parameter table +All the parameters are summarized in the table below. +This includes all the subobjects discussed above, as well as keyword parameters +that can be described concisely directly in the table. + +| Name | Description | Admissible values | Default value | +| --------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------ | ------------- | +| `scheme` | The fluid solve type. | `pnpn` | - | +| `Re` | The Reynolds number. | Positive real | - | +| `rho` | The density of the fluid. | Positive real | - | +| `mu` | The dynamic viscosity of the fluid. | Positive real | - | +| `output_control` | Defines the interpretation of `output_value` to define the frequency of writing checkpoint files. | `nsamples`, `simulationtime`, `tsteps`, `never` | - | +| `output_value` | The frequency of sampling in terms of `output_control`. | Positive real or integer | - | +| `inflow_condition.type` | Velocity inflow condition type. | `user`, `uniform`, `blasius` | - | +| `inflow_condition.value` | Value of the inflow velocity. | Vector of 3 reals | - | +| `initial_condition.type` | Initial condition type. | `user`, `uniform`, `blasius` | - | +| `initial_condition.value` | Value of the velocity initial condition. | Vector of 3 reals | - | +| `blasius.delta` | Boundary layer thickness in the Blasius profile. | Positive real | - | +| `blasius.freestream_velocity` | Free-stream velocity in the Blasius profile. | Vector of 3 reals | - | +| `blasius.approximation` | Numerical approximation of the Blasius profile. | `linear`, `quadratic`, `cubic`, `quartic`, `sin` | - | +| `source_terms` | Array of JSON objects, defining additional source terms. | See list of source terms above | - | +| `boundary_types` | Boundary types/conditions labels. | Array of strings | - | +| `velocity_solver.type` | Linear solver for the momentum equation. | `cg`, `pipecg`, `bicgstab`, `cacg`, `gmres` | - | +| `velocity_solver.preconditioner` | Linear solver preconditioner for the momentum equation. | `ident`, `hsmg`, `jacobi` | - | +| `velocity_solver.absolute_tolerance` | Linear solver convergence criterion for the momentum equation. | Positive real | - | +| `velocity_solver.maxiter` | Linear solver max iteration count for the momentum equation. | Positive real | 800 | +| `velocity_solver.projection_space_size` | Projection space size for the momentum equation. | Positive integer | 20 | +| `velocity_solver.projection_hold_steps` | Holding steps of the projection for the momentum equation. | Positive integer | 5 | +| `pressure_solver.type` | Linear solver for the momentum equation. | `cg`, `pipecg`, `bicgstab`, `cacg`, `gmres` | - | +| `pressure_solver.preconditioner` | Linear solver preconditioner for the momentum equation. | `ident`, `hsmg`, `jacobi` | - | +| `pressure_solver.absolute_tolerance` | Linear solver convergence criterion for the momentum equation. | Positive real | - | +| `pressure_solver.maxiter` | Linear solver max iteration count for the momentum equation. | Positive real | 800 | +| `pressure_solver.projection_space_size` | Projection space size for the momentum equation. | Positive integer | 20 | +| `pressure_solver.projection_hold_steps` | Holding steps of the projection for the momentum equation. | Positive integer | 5 | +| `flow_rate_force.direction` | Direction of the forced flow. | 0, 1, 2 | - | +| `flow_rate_force.value` | Bulk velocity or volumetric flow rate. | Positive real | - | +| `flow_rate_force.use_averaged_flow` | Whether bulk velocity or volumetric flow rate is given by the `value` parameter. | `true` or `false` | - | +| `freeze` | Whether to fix the velocity field at initial conditions. | `true` or `false` | `false` | + +## Scalar {#case-file_scalar} +The scalar object allows to add a scalar transport equation to the solution. +The solution variable is called `s`, but saved as `temperature` in the fld + files. +Some properties of the object are inherited from `fluid`: the properties of the +linear solver, the value of the density, and the output +control. + +The scalar equation requires defining additional material properties: the +specific heat capacity and thermal conductivity. These are provided as `cp` and +`lambda`. Similarly to the fluid, one can provide the Peclet number, `Pe`, as an +alternative. In this case, `cp` is set to 1 and `lambda` to the inverse of `Pe`. + +The boundary conditions for the scalar are specified through the +`boundary_types` keyword. +It is possible to directly specify a uniform value for a Dirichlet boundary. +The syntax is, e.g. `d=1`, to set the value to 1, see the Ryleigh-Benard +example case. + +The configuration of source terms is the same as for the fluid. A demonstration +of using source terms for the scalar can be found in the `scalar_mms` example. + +| Name | Description | Admissible values | Default value | +| ------------------------- | -------------------------------------------------------- | ------------------------------ | ------------- | +| `enabled` | Whether to enable the scalar computation. | `true` or `false` | `true` | +| `Pe` | The Peclet number. | Positive real | - | +| `cp` | Specific heat cpacity. | Positive real | - | +| `lambda` | Thermal conductivity. | Positive real | - | +| `boundary_types` | Boundary types/conditions labels. | Array of strings | - | +| `initial_condition.type` | Initial condition type. | `user`, `uniform` | - | +| `initial_condition.value` | Value of the velocity initial condition. | Real | - | +| `source_terms` | Array of JSON objects, defining additional source terms. | See list of source terms above | - | + +## Statistics + +This object adds the collection of statistics for the fluid fields. For +additional details on the workflow, see the +[corresponding page](@ref statistics-guide) in the user manual. + +| Name | Description | Admissible values | Default value | +| ------------------- | -------------------------------------------------------------------- | ----------------- | ------------- | +| `enabled` | Whether to enable the statistics computation. | `true` or `false` | `true` | +| `start_time` | Time at which to start gathering statistics. | Positive real | 0 | +| `sampling_interval` | Interval, in timesteps, for sampling the flow fields for statistics. | Positive integer | 10 | + +## Simulation components +Simulation components enable the user to perform various additional operations, +which are not strictly necessary to run the solver. An example could be +computing and output of additional fields, e.g. vorticity. + +A more detailed description as well as a full list of available components and + their setup is provided in a [separate page of the manual](simcomps.md). + +## Point zones +Point zones enable the user to select GLL points in the computational domain +according to some geometric criterion. Two predefined geometric shapes are +selectable from the case file, boxes and spheres. + +A point zone object defined in the case file can be retrieved from the point +zone registry, `neko_point_zone_registry`, and can be used to perform any +zone-specific operations (e.g. localized source term, probing...). User-specific +point zones can also be added manually to the point zone registry from the user +file. + +A more detailed description as well as a full list of available components and + their setup is provided in a [separate page of the manual](point-zones.md). diff --git a/doc/pages/installation.md b/doc/pages/user-guide/installation.md similarity index 76% rename from doc/pages/installation.md rename to doc/pages/user-guide/installation.md index da7d27f61d1..cb44a166b5d 100644 --- a/doc/pages/installation.md +++ b/doc/pages/user-guide/installation.md @@ -1,10 +1,12 @@ # Installing Neko {#installation} +\tableofcontents + Neko can be installed in various ways, either building directly from source, manually compiling all dependencies and Neko or via Spack. Pre-built Docker images are also provided for each release of Neko. ## Building from source -To build Neko, you will need a Fortran compiler supporting the Fortran-08 standard, autotools, a working MPI installation supporting the Fortran 2008 bindings (`mpi_f08`), BLAS/LAPACK and JSON-Fortran. Optional dependencies are gslib and ParMETIS. +To build Neko, you will need a Fortran compiler supporting the Fortran-08 standard, autotools, pkg-config, a working MPI installation supporting the Fortran 2008 bindings (`mpi_f08`), BLAS/LAPACK and JSON-Fortran. Optional dependencies are PFunit, gslib and ParMETIS. Follow the steps below to install the less common dependencies (e.g. JSON-Fortran). @@ -100,31 +102,31 @@ In the above command, `[options]` refers to either optional features or packages Features are enabled and disabled by passing either `--enable-FEATURE[=arg]` or `--disable-FEATURE` to `configure`. A list of currently supported features are given in the table below. -Name | Description ----- | ------------ -`--enable-real=Xp` | Specify working precision of REAL types:
`sp` -- `REAL(kind=4)`
`dp` -- `REAL(kind=8)` (default)
`qp` -- `REAL(kind=16)`
-`--enable-contrib` | Compile various tools -`--enable-device-mpi` | Enable device aware MPI +| Name | Description | +| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------- | +| `--enable-real=Xp` | Specify working precision of REAL types:
`sp` -- `REAL(kind=4)`
`dp` -- `REAL(kind=8)` (default)
`qp` -- `REAL(kind=16)`
| +| `--enable-contrib` | Compile various tools | +| `--enable-device-mpi` | Enable device aware MPI | Optional packages are controlled by passing either `--with-PACKAGE[=ARG]` or `--without-PACKAGE` to `configure`. A list of all supported optional packages are given in the table below. -Name | Description ----- | ----------- -`--with-blas=` | Use BLAS library `` -`--with-lapack=` | Use LAPACK library `` -`--with-metis=DIR` | Directory for metis -`--with-metis-libdir=LIBDIR` | Directory for metis library (if different) -`--with-parmetis=DIR` | Compile with support for parmetis library -`--with-parmetis-libdir=LIBDIR` | Directory for parmetis library (if different) -`--with-adios2=DIR` | Compile with support for ADIOS2 -`--with-gslib=DIR` | Compile with support for gslib -`--with-libxsmm` | Compile with support for libxsmm -`--with-hip=DIR` | Compile with HIP backend -`--with-cuda=DIR` | Compile with CUDA backend -`--with-opencl=DIR` | Compile with OpenCL backend -`--with-nvtx=DIR` | Compile with support for NVTX -`--with-roctx=DIR` | Compile with support for ROCTX -`--with-pfunit=DIR` | Directory for pFUnit (see \subpage testing) +| Name | Description | +| ------------------------------- | --------------------------------------------- | +| `--with-blas=` | Use BLAS library `` | +| `--with-lapack=` | Use LAPACK library `` | +| `--with-metis=DIR` | Directory for metis | +| `--with-metis-libdir=LIBDIR` | Directory for metis library (if different) | +| `--with-parmetis=DIR` | Compile with support for parmetis library | +| `--with-parmetis-libdir=LIBDIR` | Directory for parmetis library (if different) | +| `--with-adios2=DIR` | Compile with support for ADIOS2 | +| `--with-gslib=DIR` | Compile with support for gslib | +| `--with-libxsmm` | Compile with support for libxsmm | +| `--with-hip=DIR` | Compile with HIP backend | +| `--with-cuda=DIR` | Compile with CUDA backend | +| `--with-opencl=DIR` | Compile with OpenCL backend | +| `--with-nvtx=DIR` | Compile with support for NVTX | +| `--with-roctx=DIR` | Compile with support for ROCTX | +| `--with-pfunit=DIR` | Directory for pFUnit (see \subpage testing) | @note Accelerators backends are not enabled as a feature in Neko, but rather via optional packages. diff --git a/doc/pages/user-guide/io.md b/doc/pages/user-guide/io.md new file mode 100644 index 00000000000..568d3a589e9 --- /dev/null +++ b/doc/pages/user-guide/io.md @@ -0,0 +1,50 @@ +# Input-output {#io} + +\tableofcontents + +## Mesh + +Neko has it's own mesh format, `.nmsh`. All meshes should be 3D and consist of +hexahedral elements. A 2D or 1D case can be mimicked by having a single element +across selected axes and applying periodic boundary conditions. + +A native mesh generator for simple box meshes, called `genmeshbox`, is part of +Neko, and is used in some of the examples, for example `advecting_cone`. The +usage is straightforward, and is well described by the help string provided when +running the utility. + +The main way of obtaining a `.nmsh` is converting a Nek5000 `.re2` mesh file +using the `rea2nbin` utility. Nek5000, in turn, has several converters among its +tools, such as `gmsh2nek`. The latter is also available under the `contrib` +directory in Neko. The workflow is thus to export your mesh into a format, which +can be converted to `.re2`, and then convert the `.re2` to `.nmsh`. Of course +one can also use native Nek5000 tools to produce the `.re2`, such as `genbox`. +In the future, native support for other formats than `.nmsh` will be added for +convenience. + +It should be noted, that the `.re2` format allows to store boundary conditions. +This is relevant for users that have old Nek5000 cases, who wish to convert them +to Neko. Boundary condition is converted and used by Neko, and for these +boundaries one does not need to provide information in the `boundary_types` +keyword in the case file. This is why in some of the examples, `boundary_types` +is only filled for some of the boundaries. However, since this feature +complicates the code and leads to somewhat confusing case setups, it is planned +to deprecate it at some point. Note that periodic boundaries are also directly +encoded into the mesh file, and this will remain so in the future. + +## Three-dimensional field output +Neko stores the 3D fields with results in the `.fld` format, which is the same +as in Nek5000. The advantage of adopting this format, is that there is a reader +in Paraview and Visit, which can be used to visualize them. Note that the latest +version of Paraview actually has two reader for `.fld`. For now, Neko has only +been tested with the older reader, which uses Visit under the hood. A file with +the `.nek5000` extension is used as the entry point for the readers and stores +some metadata. Users may also find the Python package `pymech` useful for +working with `.fld`s. Note that only the first output `.fld` file stores the +mesh. + +## Checkpoint files +Simulations cannot be restarted from `.fld` files. Instead, separate checkpoint +files can be output for the purpose of restarts. These contain additional +information allowing a clean restart, with, e.g., the correct time integration +order. A separate file format, `.chkp` is adopted for the checkpoint files. diff --git a/doc/pages/point-zones.md b/doc/pages/user-guide/point-zones.md similarity index 77% rename from doc/pages/point-zones.md rename to doc/pages/user-guide/point-zones.md index 2bc1b25813e..00e4c431b28 100644 --- a/doc/pages/point-zones.md +++ b/doc/pages/user-guide/point-zones.md @@ -1,5 +1,7 @@ - # Point zones {#point-zones} + +\tableofcontents + ## What are point zones? Point zones are subsections of the computational domain which are @@ -12,10 +14,10 @@ being applying a localized source term or probing a particular zone of interest. ## Predefined geometrical shapes -There are two predefined shapes from which to initialize a point zone in the case -file: boxes and spheres. Each shape is described by its own subtype -`box_point_zone_t` and `sphere_point_zone_t`, extending the abstract class -`point_zone_t`. +There are three predefined shapes from which to initialize a point zone in the +case file: boxes, spheres and cylinders. Each shape is described by its own +subtype `box_point_zone_t`, `sphere_point_zone_t` and `cylinder_point_zone_t`, +extending the abstract class `point_zone_t`. ### Box @@ -47,13 +49,29 @@ A sphere is defined by its center and its radius. ] ~~~~~~~~~~~~~~~ +### Cylinder + +A cylinder is defined by its end points and its radius. + +~~~~~~~~~~~~~~~{.json} +[ + { + "name": "mycylinder", + "geometry": "cylinder", + "start": [0.0, 0.0, 0.0], + "end": [0.0, 0.0, 1.0], + "radius": 0.01 + }, +] +~~~~~~~~~~~~~~~ + ## User-defined geometrical shapes The current version of Neko does not support user-defined shapes from the case file. That said, shapes can be defined manually into new types by extending `point_zone_t` and implementing the abstract `criterion` interface. -## Using point zones +## Using point zones {#point-zones_using-point-zones} Point zones defined in the case file are stored in a point zone registry, `neko_point_zone_registry`. The point zone registry allows for the retrieval of diff --git a/doc/pages/user-guide/simcomps.md b/doc/pages/user-guide/simcomps.md new file mode 100644 index 00000000000..1ba0dccd661 --- /dev/null +++ b/doc/pages/user-guide/simcomps.md @@ -0,0 +1,111 @@ +# Simulation components {#simcomps} + +\tableofcontents + +## What are simulation components? +Simulation components, or simcomps fo short, incapsulate additional +functionality that may be useful for certain cases but not necessary to run the +solver. +This can include computation and output of additional fields, in-situ +post-processing operations, data sampling, etc. + +By design, simulation components can tap into every aspect of the simulation, +so they can be quite powerful. +As the code grows, we expect to add more and more simcomps to the code. + +## Adding simulation components to the case +Each simcomp is defined as a single JSON object at are added to an array +of objects called `simulation_components`, which resides directly under the +`case` object. + +## Controling execution and file output +Each simulation component is, by default, executed once per time step to perform +associated computations and output. However, this can be modified by using the +`compute_control` and `compute_value` parameters for the computation and the +`output_control and` and `output_value` for the output to disk. The parameters +for the `_control` values are the same as for the fluid and checkpointing. +Additionally, one can set `output_control` to `global` and `never`. The former +will sync the `output_` parameter to that of the fluid. Choosing `never` will +suppress output all together. If no parameters for the `output_` parameters are + provided, they are set to be the same as for `compute_`. + +For simcomps that compute 3D fields, the output can be either added to the main +`.fld` file, containing velocity and pressure, or saved to a separate file. For +the latter, the `output_filename` keyword should be provided. One can +additionally provide the `precision` keyword, which can be set to either +`single` or `double` to control the precision of the written data. + +For example, in the `tgv` example case the `vorticity` component is executed +once per 50 time steps. The `output_` parameters are synced to that, and the +vorticity fields will be added to the main `.fld` file. +~~~~~~~~~~~~~~~{.json} +{ + "type": "vorticity", + "compute_control": "tsteps", + "compute_value": 50 +} +~~~~~~~~~~~~~~~ + + ## List of simulation components + + ### vorticity + Computes the vorticity field an stores in the field registry as `omega_x`, + `omega_y` and `omega_z`. + Currently produces no output. + + ### lambda2 + Computes \f$ \lambda_2 \f$ for the velocity field and stores it in the normal output files as the first unused field. + This means that \f$ \lambda_2 \f$ can be found in the temperature field in then fld files if running without a scalar + and s1 if neko is run with one scalar. + + ### probes + Probes selected solution fields at the points given inside an input file. + Example usage: + ~~~~~~~~~~~~~~~{.json} + { + "type": "probes", + "compute_control": "simulationtime", + "compute_value" : 1, + "points_file": "probes.csv", + "output_file": "output.csv", + "fields": ["w","s"] + } + ~~~~~~~~~~~~~~~ +This probes the fields 'w', and 's' in the points described by points.csv and +outputs into output.csv every 1 time units. + +The probed information will be saved in the output file in the following format: + +~~~~~~~~~~~~~~~{.csv} +N_p, N_f, fields[0], fields[1], ..., fields[N_f-1] +p_0_x, p_0_y, p_0_z +p_1_x, p_1_y, p_1_z +... +p_N_p_x, p_N_p_y, p_N_p_z +time_0, p_0_field_0, p_0_field_1, ..., p_0_field_N_f-1 +time_0, p_1_field_0, p_1_field_1, ..., p_1_field_N_f-1 +... +time_0, p_N_p_field_0, p_N_p_field_1, ..., p_N_p_field_N_f-1 +time_1, p_0_field_0, p_0_field_1, ..., p_0_field_N_f-1 +time_1, p_1_field_0, p_1_field_1, ..., p_1_field_N_f-1 +... +time_N_p, p_N_p_field_0, p_N_p_field_1, ..., p_N_p_field_N_f-1 +~~~~~~~~~~~~~~~ + + ### field_writer + Outputs registered 3D fields to an `.fld` file. Requires a list of field names + in the `fields` keyword. Primarily to be used for outputting new fields defined + in the user file. The fields are added to then `neko_field_registry` object and + are expected to be updated in the user file, or, perhaps, by other simcomps. + Since this simcomp does not compute anything `compute_` configuration is + irrelevant. + ~~~~~~~~~~~~~~~{.json} + { + "type": "field_writer", + "fields": ["my_field1", "my_field2"], + "output_filename": "myfields", + "precision": "double", + "output_control" : "simulation_time", + "output_value" : 1.0 + } + ~~~~~~~~~~~~~~~ diff --git a/doc/pages/user-guide/statistics-guide.md b/doc/pages/user-guide/statistics-guide.md new file mode 100644 index 00000000000..b6ce8f92e89 --- /dev/null +++ b/doc/pages/user-guide/statistics-guide.md @@ -0,0 +1,171 @@ +# Statistics guide {#statistics-guide} + +\tableofcontents + +Under development, updated incrementally + +Statistics in the context of Neko, is the common name for fields that are averaged in time and possible also space. + +The statistics module in Neko computes the temporal average of a wide range of fields. + +In this page we use the following convention for a field +- \f$ u \f$, the instantaneous field. +- \f$ \langle u \rangle_t \f$, the temporal average of \f$ u \f$. +- \f$ u = \langle u \rangle + u' \f$, the Reynolds decomposition of \f$ u \f$, where \f$ u' \f$ is the fluctuation of \f$ u \f$ around the mean field. + +The temporal average of a field \f$u\f$ is the approximation of the integral + +$$ +\langle u \rangle_t = \int_{T_0}^{T_N} u dt +$$ + +In Neko, this is computed as + +$$ +\langle u \rangle_t = \sum_{i=0}^N u_i \Delta t_i +$$ +where \f$ u_0 \f$ is the fields value at \f$ T_0 \f$ and \f$ N \f$ is the number of time steps needed to reach \f$ T_N \f$, \f$ T_N = T_0 + \sum_{i=0}^N \Delta t_i \f$. + +In the statistics in Neko, various averages of the the different velocity components, derivatives and pressure are computed. In total, 44 "raw statistics" are computed that are required to compute the Reynolds stress budgets, mean fields, and the different terms in the turbulent kinetic energy equation. + +## Using statistics +Statistics are enable in the the case file as the following: + +| Name | Description | Admissible values | Default value | +| ------------------- | -------------------------------------------------------------------- | ----------------- | ------------- | +| `enabled` | Whether to enable the statistics computation. | `true` or `false` | `true` | +| `start_time` | Time at which to start gathering statistics. | Positive real | 0 | +| `sampling_interval` | Interval, in timesteps, for sampling the flow fields for statistics. | Positive integer | 10 | + +In addition to the usual controls for the output, which then outputs the averages computes from the last time the statistics were written to file. + +For example, if one wants to sample the fields every 4 time steps and compute the averages in time intervals of 20 and write the output every 20 time units, and start collecting statistics after an initial transient of 50 time units the following would work: + +~~~~~~~~~~~~~~~{.json} +"statistics": { + "enabled": true, + "start_time": 50.0, + "sampling_interval": 4, + "output_control": "simulationtime", + "output_value": 20, + } +~~~~~~~~~~~~~~~ +When the output is written one obtains two .fld files called mean_field and stats. + +## List of fields in output files +In `mean_field` the following averages are stored. The stored in variable column is which field one finds the computed statistic if one opens the file in paraview or visit. + +| Number | Statistic | Stored in variable | +| ------ | --------- | ------------------ | +| 1 | \f$ \langle p \rangle \f$ | Pressure| +| 2 | \f$ \langle u \rangle \f$ | X-Velocity| +| 3 | \f$ \langle v \rangle \f$ | Y-Velocity| +| 4 | \f$ \langle w \rangle \f$ | Z-Velocity| + +In `stats` several other statistics are stored, and while not all might be interesting to your specific use case, with them most different budgets and quantities of interest can be computed. They are stored as the following: + + +| Number | Statistic | Stored in variable | +| ------ | --------- | ------------------ | +| 1 | \f$ \langle pp \rangle \f$ | Pressure| +| 2 | \f$ \langle uu \rangle \f$ | X-Velocity| +| 3 | \f$ \langle vv \rangle \f$ | Y-Velocity| +| 4 | \f$ \langle ww \rangle \f$ | Z-Velocity| +| 5 | \f$ \langle uv \rangle \f$ | Temperature| +| 6 | \f$ \langle uw \rangle \f$ | Scalar 1 (s1)| +| 7 | \f$ \langle vw \rangle \f$ | Scalar 2 (s2)| +| 8 | \f$ \langle uuu \rangle \f$ | s3| +| 9 | \f$ \langle vvv \rangle \f$ | s4| +| 10 | \f$ \langle www \rangle \f$ | s5| +| 11 | \f$ \langle uuv \rangle \f$ | s6 | +| 12 | \f$ \langle uuw \rangle \f$ | s7 | +| 13 | \f$ \langle uvv \rangle \f$ | s8 | +| 14 | \f$ \langle uvw \rangle \f$ | s9 | +| 15 | \f$ \langle vvw \rangle \f$ | s10 | +| 16 | \f$ \langle uww \rangle \f$ | s11 | +| 17 | \f$ \langle vww \rangle \f$ | s12 | +| 18 | \f$ \langle uuuu \rangle \f$ | s13 | +| 19 | \f$ \langle vvvv \rangle \f$ | s14 | +| 20 | \f$ \langle wwww \rangle \f$ | s15 | +| 21 | \f$ \langle ppp \rangle \f$ | s16 | +| 22 | \f$ \langle pppp \rangle \f$ | s17 | +| 23 | \f$ \langle pu \rangle \f$ | s18 | +| 24 | \f$ \langle pv \rangle \f$ | s19 | +| 25 | \f$ \langle pw \rangle \f$ | s20 | +| 26 | \f$ \langle p \frac{\partial u} {\partial x} \rangle \f$ | s21 | +| 27 | \f$ \langle p \frac{\partial u} {\partial y}\rangle \f$ | s22 | +| 28 | \f$ \langle p \frac{\partial u} {\partial z}\rangle \f$ | s23 | +| 29 | \f$ \langle p \frac{\partial v} {\partial x}\rangle \f$ | s24 | +| 30 | \f$ \langle p \frac{\partial v} {\partial y}\rangle \f$ | s25 | +| 31 | \f$ \langle p \frac{\partial v} {\partial z}\rangle \f$ | s26 | +| 32 | \f$ \langle p \frac{\partial w} {\partial x}\rangle \f$ | s27 | +| 33 | \f$ \langle p \frac{\partial w} {\partial y}\rangle \f$ | s28 | +| 34 | \f$ \langle p \frac{\partial w} {\partial z}\rangle \f$ | s29 | +| 35 | \f$ \langle e11 \rangle \f$ | s30 | +| 36 | \f$ \langle e22 \rangle \f$ | s31 | +| 37 | \f$ \langle e33 \rangle \f$ | s32 | +| 38 | \f$ \langle e12 \rangle \f$ | s33 | +| 39 | \f$ \langle e13 \rangle \f$ | s34 | +| 40 | \f$ \langle e23 \rangle \f$ | s35 | + +where \f$e11,e22...\f$ is computed as: +$$ +\begin{aligned} +e11 &= \left(\frac{\partial u}{\partial x}\right)^2 + \left(\frac{\partial u}{\partial y}\right)^2 + \left(\frac{\partial u}{\partial z}\right)^2 \\\\ +e22 &= \left(\frac{\partial v}{\partial x}\right)^2 + \left(\frac{\partial v}{\partial y}\right)^2 + \left(\frac{\partial v}{\partial z}\right)^2 \\\\ +e33 &= \left(\frac{\partial w}{\partial x}\right)^2 + \left(\frac{\partial w}{\partial y}\right)^2 + \left(\frac{\partial w}{\partial z}\right)^2 \\\\ +e12 &= \frac{\partial u}{\partial x} \frac{\partial v}{\partial x} + \frac{\partial u}{\partial y}\frac{\partial v}{\partial y}+ \frac{\partial u}{\partial z}\\frac{\partial v}{\partial z} \\\\ +e13 &= \frac{\partial u}{\partial x} \frac{\partial w}{\partial x} + \frac{\partial u}{\partial y}\frac{\partial w}{\partial y}+ \frac{\partial u}{\partial z}\\frac{\partial w}{\partial z} \\\\ +e23 &= \frac{\partial v}{\partial x} \frac{\partial w}{\partial x} + \frac{\partial v}{\partial y}\frac{\partial w}{\partial y}+ \frac{\partial v}{\partial z}\\frac{\partial w}{\partial z} \\\\ +\end{aligned} +$$ + + +# Postprocessing +Of course, these statistics are only the "raw statistics" in the sense that in general we are not interested in \f$ \langle uu\rangle \f$, but rather say the rms of the velocity fluctuation. FOr this we need to postprocess the statistics. + +There is some rudimentary postprocessing to compute the spatial averages of fld filesa and also to combine the statistics collected from several runs (compute average in time) and also compute both the mean velocity gradient and the Reynolds stresses available among the contrib scripts. By running the contrib scripts without any arguments one gets a hint on their usage, and also the text below gives a guide on how to postprocess the raw statistics. The postprocessing part of Neko is expanding and changing quite a lot at the moment, where we currently envision primarily using python for the postprocessing of the final statistics. + + + +## Notes on the statistics calculation in Neko +***Daniele Massaro, Martin Karp (KTH)*** + +1) Run your simulations and collect mean_field* and stats* files by having the statistics object added to the case file, and specifying the write interval to something suitable. + +2) For each RUN_i, you get a set of mean_field* and stats* files. You can average them for each single RUN_i, or average all of them only once (after re-ordering them properly). If you follow the second approach, go to step 4. +Here, for each RUN_i, we compute the averaged means with "average_fields_in_time": +--mean +`srun --unbuffered /your/location/neko/bin/average_fields_in_time meanXX.fld T0 mean_p.fld` +where T0 is the initial time. To get some hints on the input for the script one can simply run `./average_fields_in_time` without any arguments. For RUN_1 the time T0 can be taken from the log of the first simulation, or from the header of the first mean_field* file; in this way you discard that file. For RUN_i, with i>1, it can be taken from header of the last file mean_field* of the previous simulation RUN_{i-1}. +In the command line, for the name "meanXX.fld", XX indicates the number of the nek5000 file. In mean_fieldXX.nek5000 you set the number of the first mean0* file to read and the number of steps corresponding to the number of files. In this way, the code generates a mean_p0.f00000 and mean_post0.nek5000. It is suggested to rename mean_p0.f00000 as mean_p0.f0000i and move it to a separate folder where you take the average with all the others. +--stats +`srun --unbuffered /your/location/neko/bin/average_fields_in_time statXX.fld T0 stat_p.fld` +T0 is the same as before. In stat0.nek5000 you set the number of the first stat0* file to read and the number of steps corresponds to the number of files. It is suggested to rename stat_p0.f00000 as stat_p0.f0000i and move it to a separate folder where you take the average with all the others. +Repeat this for each RUN_i folder. Eventually, given n RUN_i folders, you will get n mean_p* and stat_p* files. + +3) Take the average of the averaged runs. Now, the time average over all the n simulations is taken. The procedure is similar, but changing the output name is recommended to avoid over-writing. +-- mean +`srun --unbuffered /your/location/neko/bin/average_fields mean_p0.fld T0 mean_post.fld` +where T0 is the initial time which has been used to compute mean_p* for RUN_1. +-- stats +`srun --unbuffered /your/location/neko/bin/average_fields stat_p0.fld T0 stat_post.fld` +where T0 is the initial time which has been used to compute mean_p* for RUN_1. + + + + + +4) Compute Reynolds stress tensors and other statistical moments (see the list). +`srun --unbuffered /your/location/neko/bin/postprocess_fluid_stats mesh_file.nmsh mean_post0.fld stat_post0.fld` + +5) We also provide a tool to average the resulting field in a homogenous direction in `bin/average_field_in_space`. The required arguments are shown if one runs the program without any input. Currently it requires the number of elements in the homogenous direction as an input argument, e.g. +`./average_field_in_space mesh.nmsh field.fld x 18 outfield.fld` +if we want to average a field in the x direction on a mesh with 18 elements in x and output the averaged field in outfield0.nek5000. + + + + + + + diff --git a/doc/pages/user-guide/user-file.md b/doc/pages/user-guide/user-file.md new file mode 100644 index 00000000000..66ec1993a3f --- /dev/null +++ b/doc/pages/user-guide/user-file.md @@ -0,0 +1,952 @@ +# User File {#user-file} + +\tableofcontents + +The user file is a fortran file where the user can implement their own functions +to extend the capabilities of the default Neko executable. The user file can be +used for setting advanced initial/boundary conditions, source terms, I/O +operations, and interactions with the Neko framework. + +## Compiling and running + +The user file is a regular Fortran `.f90` file that needs to be compiled with +`makeneko`, located in the `bin` folder of your neko installation. To compile a +user file `user.f90`, run: + +```bash +makeneko user.f90 +``` + +If everything goes well, you should observe the following output: + +```bash +N E K O build tool, Version 0.7.99 +(build: 2024-02-13 on x86_64-pc-linux-gnu using gnu) + +Building user NEKO ... done! +``` + +Compiling your user file with `makeneko` will create a `neko` executable, which +you will need to execute with your case file as an argument. For example, if +your case file is called `user.case`: + +```bash +./neko user.case +``` + +Or in parallel using MPI: + +```bash +mpirun -n 8 ./neko user.case +``` + + +## High-level structure + +The current high-level structure of the user file is shown below. + +```fortran +module user + use neko + implicit none + +contains + + ! Register user defined functions here (see user_intf.f90) + subroutine user_setup(u) + type(user_t), intent(inout) :: u + + + end subroutine user_setup + +end module user + +``` + +The user file implements the `user` module. The `user` modules contains a +subroutine named `user_setup`, which we use to interface the internal procedures +defined in `src/common/user_intf.f90` with the subroutines that you will +implement in your user file. Each user subroutine should be implemented under +the `contains` statement, below `user_setup`. + +@note The above code snippet is the most basic code structure for the user file. +Compiling it and running it would be equivalent to running the "vanilla" neko +executable `bin/neko` in your local neko installation folder. + +## Default user functions + +The following user functions, if defined in the user file, will always be +executed, regardless of what is set in the case file: + +- [user_init_modules](@ref user-file_init-and-final): For initializing user + variables and objects +- [user_finalize_modules](@ref user-file_init-and-final): For finalizing, e.g + freeing variables and terminating processes +- [user_check](@ref user-file_user-check): Executed at the end of every time step, + for e.g. computing and/or outputting user defined quantities. +- [material_properties](@ref user-file_mat-prop): For computing and setting material + properties such as `rho`, `mu`, `cp` and `lambda`. +- [user_mesh_setup](@ref user-file_user-mesh-setup): For applying a deformation to + the mesh element nodes, before the simulation time loop. +- [scalar_user_bc](@ref user-file_scalar-bc): For applying boundary conditions to + the scalar, on all zones that are not already specified with uniform dirichlet + values e.g. `d=1`. For more information on the scalar, see the [relevant section of the case file](@ref case-file_scalar). + +### Initializing and finalizing {#user-file_init-and-final} + +The two subroutines `user_init_modules` and `user_finalize_modules` may be used +to initialize/finalize any user defined variables, external objects, or +processes. They are respectively executed right before/after the simulation time +loop. + +```.f90 + + ! Initialize user variables or external objects + subroutine initialize(t, u, v, w, p, coef, params) + real(kind=rp) :: t + type(field_t), intent(inout) :: u + type(field_t), intent(inout) :: v + type(field_t), intent(inout) :: w + type(field_t), intent(inout) :: p + type(coef_t), intent(inout) :: coef + type(json_file), intent(inout) :: params + + ! insert your initialization code here + + end subroutine initialize + + ! Finalize user variables or external objects + subroutine finalize(t, params) + real(kind=rp) :: t + type(json_file), intent(inout) :: params + + ! insert your code here + + end subroutine initialize +``` + +In the example above, the subroutines `initialize` and `finalize` contain the +actual implementations. They must also be interfaced to the internal procedures +`user_init_modules` and `user_finalize_modules` in `user_setup`: + +```.f90 + + ! Register user defined functions (see user_intf.f90) + subroutine user_setup(u) + type(user_t), intent(inout) :: u + + u%user_init_modules => initialize + u%user_finalize_modules => finalize + + end subroutine user_setup + +``` + +@note `user_init_modules` and `user_finalize_modules` are independent of each +other. Using one does not require the use of the other. + +### Computing at every time step {#user-file_user-check} + +The subroutine `user_check` is executed at the end of every time step. It can be +used for computing and/or outputting your own variables/quantities at every time +step. +```.f90 + ! This is called at the end of every time step + subroutine usercheck(t, tstep, u, v, w, p, coef, param) + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + type(coef_t), intent(inout) :: coef + type(field_t), intent(inout) :: u + type(field_t), intent(inout) :: v + type(field_t), intent(inout) :: w + type(field_t), intent(inout) :: p + type(json_file), intent(inout) :: param + + ! insert code below + + end subroutine usercheck + +``` + +In the example above, the subroutine `usercheck` contains the actual +implementation, and needs to be registered by adding: + +```.f90 +u%user_check => usercheck +``` + +to our `user_setup`. + +### Setting material properties {#user-file_mat-prop} + +`material_properties` allows for more complex computations and setting of +various material properties, such as `rho`, `mu` for the fluid and `cp`, +`lambda` for the scalar. The example below is taken from the +[rayleigh-benard-cylinder +example](https://github.com/ExtremeFLOW/neko/blob/564686b127ff75a362a06126c6b23e9b4e21879e/examples/rayleigh-benard-cylinder/rayleigh.f90#L22C1-L38C41). + +```.f90 + + subroutine set_material_properties(t, tstep, rho, mu, cp, lambda, params) + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + real(kind=rp), intent(inout) :: rho, mu, cp, lambda + type(json_file), intent(inout) :: params + real(kind=rp) :: Re + + call json_get(params, "case.fluid.Ra", Ra) + call json_get(params, "case.scalar.Pr", Pr) + + Re = sqrt(Ra / Pr) + mu = 1.0_rp / Re + lambda = mu / Pr + rho = 1.0_rp + cp = 1.0_rp + end subroutine set_material_properties + +``` + +And of course not forgetting to register our function in `user_setup` by adding +the following line: + +```.f90 +u%material_properties => set_material_properties +``` + +### Runtime mesh deformation {#user-file_user-mesh-setup} + +This user function allows for the modification of the mesh at runtime, by acting +on the element nodes of the mesh specified in the case file. This function is +only called once before the simulation time loop. The example below is taken +from the [tgv +example](https://github.com/ExtremeFLOW/neko/blob/a0613606360240e5059e65d6d98f4a57cf73e237/examples/tgv/tgv.f90#L27-L42). + +```.f90 + ! Rescale mesh + subroutine user_mesh_scale(msh) + type(mesh_t), intent(inout) :: msh + integer :: i, p, nvert + real(kind=rp) :: d + d = 4._rp + + ! original mesh has size 0..8 to be mapped onto -pi..pi + ! will be updated later to a method giving back the vertices of the mesh + nvert = size(msh%points) + do i = 1, nvert + msh%points(i)%x(1) = (msh%points(i)%x(1) - d) / d * pi + msh%points(i)%x(2) = (msh%points(i)%x(2) - d) / d * pi + msh%points(i)%x(3) = (msh%points(i)%x(3) - d) / d * pi + end do + + end subroutine user_mesh_scale + +``` + +The registering of the above function in `user_setup` should then be done as follows: + +```.f90 +u%user_mesh_setup => user_mesh_scale +``` + +### Scalar boundary conditions {#user-file_scalar-bc} + +This user function can be used to specify the scalar boundary values, on all +zones that are not already set to uniform Dirichlet or Neumann values e.g. `d=1` +or `n=0`. For more information on the scalar, see the +[relevant section of the case file](@ref case-file_scalar). The example below +sets the scalar boundary condition values to be a linear function of the `z` +coordinate (taken from the +[rayleigh-benard example](https://github.com/ExtremeFLOW/neko/blob/aa72ad9bf34cbfbac0ee893c045639fdd095f80a/examples/rayleigh-benard-cylinder/rayleigh.f90#L41-L63)). + +```.f90 + + subroutine set_scalar_boundary_conditions(s, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep) + real(kind=rp), intent(inout) :: s + real(kind=rp), intent(in) :: x + real(kind=rp), intent(in) :: y + real(kind=rp), intent(in) :: z + real(kind=rp), intent(in) :: nx + real(kind=rp), intent(in) :: ny + real(kind=rp), intent(in) :: nz + integer, intent(in) :: ix + integer, intent(in) :: iy + integer, intent(in) :: iz + integer, intent(in) :: ie + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + + ! This will be used on all zones without labels + s = 1.0_rp - z + + end subroutine set_scalar_boundary_conditions + +``` + +This function will be called on all the points on the relevant boundaries. The +registering of the above function in `user_setup` should be done as follows: + +```.f90 +u%scalar_user_bc => set_scalar_boundary_conditions +``` + +## Case-specific user functions + +As explained in the [case file](case-file.md) page, certain components of the +simulation can be set to be user defined. These components and their associated +user functions are: + +| Description | User function | JSON Object in the case file | +|---------------------------------|------------------------------------------------------------|----------------------------------| +| Fluid initial condition | [fluid_user_ic](@ref user-file_user-ic) | `case.fluid.initial_condition` | +| Scalar initial condition | [scalar_user_ic](@ref user-file_user-ic) | `case.scalar.initial_condition` | +| Fluid inflow boundary condition | [fluid_user_if](@ref user-file_fluid-user-if) | `case.fluid.inflow_condition` | +| Scalar boundary conditions | [scalar_user_bc](@ref user-file_scalar-bc) | (user function is always called) | +| Fluid source term | [fluid_user_f_vector or fluid_user_f](@ref user-file_user-f) | `case.fluid.source_terms` | +| Scalar source term | [scalar_user_f_vector or scalar_user_f](@ref user-file_user-f) | `case.scalar.source_terms` | +| Fluid and Scalar boundary conditions | [field_dirichlet_update](@ref user-file_field-dirichlet-update) | `case.fluid.boundary_types` and/or `case.scalar.boundary_types` | + +Note that `scalar_user_bc` is included for completeness but is technically not case-specific. + +### Fluid and Scalar initial conditions {#user-file_user-ic} + +Enabling user defined initial conditions for the fluid and/or scalar is done by +setting the `initial_condition.type` to `"user"` in the relevant sections of the +case file, `case.fluid` and/or `case.scalar`. + +```.json + +"case": { + "fluid": { + "initial_condition": { + "type": "user" + } + } +} +``` + +See the relevant sections on the [fluid](@ref case-file_fluid-ic) and +[scalar](@ref case-file_scalar) initial conditions in the +[case file page](@ref case-file) for more details. + +The associated user functions for the fluid and/or scalar initial conditions can +then be added to the user file. An example for the fluid taken from the +[advecting cone example](https://github.com/ExtremeFLOW/neko/blob/aa72ad9bf34cbfbac0ee893c045639fdd095f80a/examples/advecting_cone/advecting_cone.f90#L48-L75), +is shown below. + +```.f90 + + !> Set the advecting velocity field. + subroutine set_velocity(u, v, w, p, params) + type(field_t), intent(inout) :: u + type(field_t), intent(inout) :: v + type(field_t), intent(inout) :: w + type(field_t), intent(inout) :: p + type(json_file), intent(inout) :: params + integer :: i, e, k, j + real(kind=rp) :: x, y + + do i = 1, u%dof%size() + x = u%dof%x(i,1,1,1) + y = u%dof%y(i,1,1,1) + + ! Angular velocity is pi, giving a full rotation in 2 sec + u%x(i,1,1,1) = -y*pi + v%x(i,1,1,1) = x*pi + w%x(i,1,1,1) = 0 + end do + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(u%x, u%x_d, u%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(v%x, v%x_d, v%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(w%x, w%x_d, w%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + end if + + end subroutine set_velocity + +``` + +@note Notice the use of the `NEKO_BCKND_DEVICE` flag, which will be set to 1 if +running on GPUs, and the calls to `device_memcpy` to transfer data between the +host and the device. See [Running on GPUs](@ref user-file_tips_running-on-gpus) for +more information on how this works. + +The same can be done for the scalar, with the example below also inspired from +the +[advecting cone example](https://github.com/ExtremeFLOW/neko/blob/aa72ad9bf34cbfbac0ee893c045639fdd095f80a/examples/advecting_cone/advecting_cone.f90#L14-L45): + +```.f90 + + !> User initial condition for the scalar + subroutine set_s_ic(s, params) + type(field_t), intent(inout) :: s + type(json_file), intent(inout) :: params + integer :: i, e, k, j + real(kind=rp) :: cone_radius, mux, muy, x, y, r, theta + + ! Center of the cone + mux = 1 + muy = 0 + + cone_radius = 0.5 + + do i = 1, s%dof%size() + x = s%dof%x(i,1,1,1) - mux + y = s%dof%y(i,1,1,1) - muy + + r = sqrt(x**2 + y**2) + theta = atan2(y, x) + + ! Check if the point is inside the cone's base + if (r > cone_radius) then + s%x(i,1,1,1) = 0.0 + else + s%x(i,1,1,1) = 1.0 - r / cone_radius + endif + end do + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(s%x, s%x_d, s%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + end if + + end subroutine set_s_ic + +``` + +@note Notice the use of the `NEKO_BCKND_DEVICE` flag, which will be set to 1 if +running on GPUs, and the calls to `device_memcpy` to transfer data between the +host and the device. See [Running on GPUs](@ref user-file_tips_running-on-gpus) for +more information on how this works. + +We should also add of the following lines in `user_setup`, registering our user +functions `set_velocity` and `set_s_ic` to be used as the fluid and scalar +initial conditions: + +```.f90 +u%fluid_user_ic => set_velocity +u%scalar_user_ic => set_s_ic +``` + +### Fluid inflow condition {#user-file_fluid-user-if} + +Enabling user defined inflow condition for the fluid is done by setting +the `case.fluid.inflow_condition.type` to `"user"`: + +```.json + +"case": { + "fluid": { + "inflow_condition": { + "type": "user" + } + } +} +``` + +See the [the relevant section](@ref case-file_fluid-if) in the +[case file page](@ref case-file.md) for more details. The associated user +function for the fluid inflow condition can then be added to the user file. +An example inspired from the +[lid-driven cavity example](https://github.com/ExtremeFLOW/neko/blob/aa72ad9bf34cbfbac0ee893c045639fdd095f80a/examples/lid/lid.f90#L29-L53) +is shown below. + +```.f90 + ! user-defined boundary condition + subroutine user_bc(u, v, w, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep) + real(kind=rp), intent(inout) :: u + real(kind=rp), intent(inout) :: v + real(kind=rp), intent(inout) :: w + real(kind=rp), intent(in) :: x + real(kind=rp), intent(in) :: y + real(kind=rp), intent(in) :: z + real(kind=rp), intent(in) :: nx + real(kind=rp), intent(in) :: ny + real(kind=rp), intent(in) :: nz + integer, intent(in) :: ix + integer, intent(in) :: iy + integer, intent(in) :: iz + integer, intent(in) :: ie + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + + real(kind=rp) lsmoothing + lsmoothing = 0.05_rp ! length scale of smoothing at the edges + + u = step( x/lsmoothing ) * step( (1._rp-x)/lsmoothing ) + v = 0._rp + w = 0._rp + + end subroutine user_bc +``` + +We should also add of the following line in `user_setup`, registering our user +function `user_bc` to be used as the fluid inflow conditions: + +```.f90 +u%fluid_user_if => user_bc +``` + +### Fluid and scalar source terms {#user-file_user-f} + +Enabling user defined source terms for the fluid and/or scalar is done by adding +JSON Objects to the `case.fluid.source_terms` and/or `case.scalar.source_terms` +lists. + +```.json + +"case": { + "fluid": { + "source_terms": + [ + { + "type": "user_vector" + } + ] + } +} +``` + +See the relevant sections on the [fluid](@ref case-file_fluid-source-term) and +[scalar](@ref case-file_scalar) source terms in the [case file page](@ref case-file) for +more details. + +@attention There are two variants of the source term user functions: `_user_f` +and `_user_f_vector`. The former is called when setting `"user_pointwise"` as +the source term type, while the latter requires the use of the `"user_vector"` +keyword in the case file. The pointwise variant, `fluid_user_f` is not supported +on GPUs. In general, `fluid_user_f_vector` is the prefered variant, and is the +one which will be use in our examples below. The same applies for the scalar +source term user functions. + +The associated user functions for the fluid and/or scalar source terms can then +be added to the user file. An example for the fluid, taken from the +[rayleigh-benard-cylinder example](https://github.com/ExtremeFLOW/neko/blob/49925b7a04a638259db3b1ddd54349ca57f5d207/examples/rayleigh-benard-cylinder/rayleigh.f90#L101C1-L121C44), +is shown below. + +```.f90 + ! Sets the z-component of the fluid forcing term = scalar + subroutine set_bousinesq_forcing_term(f, t) + class(fluid_user_source_term_t), intent(inout) :: f + real(kind=rp), intent(in) :: t + + ! Retrieve u,v,w,s fields from the field registry + type(field_t), pointer :: u, v, w, s + u => neko_field_registry%get_field('u') + v => neko_field_registry%get_field('v') + w => neko_field_registry%get_field('w') + s => neko_field_registry%get_field('s') + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_rzero(f%u_d,f%dm%size()) + call device_rzero(f%v_d,f%dm%size()) + call device_copy(f%w_d,s%x_d,f%dm%size()) + else + call rzero(f%u,f%dm%size()) + call rzero(f%v,f%dm%size()) + call copy(f%w,s%x,f%dm%size()) + end if + end subroutine set_bousinesq_forcing_term +``` + +@note Notice the use of the `neko_field_registry` to retrieve the velocity and +scalar fields. See [Registries](@ref user-file_tips_registries) for more information +about registries in neko. @note Notice the use of the `NEKO_BCKND_DEVICE` flag, +which will be set to 1 if running on GPUs, and the use of `device_` functions. +See [Running on GPUs](@ref user-file_tips_running-on-gpus) for more information on +how this works. + +The same can be done for the scalar, with the example below also taken from the +[scalar_mms example](https://github.com/ExtremeFLOW/neko/blob/49925b7a04a638259db3b1ddd54349ca57f5d207/examples/scalar_mms/scalar_mms.f90#L28-L47): + +```.f90 + + !> Set source term + subroutine set_source(f, t) + class(scalar_user_source_term_t), intent(inout) :: f + real(kind=rp), intent(in) :: t + real(kind=rp) :: x, y + integer :: i + + do i = 1, f%dm%size() + x = f%dm%x(i,1,1,1) + y = f%dm%y(i,1,1,1) + + ! 0.01 is the viscosity + f%s(i,1,1,1) = cos(x) - 0.01 * sin(x) - 1.0_rp + end do + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(f%s, f%s_d, f%dm%size(), & + HOST_TO_DEVICE, sync=.false.) + end if + + end subroutine set_source + +``` + +@note Notice the use of the `NEKO_BCKND_DEVICE` flag, which will be set to 1 if +running on GPUs, and the call to `device_memcpy` to transfer data between the +host and the device. See [Running on GPUs](@ref user-file_tips_running-on-gpus) for +more information on how this works. + +We should also add of the following lines in `user_setup`, registering our user +functions `set_boussinesq_forcing_term` and `set_source` to be used as the fluid +and scalar source terms: + +```.f90 +u%fluid_user_f_vector => set_boussinesq_forcing_term +u%scalar_user_f_vector => set_source +``` + +### Complex fluid and/or scalar boundary conditions {#user-file_field-dirichlet-update} + +This user function can be used to specify dirichlet boundary values for velocity +components `u,v,w`, the pressure `p`, and/or the scalar `s`. This type of boundary +condition allows for time-dependent velocity profiles (currently +not possible with a standard `user_inflow`) or non-uniform pressure profiles +to e.g. impose an outlet pressure computed from another simulation. + +The selection of such boundary condition is done in the `case.fluid.boundary_types` +array for the velocities and pressure, and in the `case.scalar.boundary_types` +array for the scalar. The [case file](@ref case-file_boundary-types) outlines which keywords can be used for such purpose: +* `d_vel_u` for the `u` component of the velocity field +* `d_vel_v` for the `v` component of the velocity field +* `d_vel_w` for the `w` component of the velocity field +* `d_pres` for the pressure field +* `d_s` for the scalar field (cannot be combined with the above) + +The separator `"/"` can be used to combine the keywords related to `u,v,w` and `p`. +For example, if one wants to only apply `u,v` and `p` values on a given boundary, one +should use `"d_vel_u/d_vel_v/d_pres"`. In this case, the `w` component would be +left untouched (not zeroed!). An example of case file from the +[cyl-boundary-layer example](https://github.com/ExtremeFLOW/neko/blob/develop/examples/cyl_boundary_layer/cyl_bl_user_bc_test.case) is shown below. + +```.json + +"case": { + "fluid": { + "boundary_types": [ + "d_vel_u/d_vel_v/d_vel_w", + "d_vel_u/d_vel_v/d_vel_w/d_pres", + "sym", + "w", + "on", + "on", + "w" + ] + } + "scalar": { + "boundary_types": [ + "d_s", + "d_s", + "", + "", + "", + "" + ] + } +} +``` + +In this example, we indicate in `case.fluid.boundary_types` that we would like +to apply a velocity profile on all three components `u,v,w` on the boundary +number 1 (in this case, the inlet boundary). On boundary number 2 (the outlet +boundary), we also indicate the three velocity components, with the addition +of the pressure. In `case.scalar.boundary_types`, we indicate the same for the +scalar on boundaries 1 and 2 (inlet and outlet). + +@attention Do not confuse the `d_s` and `d=x` boundary conditions for the scalar. +The latter is to be used to specify a constant Dirichlet value `x` along the +relevant boundary. + +Once the appropriate boundaries have been identified and labeled, +the user function `field_dirichlet_update` should be used to compute and +apply the desired values to our velocity/pressure/scalar field(s). The prefix +"field" in `field_dirichlet_update` refers to the fact that +a list of entire fields is passed down for the user to edit. + +The fields that are passed down are tied to the `boundary_types` keywords passed in +the case file. The function `field_dirichlet_update` is then called internally, +one time in the `fluid` solver and one time in the `scalar` solver (if enabled). + +Finally, depending on which boundary labels were input, the fields given to the user +are copied onto the solution field boundaries. + +The header of the user function is given in the code snippet below. + +```.f90 + subroutine dirichlet_update(field_bc_list, bc_bc_list, coef, t, tstep, which_solver) + type(field_list_t), intent(inout) :: field_bc_list + type(bc_list_t), intent(inout) :: bc_bc_list + type(coef_t), intent(inout) :: coef + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + character(len=*), intent(in) :: which_solver +``` + +The arguments and their purpose are as follows: + +* `field_bc_list` is the list of the field that can be edited. It is a list +of `field_t` objects. + * The field `i` contained in `field_bc_list` is accessed using + `field_bc_list%fields(i)%f` and will refer to a `field_t` object. + * If `which_solver = "fluid"`, it will contain the 4 fields `u,v,w,p`. They + are retrieved in that order in `field_bc_list`, i.e. `u` corresponds to + `field_bc_list%fields(1)%f`, etc. + * If `which_solver = "scalar"`, it will only contain the scalar field `s`. + +* `bc_bc_list` contains a list of the `bc_t` objects to help access the + boundary indices through the boundary `mask`. + * The boundary `i` contained in `bc_bc_list` is accessed with + `bc_bc_list%bc(i)%bcp`. + * The boundary mask of the `i`-th `bc_t` object contained in `bc_bc_list` is accessed + with `bc_bc_list%bc(i)%bcp%msk`. It contains the linear indices of each GLL point on + the `i`-th boundary facets. + @note `msk(0)` contains the size of the array. The first boundary index is `msk(1)`. + * If `which_solver = "fluid"`, it will contain the 4 `bc_t` objects + corresponding to `d_vel_u`, `d_vel_v`, `d_vel_w`, and `d_pres`. They + can be retrieved in that order, in the same way as for `field_bc_list`. + * If `which_solver = "scalar"`, it will only the 1 `bc_t` object + corresponding to `d_s`. + +* `coef` is a `coef_t` object containing various numerical parameters and +variables, such as the polynomial order `lx`, derivatives, facet normals... +* `t`, `tstep` are self-explanatory. +* `which_solver` takes the value `"fluid"` when the user function is called in +the fluid solver. It takes the value `"scalar"` when it is called in the scalar +solver. + +Links to the documentation to learn more about what the types mentioned above +contain and how to use them: field::field_t, bc::bc_t, coefs::coef_t. + +The user function should be registered in `user_setup` with the following line: + +```.f90 +u%user_dirichlet_update => dirichlet_update +``` + +A very simple example illustrating the above is shown below, which is taken from the +[cyl_boundary_layer example](https://github.com/ExtremeFLOW/neko/blob/feature/field_bcs/examples/cyl_boundary_layer/cyl_bl.f90) + +```.f90 + ! Initial example of using user specified dirichlet bcs + ! Note: This subroutine will be called two times, once in the fluid solver, and once + ! in the scalar solver (if enabled). + ! We apply u = (1,0,0) at the inlet/outlet, p = -1 at the outlet, and s(y,z) = sin(y)*sin(z) + ! at the inlet/outlet. + subroutine dirichlet_update(field_bc_list, bc_bc_list, coef, t, tstep, which_solver) + type(field_list_t), intent(inout) :: field_bc_list + type(bc_list_t), intent(inout) :: bc_bc_list + type(coef_t), intent(inout) :: coef + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + character(len=*), intent(in) :: which_solver + + integer :: i + real(kind=rp) :: y,z + + ! Only do this at the first time step since our BCs are constants. + if (tstep .ne. 1) return + + ! Check that we are being called by `fluid` + if (trim(which_solver) .eq. "fluid") then + + associate(u => field_bc_list%fields(1)%f, & + v => field_bc_list%fields(2)%f, & + w => field_bc_list%fields(3)%f, & + p => field_bc_list%fields(4)%f) + + ! + ! Perform operations on u%x, v%x, w%x and p%x here + ! Note that we are checking if fields are allocated. If a + ! boundary type only contains e.g. "d_vel_u/d_pres", the fields + ! v%x and w%x will not be allocated. + ! + ! Here we are applying very simple uniform boundaries (u,v,w) = (1,0,0) + ! and pressure outlet of p = -1 + ! + if (allocated(u%x)) u = 1.0_rp + if (allocated(v%x)) v = 0.0_rp + if (allocated(w%x)) w = 0.0_rp + if (allocated(p%x)) p = -1.0_rp + + end associate + + ! Check that we are being called by `scalar` + else if (trim(which_solver) .eq. "scalar") then + + associate( s => field_bc_list%fields(1)%f, & + s_bc => bc_bc_list%bc(1)%bcp) + + ! + ! Perform operations on the scalar field here + ! Note that we are checking if the field is allocated, in + ! case the boundary is empty. + ! + if (allocated(s%x)) then + + do i = 1, s_bc%msk(0) + y = s_bc%dof%y(s_bc%msk(i), 1, 1, 1) + z = s_bc%dof%z(s_bc%msk(i), 1, 1, 1) + s%x(s_bc%msk(i), 1, 1, 1) = sin(y)*sin(z) + end do + + end if + end associate + + end if + + end subroutine dirichlet_update +``` + +This example is applying constant dirichlet values at the selected +boundaries for the velocity components and presure. The scalar is applied a +function `s(y,z) = sin(y)*sin(z)` to demonstrate the usage of boundary masks. + +@attention The notation `u = 1.0_rp` is only possible because of the overloading of the +assignement operator `=` in `field_t`. In general, a field's array should be +accessed and modified with `u%%x`. + +Note that we are only applying our boundary values at the first timestep, +which is done simply with the line `if (tstep .ne. 1) return`. This is a trick +that can be used for time independent boundary profiles that require +some kind of time consuming operation like interpolation or reading from a file, +which would add overhead if executed at every time step. + +Observe that we always check if the fields are allocated before manipulating +them. This is to prevent accidental memory access if only part of the velocity +components or pressure are given in `case.fluid.boundary_types`. Fields in the +lists are only allocated if they are present in the case file.For +example, if we removed the `d_pres` condition in the JSON case file code snippet +above, the pressure field for our boundary condition would not be allocated ( +in the example above, `allocated(p%x)` would never be `true`). `"boundary_types": ["d_vel_u", "d_vel_v"]` will allocate the two first +fields in `field_bc_list`, which is the same behaviour as +`"boundary_types": ["d_vel_u/d_vel_v", ""]`. + +@attention All the rules for [Running on GPUs](@ref user-file_tips_running-on-gpus) +apply when working on field arrays. Use `device_memcpy` to make sure the device +arrays are also updated. + +## Additional remarks and tips + +### Running on GPUs {#user-file_tips_running-on-gpus} + +When running on GPUs, special care must be taken when using certain user +functions. The short explanation is that the device (GPU) has its own memory and +cannot directly access the memory on the host (CPU). This means that data and +more specifically arrays must be copied manually from the host to the device +(see device::device_memcpy). + +@attention In some cases, data transfer via `device_memcpy` is avoidable. Neko +has some device math functions implemented that operate directly on device +arrays. If you can decompose whatever operations you are performing in a user +function into a set of instructions from the `math` module (e.g. `cadd`, +`cfill`, `sub2`, ...), you may use the corresponding `device_math` functions to +[offload work to the GPU](@ref accelerators_offload-work). See the +[fluid forcing code snippet](@ref user-file_user-f) for a simple example. For +more advanced examples, see the +[rayleigh-benard example](https://github.com/ExtremeFLOW/neko/blob/49925b7a04a638259db3b1ddd54349ca57f5d207/examples/rayleigh-benard/rayleigh.f90#L96-119) +or the +[tgv example](https://github.com/ExtremeFLOW/neko/blob/49925b7a04a638259db3b1ddd54349ca57f5d207/examples/tgv/tgv.f90#L146-172). + +To illustrate this, let us have a look at the +[fluid initial condition code snippet](@ref user-file_user-ic): + +```.f90 + + !> Set the advecting velocity field. + subroutine set_velocity(u, v, w, p, params) + type(field_t), intent(inout) :: u + type(field_t), intent(inout) :: v + type(field_t), intent(inout) :: w + type(field_t), intent(inout) :: p + type(json_file), intent(inout) :: params + integer :: i, e, k, j + real(kind=rp) :: x, y + + ! + ! 1. Set the initial condition in fields u%x, v%x, w%x + ! + do i = 1, u%dof%size() + x = u%dof%x(i,1,1,1) + y = u%dof%y(i,1,1,1) + + ! Angular velocity is pi, giving a full rotation in 2 sec + u%x(i,1,1,1) = -y*pi + v%x(i,1,1,1) = x*pi + w%x(i,1,1,1) = 0 + end do + + ! + ! 2. Copy the data set in u%x, v%x, w%x to the device arrays + ! u%x_d, v%x_d, w%x_d. + ! + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(u%x, u%x_d, u%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(v%x, v%x_d, v%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(w%x, w%x_d, w%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + end if + + end subroutine set_velocity + +``` + +The code above is used to set the fluid initial condition, by specifying the +values of fields `u,v,w` (and `p`) at all points in the domain. Notice that we +have divided the above code into two parts. + +In the first part, we set the velocity components `u=-y*pi*`, `v=x*pi*`, and +`w=0`, which updates the velocity field arrays `u%%x, v%%x, w%%x` allocated on +the **host (CPU)**. If we were to run on GPUs, these lines of code would only +act on the velocity arrays on the host (CPU), leaving the device (GPU) arrays +untouched. + +We take care of this in the second part, for all three velocity arrays. To +update the device (GPU) arrays, we use `device_memcpy` to copy the data +contained in a host (CPU) array to a device (GPU) array. Looking at the details +of the `device_memcpy` calls, we note the following: +- Device arrays are refered to by appending the suffix `_d` to the host array + variable name (e.g. `u%%x` and `u%%x_d`). This is the standard in Neko. +- We specify the direction of the data movement with the flag `HOST_TO_DEVICE`. + Other flags can also be used to move data from device to host + (`DEVICE_TO_HOST`) or device to device (`DEVICE_TO_DEVICE`). See the + [accelerators page](@ref accelerators_data-transfer) for more details on this. +- The `sync` argument is a non-optional argument which dictates wether or not to + perform the data transfer synchronously. + +@attention Use asynchronous data transfers at your own risk! If you are unsure, +use `sync = .true.` as a starting point. + +Finally, observe that we use the flag `NEKO_BCKND_DEVICE` to check if we are +indeed running on GPUs. In that case, `NEKO_BCKND_DEVICE` would be equal to 1. + +### Registries {#user-file_tips_registries} + +Neko uses the concept of `registry` as a practical way to retrieve fields and +point zones anywhere in the user file. + +The field registry `neko_field_registry` is often used in user functions where +certain fields are not directly accessible as arguments. One can retrieve any +field in the registry by its `name` with `neko_field_registry%%get_field(name)`. +Default fields that are added to the registry are `u,v,w,p` and `s` if running +with the scalar enabled. For a practical example of usage, see the +[rayleigh benard example](https://github.com/ExtremeFLOW/neko/blob/49925b7a04a638259db3b1ddd54349ca57f5d207/examples/rayleigh-benard/rayleigh.f90#L102-L105) + +Other fields may be added to the registry by various simulation components. For +example: +- If running with `simulation_components.vorticity` enabled, the fields + `omega_x, omega_y, omega_z` will be accessible in the registry. +- If running with `simulation_components.lambda2` enabled, the field `lambda2` + will be accessible in the registry. + +@note You can add your own fields to the registry with `neko_field_registry%%add_field` (see field_registry::add_field). + +The point zone registry, `neko_point_zone_registry`, can be used to retrieve +pointers to `point_zone_t` objects defined in the case file. See +[using point zones](#point-zones_using-point-zones) for detailed instructions. diff --git a/examples/TS_channel/README.md b/examples/TS_channel/README.md new file mode 100644 index 00000000000..634e5c0aa05 --- /dev/null +++ b/examples/TS_channel/README.md @@ -0,0 +1,10 @@ +## Tollmien-Schlichting (TS) wave test in channel flow. +This test refers to [1]. +The temporal instability of TS wave in a channel flow at Re_c=5000 is tested in this example. +Domain Size: (x, y, z) = (2pi/1.12, 2, 2pi/2.1), where x and z are two homogeneous derections. +A recommended number of elements: (x, y, z) = (36, 36, 36) for decaying case and (16, 16, 16) for transitional case. +A 2D (k_z=0) TS wave and two 3D (k_z=\pm 1) oblique TS waves are settled up as the initial condition in a channel with 1 period on streamwise and spanwise direction. +One could use contrib/map_to_equidistant_1d twice to interpolate the fields onto a mesh whose grid points are equidistant in x- and z- directions. And then Fourier transform could be performed to get the amplitude of the TS wave. + +Reference: +[1] Schlatter, P.C., 2005. Large-eddy simulation of transition and turbulence in wall-bounded shear flow (Doctoral dissertation, ETH Zurich). diff --git a/examples/TS_channel/TS_channel.case b/examples/TS_channel/TS_channel.case new file mode 100644 index 00000000000..10d35027374 --- /dev/null +++ b/examples/TS_channel/TS_channel.case @@ -0,0 +1,49 @@ +{ +"version": 1.0, +"case": { + "mesh_file": "box.nmsh", + "output_at_end": false, + "output_boundary": false, + "output_checkpoints": false, + "output_precision": "double", + "checkpoint_control": "simulationtime", + "checkpoint_value": 100, + "end_time": 200, + "timestep": 0.02, + "constant_cfl": 0.4, + "numerics": { + "time_order": 3, + "polynomial_order": 7, + "dealias": true + }, + "fluid": { + "scheme": "pnpn", + "Re": 5000, + "initial_condition": { + "type": "user" + }, + "velocity_solver": { + "type": "cg", + "preconditioner": "jacobi", + "projection_space_size": 0, + "absolute_tolerance": 1e-7, + "max_iterations": 800 + }, + "pressure_solver": { + "type": "gmres", + "preconditioner": "hsmg", + "projection_space_size": 0, + "absolute_tolerance": 1e-5, + "max_iterations": 800 + }, + "flow_rate_force": { + "direction": 1, + "value": 0.66666666666666666666, + "use_averaged_flow": true + }, + "boundary_types": ["", "", "w", "w"], + "output_control": "simulationtime", + "output_value": 1 + } +} +} \ No newline at end of file diff --git a/examples/TS_channel/TS_channel.f90 b/examples/TS_channel/TS_channel.f90 new file mode 100644 index 00000000000..c5642468978 --- /dev/null +++ b/examples/TS_channel/TS_channel.f90 @@ -0,0 +1,312 @@ +module user + use neko + use ieee_arithmetic, only: ieee_is_nan + + implicit none + + integer, parameter :: num_rows = 1500 ! resolution of TS wave in Cheb points + integer, parameter :: num_columns = 7 ! y, real and imag part of TS wave in Cheb points + integer, parameter :: num_ygll = 8*8 ! number of GLL points in the y direction + real (kind=rp) :: Re = 5000.0_rp + real (kind=rp), parameter :: pi_rp = 4.0_rp * atan (1.0_rp) +contains + + ! Register user defined functions (see user_intf.f90) + subroutine user_setup(usr) + type(user_t), intent(inout) :: usr + + usr%user_mesh_setup => user_mesh_scale + usr%fluid_user_ic => user_ic + + end subroutine user_setup + + + ! Rescale mesh + ! Original mesh size: (2.0, 2.0, 1.0). + ! New mesh can easily be genreated with genmeshbox. + subroutine user_mesh_scale(msh) + type(mesh_t), intent(inout) :: msh + integer :: i + + do i = 1, size(msh%points) + msh%points(i)%x(1) = pi_rp*msh%points(i)%x(1)/1.12_rp + msh%points(i)%x(3) = pi_rp*msh%points(i)%x(3)*2.0_rp/2.1_rp + end do + + end subroutine user_mesh_scale + + ! User defined initial condition + subroutine user_ic(u, v, w, p, params) + type(field_t), intent(inout) :: u + type(field_t), intent(inout) :: v + type(field_t), intent(inout) :: w + type(field_t), intent(inout) :: p + type(json_file), intent(inout) :: params + integer :: i, j, i_y + real(kind=rp) :: uvw(3) + + real(kind=rp), dimension(num_rows, num_columns):: data_mode_cheb_2D + real(kind=rp), dimension(num_rows, num_columns):: data_mode_cheb_3D + real(kind=rp) :: y_GLC(num_rows) + real(kind=rp) :: TS2D_GLC(num_rows,num_columns-1) + real(kind=rp) :: TS3D_GLC(num_rows,num_columns-1) + integer :: ios + + real(kind=rp) :: x_fixed, z_fixed + real(kind=rp), dimension(num_ygll) :: y_GLL + real(kind=rp) :: TS2D_GLL(num_ygll,num_columns-1) + real(kind=rp) :: TS3D_GLL(num_ygll,num_columns-1) + + real(kind=rp) :: ur_2D, ui_2D, vr_2D, vi_2D + real(kind=rp) :: ur_3D, ui_3D, vr_3D, vi_3D, wr_3D, wi_3D + + ! data reading + open(unit=10, file='TSwave_cheb_2D.csv', status='old', action='read', iostat=ios) + if (ios /= 0) then + print *, "2D TS wave: Error opening the file!" + stop + end if + do i = 1, num_rows + read(10,*) (data_mode_cheb_2D(i, j), j = 1, num_columns) + end do + close(10) + y_GLC = data_mode_cheb_2D(:,1) + TS2D_GLC = data_mode_cheb_2D(:,2:num_columns) + + open(unit=10, file='TSwave_cheb_3D.csv', status='old', action='read', iostat=ios) + if (ios /= 0) then + print *, "3D TS wave: Error opening the file!" + stop + end if + do i = 1, num_rows + read(10,*) (data_mode_cheb_3D(i, j), j = 1, num_columns) + end do + close(10) + TS3D_GLC = data_mode_cheb_3D(:,2:num_columns) + + ! alternative of point zone + i_y = 1 + x_fixed = u%dof%x(1,1,1,1) + z_fixed = u%dof%z(1,1,1,1) + + ! initialize y_GLL + do i = 1, num_ygll + y_GLL(i) = 0.0 + end do + + do i = 1, u%dof%size() + if (.not. in_array(u%dof%y(i,1,1,1),y_GLL)) then + y_GLL(i_y) = u%dof%y(i,1,1,1) + i_y = i_y +1 + end if + end do + + do i = 1, num_columns-1 + do j = 1, num_ygll + TS2D_GLL(j,i) = GLC_GLL_interp(TS2D_GLC(:,i),y_GLC,y_GLL(j)) + TS3D_GLL(j,i) = GLC_GLL_interp(TS3D_GLC(:,i),y_GLC,y_GLL(j)) + end do + end do + + do i = 1, u%dof%size() + ur_2D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS2D_GLL(:,1)) + ui_2D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS2D_GLL(:,2)) + vr_2D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS2D_GLL(:,3)) + vi_2D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS2D_GLL(:,4)) + ur_3D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS3D_GLL(:,1)) + ui_3D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS3D_GLL(:,2)) + vr_3D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS3D_GLL(:,3)) + vi_3D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS3D_GLL(:,4)) + wr_3D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS3D_GLL(:,5)) + wi_3D = pick_pt(u%dof%y(i,1,1,1),y_GLL,TS3D_GLL(:,6)) + uvw = channel_ic(u%dof%x(i,1,1,1),u%dof%y(i,1,1,1),u%dof%z(i,1,1,1), & + ur_2D,ui_2D,vr_2D,vi_2D, & + ur_3D,ui_3D,vr_3D,vi_3D,wr_3D,wi_3D) + u%x(i,1,1,1) = uvw(1) + v%x(i,1,1,1) = uvw(2) + w%x(i,1,1,1) = uvw(3) + end do + + end subroutine user_ic + + function in_array(y,y_list) result(is_in) + logical :: is_in + real(kind=rp) :: y, y_list(num_ygll) + integer :: i + real(kind=rp) :: tol + + tol = 1e-7_rp + is_in = .false. + + do i = 1, num_ygll + if (abs(y-y_list(i)).le.tol) then + is_in = .true. + exit + end if + end do + + end function in_array + + function pick_pt(y_target,y_source,Pt_source) result(Pt_target) + real(kind=rp) :: y_target, Pt_target + real(kind=rp), dimension(num_ygll) :: y_source, Pt_source + real(kind=rp) :: tol + integer :: i + logical :: found + + tol = 1e-14_rp + found = .false. + do i = 1, num_ygll + if (abs(y_target-y_source(i)) .le. tol) then + Pt_target = Pt_source(i) + found = .true. + exit + end if + end do + + if (.not. found) then + print *, 'tolerence too small for picking points!!!!!!' + end if + + end function pick_pt + + function channel_ic(x, y, z, & + ur_2D,ui_2D,vr_2D,vi_2D, & + ur_3D,ui_3D,vr_3D,vi_3D,wr_3D,wi_3D) result(uvw) + real(kind=rp) :: x, y, z + real(kind=rp) :: uvw(3) + real(kind=rp) :: ub + + real(kind=rp) :: ur_2D, ui_2D, vr_2D, vi_2D + real(kind=rp) :: ur_3D, ui_3D, vr_3D, vi_3D, wr_3D, wi_3D + real(kind=rp) :: TS_amp_2D, TS_amp_3D, alpha, beta + + complex(kind=rp) :: spa_osci_2D + complex(kind=rp) :: u_mode_2D, v_mode_2D + real(kind=rp) :: u_pert_TS_2D, v_pert_TS_2D + + complex(kind=rp) :: u_mode_3D, v_mode_3D, w_mode_3D + complex(kind=rp) :: spa_osci_3D_p, spa_osci_3D_n + real(kind=rp) :: u_pert_TS_3D, v_pert_TS_3D, w_pert_TS_3D + + ! amplitude for decaying TS wave: + ! TS_amp_2D = 1e-6_rp + ! TS_amp_3D = 0.0_rp + + ! amplitude for secondary instability TS wave: + TS_amp_2D = 3e-2_rp + TS_amp_3D = 1e-4_rp + + alpha = 1.12_rp + beta = 2.1_rp + + spa_osci_2D = exp((0.0_rp,1.0_rp)*alpha*x) + u_mode_2D = cmplx(ur_2D,ui_2D,rp) + v_mode_2D = cmplx(vr_2D,vi_2D,rp) + + u_pert_TS_2D = TS_amp_2D*real(u_mode_2D*spa_osci_2D,rp) + v_pert_TS_2D = TS_amp_2D*real(v_mode_2D*spa_osci_2D,rp) + + spa_osci_3D_p = exp((0.0_rp,1.0_rp)*alpha*x+(0.0_rp,1.0_rp)*beta*z) + spa_osci_3D_n = exp((0.0_rp,1.0_rp)*alpha*x-(0.0_rp,1.0_rp)*beta*z) + u_mode_3D = cmplx(ur_3D,ui_3D,rp) + v_mode_3D = cmplx(vr_3D,vi_3D,rp) + w_mode_3D = cmplx(wr_3D,wi_3D,rp) + + u_pert_TS_3D = TS_amp_3D*real(u_mode_3D*spa_osci_3D_p,rp) + u_pert_TS_3D = u_pert_TS_3D + TS_amp_3D*real(u_mode_3D*spa_osci_3D_n,rp) + v_pert_TS_3D = TS_amp_3D*real(v_mode_3D*spa_osci_3D_p,rp) + v_pert_TS_3D = v_pert_TS_3D + TS_amp_3D*real(v_mode_3D*spa_osci_3D_n,rp) + w_pert_TS_3D = TS_amp_3D*real(w_mode_3D*spa_osci_3D_p,rp) + w_pert_TS_3D = w_pert_TS_3D + TS_amp_3D*real(-w_mode_3D*spa_osci_3D_n,rp) + + ub = 1.0_rp-y*y + + uvw(1) = ub + u_pert_TS_2D + u_pert_TS_3D + uvw(2) = v_pert_TS_2D + v_pert_TS_3D + uvw(3) = w_pert_TS_3D + + end function channel_ic + + ! The raw data in .csv file is on GLC points, + ! hence a interpolation from GLC points to GLL points is performed. + function GLC_GLL_interp(f_GLC,x_GLC,x_GLL) result(f_GLL) + real(kind=rp) :: x_GLL, f_GLL + real(kind=rp), dimension(num_rows) :: f_GLC, x_GLC + real(kind=rp), dimension(num_rows) :: chi, psi + real(kind=rp) :: xn, xs, xe, xnc, dchebyshev + integer :: i, N, cj + logical :: is_nan + + N = num_rows - 1 + xn = x_GLL + xs = x_GLC(1) + xe = x_GLC(N+1) + xnc = ( 2.0_rp*xn - (xs+xe) )/ & + ( xe - xs) + dchebyshev = eval_dchebyshev(xnc,N) + do i = 1, N+1 + chi(i) = ( 2.0_rp*x_GLC(i) - (xs+xe) )/ & + ( xe - xs) + + if (i .eq. 1) then + cj = 2.0_rp + else if (i .eq. N+1) then + cj = 2.0_rp + else + cj = 1.0_rp + end if + + psi(i) = ((-1.0_rp)**(i) * (1.0_rp-xnc*xnc) * dchebyshev)/ & + (cj * N*N * (xnc-chi(i))) + + is_nan = ieee_is_nan(psi(i)) + if (is_nan .or. psi(i) .gt. 1e+3) then + psi(i) = 1.0_rp + end if + + end do + + f_GLL = 0.0_rp + do i = 1, N+1 + f_GLL = f_GLL + psi(i)*f_GLC(i) + end do + + end function GLC_GLL_interp + + ! Evaluate the derivative of the Chebyshev polynomials of the first kind + function eval_dchebyshev(x,N) result(dT_Nx) + real(kind=rp) :: x, dT_Nx, tmp + integer :: N, j, nn ! N should be num_rows - 1 + real(kind=rp), dimension(N+1) :: dT + + dT(1) = 0.0_rp + dT(2) = 1.0_rp + + do j = 3, N+1 + nn = j - 2 + tmp = eval_chebyshev(x,nn) + if (nn .eq. 1) then + dT(j) = 2.0_rp*(nn+1.0_rp)*tmp + else + dT(j) = 2.0_rp*(nn+1.0_rp)*tmp + (nn+1.0_rp)/(nn-1.0_rp)*dT(j-2) + end if + end do + + dT_Nx = dT(N+1) + end function eval_dchebyshev + + ! Evaluate the Chebyshev polynomials of the first kind + function eval_chebyshev(x,N) result(T_Nx) + real(kind=rp) :: x, T_Nx + integer :: N, j ! N should be num_rows - 1 + real(kind=rp), dimension(N+1) :: T + T(1) = 1.0_rp + T(2) = x + do j = 3, N+1 + T(j) = 2.0_rp*x*T(j-1) - T(j-2) + end do + T_Nx = T(N+1) + end function eval_chebyshev + +end module user diff --git a/examples/TS_channel/TSwave_cheb_2D.csv b/examples/TS_channel/TSwave_cheb_2D.csv new file mode 100644 index 00000000000..c6f3065c2d1 --- /dev/null +++ b/examples/TS_channel/TSwave_cheb_2D.csv @@ -0,0 +1,1500 @@ +-1.000000000000000000e+00,4.584758335742073160e-09,-6.605710750344088278e-09,-0.000000000000000000e+00,-0.000000000000000000e+00,-0.000000000000000000e+00,-0.000000000000000000e+00 +-9.999978038281271964e-01,-4.015715757417417654e-05,4.569989657188079451e-05,5.619723763318393232e-11,4.938177670084886477e-11,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.999912153221552913e-01,-1.606457918596924050e-04,1.827882689522557561e-04,8.992473093557475700e-10,7.902535884006655202e-10,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.999802345110231361e-01,-3.614719394510981679e-04,4.111652352468359657e-04,4.551750322530005835e-09,4.000872470589427502e-09,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.999648614429623716e-01,-6.426530441740362564e-04,7.306753622148200612e-04,1.438212443575207382e-08,1.264525925619368440e-08,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.999450961854965403e-01,-1.004213472097001576e-03,1.141101164334928746e-03,3.510073438960935335e-08,3.087358290910671271e-08,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.999209388254417519e-01,-1.446184279505558770e-03,1.642163282546146722e-03,7.275447708838917253e-08,6.402269644946420233e-08,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.998923894689052405e-01,-1.968603028368624917e-03,2.233520585655198072e-03,1.347198280147815615e-07,1.186168143886666890e-07,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.998594482412856976e-01,-2.571513590167696928e-03,2.914770424059085277e-03,2.296943538311653487e-07,2.023683297837774581e-07,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.998221152872722728e-01,-3.254965803044406868e-03,3.685448874605150912e-03,3.676867012547371414e-07,3.241787592500070484e-07,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.997803907708440185e-01,-4.019015250155398913e-03,4.545030984689757934e-03,5.600050670667448155e-07,4.941398079966697259e-07,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.997342748752695574e-01,-4.863722874374213669e-03,5.492931154011141022e-03,8.192441633677707300e-07,7.235345479748266059e-07,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.996837678031056384e-01,-5.789154538022578227e-03,6.528503416356815081e-03,1.159270685870362062e-06,1.024838687943126465e-06,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.996288697761966935e-01,-6.795380694260139999e-03,7.651041913961972862e-03,1.595207300688258317e-06,1.411721933535674662e-06,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.995695810356738376e-01,-7.882475804836575756e-03,8.859781267889960815e-03,2.143415162548993643e-06,1.899049411149232100e-06,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.995059018419534258e-01,-9.050517988285667823e-03,1.015389708960214542e-02,2.821474980010050348e-06,2.502883152689057148e-06,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.994378324747364983e-01,-1.029958830017454509e-02,1.153250644820765121e-02,3.648166643228454403e-06,3.240483609283829179e-06,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.993653732330070039e-01,-1.162977037689127621e-02,1.299466849679906978e-02,4.643447436725973307e-06,4.130311192300953368e-06,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.992885244350309115e-01,-1.304114956475402320e-02,1.453938493992837540e-02,5.828428851008756323e-06,5.192027803534321687e-06,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.992072864183545455e-01,-1.453381257942183181e-02,1.616560076609400640e-02,7.225352016233520099e-06,6.446498353873764760e-06,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.991216595398032529e-01,-1.610784643491034590e-02,1.787220480932491182e-02,8.857561781682559340e-06,7.915792225669524219e-06,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.990316441754796273e-01,-1.776333832410204386e-02,1.965803053563370401e-02,1.074947946040248018e-05,9.623184683296080429e-06,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.989372407207620652e-01,-1.950037429246448059e-02,2.152185671087786592e-02,1.292657427116070628e-05,1.159315819827774761e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.988384495903031013e-01,-2.131903883009049483e-02,2.346240814931219165e-02,1.541533348996891026e-05,1.385140363593812098e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.987352712180274095e-01,-2.321941403472903287e-02,2.547835659641024472e-02,1.824323135229658467e-05,1.642482133433034830e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.986277060571295827e-01,-2.520157880929160474e-02,2.756832152734582653e-02,2.143869673244703586e-05,1.934152200916413204e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.985157545800730228e-01,-2.726560783720866077e-02,2.973087096329946055e-02,2.503107961704672488e-05,2.263082745833955740e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.983994172785870536e-01,-2.941157105189550966e-02,3.196452251818065332e-02,2.905061641899702067e-05,2.632327105685760822e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.982786946636650338e-01,-3.163953246180171985e-02,3.426774429667543004e-02,3.352839416655188442e-05,3.045059801067757604e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.981535872655622477e-01,-3.394954960057282312e-02,3.663895576994501135e-02,3.849631357931886176e-05,3.504576533468219926e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.980240956337933511e-01,-3.634167222606450587e-02,3.907652894547419126e-02,4.398705107903873513e-05,4.014294153755993625e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.978902203371300406e-01,-3.881594170390346987e-02,4.157878942943630390e-02,5.003401979358865909e-05,4.577750595021980233e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.977519619635988324e-01,-4.137238998905035103e-02,4.414401729483752312e-02,5.667132954231383381e-05,5.198604773628320158e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.976093211204778433e-01,-4.401103862350966983e-02,4.677044843979915462e-02,6.393374587127469534e-05,5.880636448366306497e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.974622984342947918e-01,-4.673189772445730805e-02,4.945627575114016489e-02,7.185664819808252777e-05,6.627746038074531544e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.973108945508237788e-01,-4.953496528103580598e-02,5.219965013999435660e-02,8.047598707080369181e-05,7.443954395257284119e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.971551101350828450e-01,-5.242022596938809148e-02,5.499868178174252553e-02,8.982824057399080373e-05,8.333402534199321884e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.969949458713304180e-01,-5.538765022001540528e-02,5.785144152347981850e-02,9.995036997267215805e-05,9.300351304743627139e-05,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.968304024630633142e-01,-5.843719329740406504e-02,6.075596200935324404e-02,1.108797745994818669e-04,1.034918101573731021e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.966614806330126308e-01,-6.156879422721388273e-02,6.371023917151931448e-02,1.226542460476181065e-04,1.148439100017481073e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.964881811231409703e-01,-6.478237494204601432e-02,6.671223341532400708e-02,1.353119217092943027e-04,1.271059912209631347e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.963105046946395538e-01,-6.807783932561203843e-02,6.975987104507169745e-02,1.488912376911875684e-04,1.403254122313376162e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.961284521279242243e-01,-7.145507211111129520e-02,7.285104572099709852e-02,1.634308811763625889e-04,1.545507050284642282e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.959420242226324493e-01,-7.491393810197208292e-02,7.598362000669420246e-02,1.789697422992080902e-04,1.698315683201930785e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.957512217976196567e-01,-7.845428116555669407e-02,7.915542658579444169e-02,1.955468655357937271e-04,1.862188599836089409e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.955560456909556821e-01,-8.207592309034392186e-02,8.236426985397188316e-02,2.132014006658221827e-04,2.037645887616183120e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.953564967599211055e-01,-8.577866295579579525e-02,8.560792746733915382e-02,2.319725533812533856e-04,2.225219052053675783e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.951525758810034761e-01,-8.956227636818504811e-02,8.888415207032281107e-02,2.518995356229655042e-04,2.425450919338398162e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.949442839498933155e-01,-9.342651399065823870e-02,9.219067241228171006e-02,2.730215156161921856e-04,2.638895530093001438e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.947316218814804545e-01,-9.737110132830022191e-02,9.552519547648959108e-02,2.953775677005182555e-04,2.866118025191999492e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.945145906098497024e-01,-1.013957374318578131e-01,9.888540779287195714e-02,3.190066220755136475e-04,3.107694523657559844e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.942931910882769619e-01,-1.055000943230918337e-01,1.022689769401261828e-01,3.439474143428407371e-04,3.364211991378266678e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.940674242892252321e-01,-1.096838161361816666e-01,1.056735535357281458e-01,3.702384350650712646e-04,3.636268101790013385e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.938372912043398344e-01,-1.139465182837329227e-01,1.090967727844612128e-01,3.979178793506877709e-04,3.924471087114223835e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.936027928444443047e-01,-1.182877869443615254e-01,1.125362561899469760e-01,4.270235964901448827e-04,4.229439581050497307e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.933639302395360637e-01,-1.227071780735831535e-01,1.159896131785476125e-01,4.575930397122510195e-04,4.551802452230696767e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.931207044387818650e-01,-1.272042167891999642e-01,1.194544428253631607e-01,4.896632160889939113e-04,4.892198627830635733e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.928731165105130207e-01,-1.317783968969926112e-01,1.229283358444472479e-01,5.232706367262008945e-04,5.251276908533830884e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.926211675422206282e-01,-1.364291799805502914e-01,1.264088760940936251e-01,5.584512672383153088e-04,5.629695773374848635e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.923648586405513505e-01,-1.411559954026006536e-01,1.298936424203705753e-01,5.952404785394235006e-04,6.028123175807622398e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.921041909313020879e-01,-1.459582389859540030e-01,1.333802103630307434e-01,6.336729980400307742e-04,6.447236329761612183e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.918391655594147593e-01,-1.508352732500017612e-01,1.368661541640279200e-01,6.737828613214641785e-04,6.887721485638292462e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.915697836889719730e-01,-1.557864265573697671e-01,1.403490483366521269e-01,7.156033643362985294e-04,7.350273697909135697e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.912960465031916968e-01,-1.608109926998656791e-01,1.438264695112680680e-01,7.591670161269938825e-04,7.835596581533231519e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.910179552044214857e-01,-1.659082306899832826e-01,1.472959984203584261e-01,8.045054922207073994e-04,8.344402059715324595e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.907355110141340404e-01,-1.710773643945936728e-01,1.507552215886171498e-01,8.516495887259622989e-04,8.877410101750737251e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.904487151729213235e-01,-1.763175821772061946e-01,1.542017329057207009e-01,9.006291770727780028e-04,9.435348451488133423e-04,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.901575689404893410e-01,-1.816280367207549762e-01,1.576331359401051957e-01,9.514731596120402620e-04,1.001895234588507950e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.898620735956522587e-01,-1.870078448854993747e-01,1.610470452065719538e-01,1.004209426055420159e-03,1.062896422471532390e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.895622304363274058e-01,-1.924560875666547766e-01,1.644410883607708984e-01,1.058864810744415035e-03,1.126613343075429444e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.892580407795287245e-01,-1.979718094019875807e-01,1.678129078258102924e-01,1.115465050963754213e-03,1.193121590076127178e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.889495059613619965e-01,-2.035540190047723241e-01,1.711601626720931080e-01,1.174034746176056288e-03,1.262497384701687184e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.886366273370181812e-01,-2.092016891019843061e-01,1.744805303087682835e-01,1.234597318362810328e-03,1.334817543156801081e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.883194062807676428e-01,-2.149137560948944370e-01,1.777717083051130742e-01,1.297174973399350170e-03,1.410159443040083591e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.879978441859542659e-01,-2.206891205593774496e-01,1.810314161389475485e-01,1.361788663636867309e-03,1.488600988937754466e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.876719424649894608e-01,-2.265266470915031360e-01,1.842573968625139946e-01,1.428458051613333904e-03,1.570220577195734126e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.873417025493456123e-01,-2.324251650058182239e-01,1.874474189446555805e-01,1.497201475037026512e-03,1.655097059908537237e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.870071258895497524e-01,-2.383834683021361645e-01,1.905992781423443694e-01,1.568035913096781087e-03,1.743309708225850322e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.866682139551777864e-01,-2.444003159238797018e-01,1.937107986896731138e-01,1.640976954054720362e-03,1.834938174770612629e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.863249682348473879e-01,-2.504744322397058354e-01,1.967798354245550718e-01,1.716038764132165771e-03,1.930062455431996553e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.859773902362116704e-01,-2.566045072075279765e-01,1.998042752656677690e-01,1.793234057970977948e-03,2.028762850373290374e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.856254814859526370e-01,-2.627891975230010613e-01,2.027820388808080077e-01,1.872574070387421049e-03,2.131119924521994446e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.852692435297746298e-01,-2.690271263838397431e-01,2.057110822591804444e-01,1.954068529762595114e-03,2.237214467450008779e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.849086779323972252e-01,-2.753168843664817733e-01,2.085893980686171212e-01,2.037725632742173974e-03,2.347127452480931263e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.845437862775482385e-01,-2.816570304467751473e-01,2.114150178148847259e-01,2.123552020739971711e-03,2.460939995541131398e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.841745701679575076e-01,-2.880460917291755574e-01,2.141860125249895608e-01,2.211552757861768876e-03,2.578733313301258354e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.838010312253490097e-01,-2.944825651791651566e-01,2.169004949282093975e-01,2.301731310499186261e-03,2.700588680858049921e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.834231710904342005e-01,-3.009649178691596871e-01,2.195566201585230914e-01,2.394089528526981148e-03,2.826587389237281058e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.830409914229045754e-01,-3.074915875216546191e-01,2.221525880761713312e-01,2.488627628204762281e-03,2.956810702158182980e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.826544939014246749e-01,-3.140609839837957828e-01,2.246866439068208599e-01,2.585344176892092824e-03,3.091339812706609281e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.822636802236245357e-01,-3.206714897900287409e-01,2.271570798744191322e-01,2.684236079275773446e-03,3.230255799666643685e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.818685521060921406e-01,-3.273214616110995401e-01,2.295622363264834820e-01,2.785298565500161742e-03,3.373639583749555530e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.814691112843658694e-01,-3.340092302250968204e-01,2.319005030440808857e-01,2.888525180913495850e-03,3.521571883456135754e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.810653595129272819e-01,-3.407331022658136588e-01,2.341703205942564092e-01,2.993907777754947763e-03,3.674133170705736005e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.806572985651929031e-01,-3.474913612606537194e-01,2.363701811889792337e-01,3.101436508518272579e-03,3.831403626588281847e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.802449302335066728e-01,-3.542822690283070619e-01,2.384986299514322838e-01,3.211099821129807623e-03,3.993463097101128401e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.798282563291319525e-01,-3.611040661069046531e-01,2.405542660469523752e-01,3.322884455967361865e-03,4.160391048826990457e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.794072786822439758e-01,-3.679549734420433649e-01,2.425357437220989620e-01,3.436775444762498518e-03,4.332266524535395569e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.789819991419210776e-01,-3.748331933761483992e-01,2.444417729841460163e-01,3.552756111236366268e-03,4.509168099095075058e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.785524195761373667e-01,-3.817369109830798246e-01,2.462711205601711972e-01,3.670808073513449898e-03,4.691173835360985984e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.781185418717538438e-01,-3.886642955625760343e-01,2.480226110446334120e-01,3.790911248436659373e-03,4.878361240506193415e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.776803679345105191e-01,-3.956135014366701230e-01,2.496951275995739905e-01,3.913043857818200751e-03,5.070807222417976490e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.772378996890179748e-01,-4.025826701903341132e-01,2.512876123050816202e-01,4.037182436300036301e-03,5.268588046569175863e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.767911390787489267e-01,-4.095699311689335320e-01,2.527990675651267671e-01,4.163301841153165678e-03,5.471779293356328344e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.763400880660293435e-01,-4.165734028771165831e-01,2.542285562660634990e-01,4.291375263997976308e-03,5.680455815438764308e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.758847486320304521e-01,-4.235911951298248490e-01,2.555752022349048747e-01,4.421374244055587856e-03,5.894691695989352602e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.754251227767596344e-01,-4.306214093513427432e-01,2.568381913083817025e-01,4.553268683372937303e-03,6.114560207206808790e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.749612125190515455e-01,-4.376621415584195374e-01,2.580167714003854251e-01,4.687026863873674370e-03,6.340133769545601254e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.744930198965594537e-01,-4.447114826351561412e-01,2.591102527272519107e-01,4.822615465911802776e-03,6.571483911832372987e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.740205469657464699e-01,-4.517675200563890936e-01,2.601180089233641746e-01,4.959999588787148035e-03,6.808681231535170832e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.735437958018759996e-01,-4.588283395016230504e-01,2.610394765758605184e-01,5.099142773005719415e-03,7.051795356165829927e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.730627684990029724e-01,-4.658920261455591905e-01,2.618741557703765932e-01,5.240007023959882340e-03,7.300894905187480140e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.725774671699647378e-01,-4.729566666574972489e-01,2.626216104133639040e-01,5.382552837589853027e-03,7.556047453045303293e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.720878939463715174e-01,-4.800203500736582862e-01,2.632814679061648566e-01,5.526739227416144207e-03,7.817319492901290620e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.715940509785973012e-01,-4.870811696499460774e-01,2.638534198759184823e-01,5.672523753351691421e-03,8.084776401270189067e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.710959404357700775e-01,-4.941372243366404238e-01,2.643372215900573474e-01,5.819862552086398598e-03,8.358482403694609403e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.705935645057625072e-01,-5.011866200484255129e-01,2.647326921240029662e-01,5.968710368854074355e-03,8.638500541394186771e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.700869253951824867e-01,-5.082274715185780556e-01,2.650397143936237820e-01,6.119020590868619845e-03,8.924892638883557411e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.695760253293631559e-01,-5.152579038659700350e-01,2.652582348406193891e-01,6.270745282206197091e-03,9.217719273026057578e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.690608665523531284e-01,-5.222760532651549159e-01,2.653882632436194711e-01,6.423835220051444216e-03,9.517039742754548545e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.685414513269068326e-01,-5.292800696132888838e-01,2.654298726059474700e-01,6.578239932443621929e-03,9.822912040239724363e-03,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.680177819344745194e-01,-5.362681169858526387e-01,2.653831985442586250e-01,6.733907737298169480e-03,1.013539282330076265e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.674898606751921593e-01,-5.432383754198074621e-01,2.652484389464386805e-01,6.890785782677637944e-03,1.045453738877777372e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.669576898678714505e-01,-5.501890422648226808e-01,2.650258536243137453e-01,7.048820088319364754e-03,1.078039964731325982e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.664212718499893828e-01,-5.571183340424930019e-01,2.647157641608571810e-01,7.207955588590159515e-03,1.111303209954987367e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.658806089776785786e-01,-5.640244868867303474e-01,2.643185526248835737e-01,7.368136176424852982e-03,1.145248581350641684e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.653357036257157464e-01,-5.709057589804829025e-01,2.638346615784758487e-01,7.529304748307066170e-03,1.179881040324115386e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.647865581875129104e-01,-5.777604312189714708e-01,2.632645931977857234e-01,7.691403250529520175e-03,1.215205400926326618e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.642331750751055308e-01,-5.845868083927304726e-01,2.626089087570533387e-01,7.854372726343230443e-03,1.251226327979241583e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.636755567191421790e-01,-5.913832205889862914e-01,2.618682275891214273e-01,8.018153364144520179e-03,1.287948335367388071e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.631137055688746562e-01,-5.981480250143095256e-01,2.610432265332075508e-01,8.182684546412419616e-03,1.325375784474308941e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.625476240921463367e-01,-6.048796065098318575e-01,2.601346391111887812e-01,8.347904899750339947e-03,1.363512882792339981e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.619773147753815090e-01,-6.115763784829398864e-01,2.591432546765214950e-01,8.513752345562602269e-03,1.402363682599087370e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.614027801235747184e-01,-6.182367853219902232e-01,2.580699175359055797e-01,8.680164151719498705e-03,1.441932079867616623e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.608240226602797751e-01,-6.248593013696038811e-01,2.569155251835966713e-01,8.847076984438173933e-03,1.482221813257874471e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.602410449275984305e-01,-6.314424345207434452e-01,2.556810287005907290e-01,9.014426961095689875e-03,1.523236463277133704e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.596538494861691637e-01,-6.379847250419435900e-01,2.543674309267998646e-01,9.182149703953042252e-03,1.564979451634458674e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.590624389151560791e-01,-6.444847471572411335e-01,2.529757851820551018e-01,9.350180393827714834e-03,1.607454040620293068e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.584668158122380266e-01,-6.509411113552638906e-01,2.515071947045323664e-01,9.518453824537385921e-03,1.650663332779059647e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.578669827935962777e-01,-6.573524630231112331e-01,2.499628111368637218e-01,9.686904457767563092e-03,1.694610270649303529e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.572629424939035347e-01,-6.637174849251269437e-01,2.483438334147624327e-01,9.855466478140352191e-03,1.739297636640309633e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.566546975663126062e-01,-6.700348970136877913e-01,2.466515069542286054e-01,1.002407384880701913e-02,1.784728053097698122e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.560422506824440836e-01,-6.763034572563005886e-01,2.448871218835259878e-01,1.019266036724367740e-02,1.830903982449505474e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.554256045323753499e-01,-6.825219630261748494e-01,2.430520120385311900e-01,1.036115972103395302e-02,1.877827727551320547e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.548047618246282564e-01,-6.886892511136515349e-01,2.411475536784464524e-01,1.052950554402406515e-02,1.925501432165602558e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.541797252861573542e-01,-6.948041983130824883e-01,2.391751642765901908e-01,1.069763147243584477e-02,1.973927081553765450e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.535504976623380147e-01,-7.008657221557975658e-01,2.371363009638535468e-01,1.086547120115614537e-02,2.023106503221926145e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.529170817169542174e-01,-7.068727809090493697e-01,2.350324590497357147e-01,1.103295853958408644e-02,2.073041367787135242e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.522794802321865593e-01,-7.128243744602505982e-01,2.328651715433825709e-01,1.120002746797059585e-02,2.123733189970538476e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.516376960085998205e-01,-7.187195447699179507e-01,2.306360065492002454e-01,1.136661219331565158e-02,2.175183329752127318e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.509917318651310847e-01,-7.245573755973505614e-01,2.283465668887986466e-01,1.153264720499423690e-02,2.227392993626841830e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.503415906390768608e-01,-7.303369929263821003e-01,2.259984882616382640e-01,1.169806733051625275e-02,2.280363235960793447e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.496872751860808703e-01,-7.360575654395383749e-01,2.235934375558802090e-01,1.186280779058472915e-02,2.334094960500975036e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.490287883801215019e-01,-7.417183038408756701e-01,2.211331116990822576e-01,1.202680425381811094e-02,2.388588921964101414e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.483661331134992656e-01,-7.473184621160452457e-01,2.186192367932850988e-01,1.218999289155088886e-02,2.443845727752841715e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.476993122968236927e-01,-7.528573368323550064e-01,2.160535654559793128e-01,1.235231043183974281e-02,2.499865839813563903e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.470283288590011228e-01,-7.583342670581173506e-01,2.134378766006879702e-01,1.251369421293119059e-02,2.556649576541563176e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.463531857472214925e-01,-7.637486344059704058e-01,2.107739732198016247e-01,1.267408223663003992e-02,2.614197114809329453e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.456738859269452346e-01,-7.690998624851832499e-01,2.080636811615534376e-01,1.283341322056882547e-02,2.672508492069506844e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.449904323818909546e-01,-7.743874169339112212e-01,2.053088473949208070e-01,1.299162665014031749e-02,2.731583608529175589e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.443028281140214419e-01,-7.796108063023531320e-01,2.025113391786268480e-01,1.314866282959417654e-02,2.791422229484294415e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.436110761435310135e-01,-7.847695795125044782e-01,1.996730423259598652e-01,1.330446293297617155e-02,2.852023987685856771e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.429151795088319687e-01,-7.898633271788012555e-01,1.967958593828450786e-01,1.345896905360831688e-02,2.913388385720244375e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.422151412665417114e-01,-7.948916808338183371e-01,1.938817089718965214e-01,1.361212425333781055e-02,2.975514798582207243e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.415109644914688714e-01,-7.998543114172472768e-01,1.909325236561095762e-01,1.376387261079842217e-02,3.038402476182705189e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.408026522765996491e-01,-8.047509299206060396e-01,1.879502487006571576e-01,1.391415926869285502e-02,3.102050545960748085e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.400902077330849371e-01,-8.095812866858166368e-01,1.849368408482036930e-01,1.406293048027285747e-02,3.166458015576633506e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.393736339902262200e-01,-8.143451702802523284e-01,1.818942669409046031e-01,1.421013365515309997e-02,3.231623775629782064e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.386529341954613637e-01,-8.190424073689522855e-01,1.788245020895547766e-01,1.435571740381289853e-02,3.297546602404856364e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.379281115143518477e-01,-8.236728611300078162e-01,1.757295289530690641e-01,1.449963158134393720e-02,3.364225160665129666e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.371991691305681105e-01,-8.282364313272041478e-01,1.726113358850835389e-01,1.464182733027077910e-02,3.431658006427117102e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.364661102458753383e-01,-8.327330531195336061e-01,1.694719155595295623e-01,1.478225712203337823e-02,3.499843589841239444e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.357289380801203649e-01,-8.371626959884609365e-01,1.663132639197675011e-01,1.492087479759987047e-02,3.568780257985249699e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.349876558712167940e-01,-8.415253636381191393e-01,1.631373786515099567e-01,1.505763560701676301e-02,3.638466257770826134e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.342422668751307890e-01,-8.458210918488967556e-01,1.599462582424134582e-01,1.519249624786843215e-02,3.708899738765317894e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.334927743658669730e-01,-8.500499490760194954e-01,1.567419002233552960e-01,1.532541490270973583e-02,3.780078756081921287e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.327391816354543286e-01,-8.542120335774673334e-01,1.535262998693244230e-01,1.545635127487803712e-02,3.852001273217017102e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.319814919939308773e-01,-8.583074739833789213e-01,1.503014495495432756e-01,1.558526662362033285e-02,3.924665164866899397e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.312197087693302455e-01,-8.623364269410956906e-01,1.470693368878276397e-01,1.571212379793918146e-02,3.998068219760483877e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.304538353076660107e-01,-8.662990765900545620e-01,1.438319435911406818e-01,1.583688726890250878e-02,4.072208143394896035e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.296838749729178231e-01,-8.701956339062371093e-01,1.405912448825621108e-01,1.595952316111432998e-02,4.147082560828258685e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.289098311470160851e-01,-8.740263348570195268e-01,1.373492074649770023e-01,1.607999928290183150e-02,4.222689019402812299e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.281317072298272963e-01,-8.777914391341986589e-01,1.341077886541066078e-01,1.619828515471903943e-02,4.299024991396966505e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.273495066391393982e-01,-8.814912291191590032e-01,1.308689355452729852e-01,1.631435203685977983e-02,4.376087876625601308e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.265632328106458981e-01,-8.851260091805656094e-01,1.276345837212212730e-01,1.642817295569224192e-02,4.453875005034706180e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.257728891979321029e-01,-8.886961031265123712e-01,1.244066559735897554e-01,1.653972272861762433e-02,4.532383639192222313e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.249784792724585758e-01,-8.922018545428380687e-01,1.211870613920115608e-01,1.664897798747036473e-02,4.611610976710118354e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.241800065235469264e-01,-8.956436241429307499e-01,1.179776941975647364e-01,1.675591720092522996e-02,4.691554152668175881e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.233774744583637117e-01,-8.990217885121185581e-01,1.147804326902030908e-01,1.686052069510858564e-02,4.772210241806926212e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.225708866019060039e-01,-9.023367404959528049e-01,1.115971387476673293e-01,1.696277067353053197e-02,4.853576260818726062e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.217602464969846254e-01,-9.055888854147221556e-01,1.084296557524209864e-01,1.706265123503662370e-02,4.935649170466045732e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.209455577042096053e-01,-9.087786411461096225e-01,1.052798085459994565e-01,1.716014839046691581e-02,5.018425877551770803e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.201268238019744139e-01,-9.119064374212553226e-01,1.021494021364728749e-01,1.725525007825558579e-02,5.101903236938221120e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.193040483864395318e-01,-9.149727121934375651e-01,9.904022040433818619e-02,1.734794617826136004e-02,5.186078053367513402e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.184772350715175726e-01,-9.179779122657683033e-01,9.595402576785028170e-02,1.743822852417923133e-02,5.270947083121833160e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.176463874888565186e-01,-9.209224919648573149e-01,9.289255820731044666e-02,1.752609091516817846e-02,5.356507035771859715e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.168115092878250660e-01,-9.238069105094917077e-01,8.985753361440565468e-02,1.761152912518348240e-02,5.442754575651569809e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.159726041354947501e-01,-9.266316316175654544e-01,8.685064386687744997e-02,1.769454091137046339e-02,5.529686323282874405e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.151296757166257345e-01,-9.293971214663930835e-01,8.387355505072957573e-02,1.777512602078479276e-02,5.617298856634001664e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.142827277336491587e-01,-9.321038483526691287e-01,8.092790769112449822e-02,1.785328619585003704e-02,5.705588712330664225e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.134317639066518169e-01,-9.347522802895964489e-01,7.801531507067285820e-02,1.792902517853192657e-02,5.794552386685448975e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.125767879733592824e-01,-9.373428853395984195e-01,7.513736247738486695e-02,1.800234871253480334e-02,5.884186336674775836e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.117178036891198101e-01,-9.398761284947146910e-01,7.229560655024426519e-02,1.807326454414004413e-02,5.974486980760210925e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.108548148268875710e-01,-9.423524713195026159e-01,6.949157509408326883e-02,1.814178242209628714e-02,6.065450699530872819e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.099878251772063331e-01,-9.447723702628232356e-01,6.672676518059100781e-02,1.820791409564684327e-02,6.157073836250308296e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.091168385481928071e-01,-9.471362763550696950e-01,6.400264307917959139e-02,1.827167331081227739e-02,6.249352697288480807e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.082418587655194386e-01,-9.494446333596853194e-01,6.132064306908165119e-02,1.833307580542653525e-02,6.342283552451463691e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.073628896723984205e-01,-9.516978764493227994e-01,5.868216738137978228e-02,1.839213930264712096e-02,6.435862635117653441e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.064799351295638186e-01,-9.538964317027098794e-01,5.608858485267895211e-02,1.844888350319683068e-02,6.530086142271930572e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.055929990152560283e-01,-9.560407144463983764e-01,5.354123042140210981e-02,1.850333007559680151e-02,6.624950234437766705e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.047020852252026790e-01,-9.581311280318162149e-01,5.104140442801930605e-02,1.855550264525579821e-02,6.720451035385226202e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.038071976726038681e-01,-9.601680642640767482e-01,4.859037192183989634e-02,1.860542678182960236e-02,6.816584631837496744e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.029083402881132869e-01,-9.621519003585464747e-01,4.618936196158443530e-02,1.865312998517569196e-02,6.913347072987983954e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.020055170198213457e-01,-9.640829991350380324e-01,4.383956694029713524e-02,1.869864166965726909e-02,7.010734369821652878e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.010987318332384088e-01,-9.659617086674882058e-01,4.154214189869791934e-02,1.874199314694471874e-02,7.108742494442883697e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.001879887112764766e-01,-9.677883598434191548e-01,3.929820385469326438e-02,1.878321760722705819e-02,7.207367379209278502e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.992732916542326427e-01,-9.695632667487635858e-01,3.710883101883086926e-02,1.882235009873804710e-02,7.306604915680929568e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.983546446797705531e-01,-9.712867259562097955e-01,3.497506271436717079e-02,1.885942750596164216e-02,7.406450953610697885e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.974320518229038646e-01,-9.729590144880099212e-01,3.289789833574051359e-02,1.889448852641065213e-02,7.506901299643177483e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.965055171359771480e-01,-9.745803913309410982e-01,3.087829672907650355e-02,1.892757364548786764e-02,7.607951716031864653e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.955750446886495686e-01,-9.761510947561178320e-01,2.891717524058565192e-02,1.895872510967173963e-02,7.709597919214171424e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.946406385678755679e-01,-9.776713428048777743e-01,2.701540992225622689e-02,1.898798689818516697e-02,7.811835578231968169e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.937023028778884326e-01,-9.791413321579964801e-01,2.517383470863024386e-02,1.901540469377019024e-02,7.914660313073165754e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.927600417401808652e-01,-9.805612390689132374e-01,2.339324020343142019e-02,1.904102585091679151e-02,8.018067692963053161e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.918138592934876652e-01,-9.819312175209049531e-01,2.167437423794014648e-02,1.906489936309481437e-02,8.122053234574404257e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.908637596937675207e-01,-9.832513987832250946e-01,2.001794006133272505e-02,1.908707582832745719e-02,8.226612400037196238e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.899097471141842464e-01,-9.845218919421311332e-01,1.842459670003581199e-02,1.910760741258946127e-02,8.331740594908967901e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.889518257450892413e-01,-9.857427842409902663e-01,1.689495850110650857e-02,1.912654781281240488e-02,8.437433166186754374e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.879899997940023937e-01,-9.869141390153153282e-01,1.542959366481205276e-02,1.914395221716156531e-02,8.543685400128167573e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.870242734855938727e-01,-9.880359972001467872e-01,1.402902487972904760e-02,1.915987726424957932e-02,8.650492520004372798e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.860546510616654770e-01,-9.891083768242918683e-01,1.269372808718257327e-02,1.917438100082930960e-02,8.757849683874766034e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.850811367811323160e-01,-9.901312734254078540e-01,1.142413279401215120e-02,1.918752283794766464e-02,8.865751982298082645e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.841037349200038253e-01,-9.911046590975607362e-01,1.022062084070738189e-02,1.919936350566362263e-02,8.974194435964867600e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.831224497713646704e-01,-9.920284833292259963e-01,9.083526163385974642e-03,1.920996500568134327e-02,9.083171993268154831e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.821372856453569833e-01,-9.929026730872462236e-01,8.013134459657067499e-03,1.921939056275085919e-02,9.192679527882290091e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.811482468691598235e-01,-9.937271336590965953e-01,7.009683223445081941e-03,1.922770457481107545e-02,9.302711836353183550e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.801553377869717476e-01,-9.945017484298215305e-01,6.073360668783227452e-03,1.923497256151892112e-02,9.413263635660089335e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.791585627599906028e-01,-9.952263794867830526e-01,5.204305867327033326e-03,1.924126111099576722e-02,9.524329560780793458e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.781579261663950975e-01,-9.959008670133234764e-01,4.402608256257362247e-03,1.924663782554289493e-02,9.635904162161965103e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.771534324013248174e-01,-9.965250323610743699e-01,3.668307526806494252e-03,1.925117126568560641e-02,9.747981903326070896e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.761450858768620176e-01,-9.970986762475738674e-01,3.001393320079245863e-03,1.925493089310447575e-02,9.860557158516207454e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.751328910220110835e-01,-9.976215804311204804e-01,2.401804844826777984e-03,1.925798701195508347e-02,9.973624210238178633e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.741168522826797682e-01,-9.980935079451213054e-01,1.869430458793765399e-03,1.926041070864043664e-02,1.008717724692521078e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.730969741216594304e-01,-9.985142051766598970e-01,1.404108338983859044e-03,1.926227379061673170e-02,1.020121036065810560e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.720732610186056055e-01,-9.988834010229872273e-01,1.005625534215415836e-03,1.926364872417786825e-02,1.031571754497194876e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.710457174700181326e-01,-9.992008085733767864e-01,6.737179164589050763e-04,1.926460857014493377e-02,1.043069269259340043e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.700143479892213927e-01,-9.994661262498868437e-01,4.080703111188431595e-04,1.926522691888056180e-02,1.054612959339606276e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.689791571063447684e-01,-9.996790388905770763e-01,2.083171086507398177e-04,1.926557782434622570e-02,1.066202193238974666e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.679401493683025492e-01,-9.998392182680491125e-01,7.404146874101561165e-05,1.926573573748130083e-02,1.077836328782434960e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.668973293387736145e-01,-9.999463247469773108e-01,4.774925498419486896e-06,1.926577543699280540e-02,1.089514712934134960e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.658507015981822263e-01,-1.000000007751896458e+00,-1.230616374526293431e-09,1.926577196038540413e-02,1.101236681629405439e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.648002707436770464e-01,-9.999999077576271489e-01,5.914320235482091437e-05,1.926580053380841720e-02,1.113001559609651386e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.637460413891117073e-01,-9.999456578602164969e-01,1.815884074859663530e-04,1.926593650116099898e-02,1.124808660289342704e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.626880181650236068e-01,-9.998368832370252424e-01,3.666635693664287154e-04,1.926625525217290358e-02,1.136657285614096485e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.616262057186147016e-01,-9.996732054601517037e-01,6.136482335037950252e-04,1.926683214940606848e-02,1.148546725949101538e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.605606087137299687e-01,-9.994542406489989039e-01,9.217723086201802573e-04,1.926774245551599735e-02,1.160476259980227909e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.594912318308379762e-01,-9.991796039975705535e-01,1.290216455761201845e-03,1.926906125919281010e-02,1.172445154621612057e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.584180797670095675e-01,-9.988489094520610623e-01,1.718112412977101239e-03,1.927086340100733763e-02,1.184452664967527019e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.573411572358974331e-01,-9.984617699428306459e-01,2.204543109516168050e-03,1.927322339801359682e-02,1.196498034217825385e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.562604689677154601e-01,-9.980178030534325728e-01,2.748544230646353479e-03,1.927621536874077335e-02,1.208580493661883803e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.551760197092181937e-01,-9.975166275062458121e-01,3.349104117573471675e-03,1.927991295804295027e-02,1.220699262666944812e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.540878142236794091e-01,-9.969578684120418144e-01,4.005164644588799297e-03,1.928438926128141911e-02,1.232853548670468369e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.529958572908716841e-01,-9.963411586664225883e-01,4.715622021159760618e-03,1.928971674883107196e-02,1.245042547232717567e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.519001537070450825e-01,-9.956661381670158795e-01,5.479327269517704144e-03,1.929596719025768181e-02,1.257265442077525330e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.508007082849067260e-01,-9.949324585036238844e-01,6.295087738789324414e-03,1.930321157887383496e-02,1.269521405165587868e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.496975258535985898e-01,-9.941397820153069498e-01,7.161667154475618094e-03,1.931152005663006738e-02,1.281809596800629059e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.485906112586771854e-01,-9.932877833057115469e-01,8.077786470814659822e-03,1.932096183810244963e-02,1.294129165722828212e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.474799693620920227e-01,-9.923761547738875199e-01,9.042126604652265046e-03,1.933160513641128675e-02,1.306479249268487630e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.463656050421637378e-01,-9.914046037749569829e-01,1.005332821368983519e-02,1.934351708996057476e-02,1.318858973542559876e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.452475231935638877e-01,-9.903728560071820564e-01,1.110999232348694465e-02,1.935676368813891643e-02,1.331267453591768735e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.441257287272921905e-01,-9.892806572297572210e-01,1.221068279244752648e-02,1.937140969848639288e-02,1.343703793623525489e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.430002265706553199e-01,-9.881277753866988256e-01,1.335392723381806715e-02,1.938751859550820814e-02,1.356167087247312220e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.418710216672458113e-01,-9.869140009626686805e-01,1.453821798470181746e-02,1.940515248951473967e-02,1.368656417740365394e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.407381189769198571e-01,-9.856391500400372152e-01,1.576201420172653953e-02,1.942437205710749459e-02,1.381170858333378992e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.396015234757753243e-01,-9.843030645573324078e-01,1.702374274888314443e-02,1.944523647278042014e-02,1.393709472534334914e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.384612401561303274e-01,-9.829056133799581874e-01,1.832180002918303019e-02,1.946780334166492779e-02,1.406271314449631660e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.373172740265014680e-01,-9.814466966747416032e-01,1.965455340322291508e-02,1.949212863379403901e-02,1.418855429160976733e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.361696301115808527e-01,-9.799262438682587728e-01,2.102034330537992310e-02,1.951826662012428812e-02,1.431460853118929621e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.350183134522153328e-01,-9.783442162967832534e-01,2.241748451391179994e-02,1.954626981030391578e-02,1.444086614538807878e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.338633291053832997e-01,-9.767006098784356549e-01,2.384426805674418406e-02,1.957618889191561559e-02,1.456731733848717347e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.327046821441730362e-01,-9.749954552192263701e-01,2.529896294722294367e-02,1.960807267175321278e-02,1.469395224158006885e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.315423776577602899e-01,-9.732288183442767693e-01,2.677981860506994244e-02,1.964196801935648093e-02,1.482076091736508061e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.303764207513859574e-01,-9.714008029658010024e-01,2.828506632870409973e-02,1.967791981296948198e-02,1.494773336522601526e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.292068165463334362e-01,-9.695115505451069593e-01,2.981292126332967321e-02,1.971597088707952722e-02,1.507485952654664674e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.280335701799065307e-01,-9.675612435888468754e-01,3.136158499236728348e-02,1.975616198307054983e-02,1.520212929028613436e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.268566868054064711e-01,-9.655501033597828631e-01,3.292924674320334838e-02,1.979853170204524948e-02,1.532953249882050284e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.256761715921098199e-01,-9.634783929663753854e-01,3.451408620048031228e-02,1.984311645988331638e-02,1.545705895366239757e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.244920297252450458e-01,-9.613464190908393991e-01,3.611427571237239842e-02,1.988995044609672899e-02,1.558469842195285837e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.233042664059700977e-01,-9.591545282785313153e-01,3.772798148044380351e-02,1.993906558424590400e-02,1.571244064253184414e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.221128868513501997e-01,-9.569031140409796077e-01,3.935336728705522702e-02,1.999049149588177574e-02,1.584027533256428810e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.209178962943334268e-01,-9.545926121359171734e-01,4.098859579289861749e-02,2.004425546812808531e-02,1.596819219436653725e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.197192999837294991e-01,-9.522235032757799811e-01,4.263183077777563806e-02,2.010038242312025061e-02,1.609618092208801310e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.185171031841850242e-01,-9.497963139390591669e-01,4.428124035760573618e-02,2.015889489169354204e-02,1.622423120882191261e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.173113111761618477e-01,-9.473116164960707630e-01,4.593499831640522613e-02,2.021981299035236629e-02,1.635233275386392404e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.161019292559127392e-01,-9.447700272767494134e-01,4.759128617164574970e-02,2.028315440018896645e-02,1.648047526985863265e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.148889627354590770e-01,-9.421722099478916368e-01,4.924829713847155704e-02,2.034893435068201029e-02,1.660864849020126655e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.136724169425666453e-01,-9.395188728846677328e-01,5.090423693715315917e-02,2.041716560708436295e-02,1.673684217657046280e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.124522972207226523e-01,-9.368107716433748955e-01,5.255732653170795687e-02,2.048785845999582730e-02,1.686504612650547852e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.112286089291126379e-01,-9.340487064937004202e-01,5.420580489128593721e-02,2.056102071975195872e-02,1.699325018113280183e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.100013574425958263e-01,-9.312335237389656184e-01,5.584793086136314449e-02,2.063665771407866001e-02,1.712144423283272210e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.087705481516828110e-01,-9.283661133191839498e-01,5.748198533255594045e-02,2.071477228908808962e-02,1.724961823297455621e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.075361864625111297e-01,-9.254474117751082529e-01,5.910627421196944925e-02,2.079536481426553368e-02,1.737776219970832703e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.062982777968216164e-01,-9.224783978098908266e-01,6.071913010208872946e-02,2.087843319163600747e-02,1.750586622589166974e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.050568275919344208e-01,-9.194600930537157879e-01,6.231891439282129935e-02,2.096397286749744279e-02,1.763392048658471434e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.038118413007259155e-01,-9.163935633004959280e-01,6.390402049934268414e-02,2.105197684918722056e-02,1.776191524702839253e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.025633243916036053e-01,-9.132799157394344558e-01,6.547287448031949286e-02,2.114243572501600318e-02,1.788984087046310290e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.013112823484830338e-01,-9.101202967033260594e-01,6.702393796649475100e-02,2.123533768704068819e-02,1.801768782568538285e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.000557206707632485e-01,-9.069158932246959015e-01,6.855571069252136551e-02,2.133066855915519491e-02,1.814544669461152782e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.987966448733025970e-01,-9.036679299141320287e-01,7.006673165946307458e-02,2.142841182809394202e-02,1.827310817989490632e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.975340604863950800e-01,-9.003776694511471668e-01,7.155558156322808228e-02,2.152854867802210526e-02,1.840066311228601426e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.962679730557449265e-01,-8.970464089751061243e-01,7.302088413800121580e-02,2.163105802866392702e-02,1.852810245801951583e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.949983881424437238e-01,-8.936754806443794052e-01,7.446130888225015176e-02,2.173591657710552405e-02,1.865541732580154599e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.937253113229446599e-01,-8.902662493048369985e-01,7.587557205777212266e-02,2.184309884371984228e-02,1.878259897405573364e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.924487481890386542e-01,-8.868201095101287956e-01,7.726243779241777354e-02,2.195257721979942955e-02,1.890963881748408681e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.911687043478299319e-01,-8.833384871895550994e-01,7.862072120733101799e-02,2.206432201976016305e-02,1.903652843390892802e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.898851854217111557e-01,-8.798228342946174951e-01,7.994928889864247323e-02,2.217830153713889335e-02,1.916325957069203445e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.885981970483383341e-01,-8.762746292362256773e-01,8.124705999169899717e-02,2.229448210217636200e-02,1.928982415094899205e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.873077448806070633e-01,-8.726953732056423885e-01,8.251300844079204055e-02,2.241282814334131374e-02,1.941621427951529255e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.860138345866267695e-01,-8.690865905487592524e-01,8.374616387324726463e-02,2.253330225225881611e-02,1.954242224874800649e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.847164718496960623e-01,-8.654498240861682179e-01,8.494561219444442257e-02,2.265586525071740590e-02,1.966844054406917675e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.834156623682784204e-01,-8.617866341095896665e-01,8.611049650977718739e-02,2.278047625979897836e-02,1.979426184903096964e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.821114118559759909e-01,-8.580985975343069505e-01,8.724001897674965589e-02,2.290709277236523123e-02,1.991987905037970463e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.808037260415053860e-01,-8.543873032011726210e-01,8.833344111346977390e-02,2.303567072846439759e-02,2.004528524273023193e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.794926106686721479e-01,-8.506543499026170130e-01,8.939008408767351588e-02,2.316616459207563108e-02,2.017047373262083332e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.781780714963458800e-01,-8.469013478682355656e-01,9.040932918576667643e-02,2.329852742973028898e-02,2.029543804274055729e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.768601142984342678e-01,-8.431299110587157086e-01,9.139061886431298909e-02,2.343271099133075255e-02,2.042017191564737266e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.755387448638585424e-01,-8.393416579237135133e-01,9.233345720161266479e-02,2.356866579363400133e-02,2.054466931681732667e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.742139689965273908e-01,-8.355382099241154270e-01,9.323740916294387393e-02,2.370634120447655258e-02,2.066892443791729927e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.728857925153117536e-01,-8.317211854804420490e-01,9.410210102743117588e-02,2.384568552782879991e-02,2.079293169930510610e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.715542212540194011e-01,-8.278922005947046969e-01,9.492722086335200760e-02,2.398664609106828868e-02,2.091668575212649039e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.702192610613689538e-01,-8.240528666795141621e-01,9.571251780061863779e-02,2.412916933311531897e-02,2.104018148043299841e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.688809178009646805e-01,-8.202047854389188375e-01,9.645780190173385005e-02,2.427320089302944378e-02,2.116341400255946081e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.675391973512698529e-01,-8.163495506091974496e-01,9.716294376110376496e-02,2.441868569952608536e-02,2.128637867230358249e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.661941056055823207e-01,-8.124887411219486877e-01,9.782787391776788521e-02,2.456556806085164607e-02,2.140907107977664814e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.648456484720069781e-01,-8.086239214809168852e-01,9.845258231120913861e-02,2.471379175489065577e-02,2.153148705148317521e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.634938318734313389e-01,-8.047566392519180933e-01,9.903711785069856111e-02,2.486330011986896682e-02,2.165362265054808499e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.621386617474984471e-01,-8.008884218715165426e-01,9.958158640516863447e-02,2.501403614424184407e-02,2.177547417612275826e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.607801440465815634e-01,-7.970207761277511294e-01,1.000861508018400098e-01,2.516594255574763558e-02,2.189703816277078219e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.594182847377570766e-01,-7.931551835736769807e-01,1.005510299196201440e-01,2.531896191166226745e-02,2.201831137916583914e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.580530898027795228e-01,-7.892931007945316324e-01,1.009764965521968982e-01,2.547303668701360671e-02,2.213929082658256109e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.566845652380541631e-01,-7.854359564505605240e-01,1.013628769738697671e-01,2.562810936220611194e-02,2.225997373699689919e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.553127170546113378e-01,-7.815851493738897871e-01,1.017105488522143941e-01,2.578412250972434219e-02,2.238035757091573485e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.539375512780798205e-01,-7.777420445608128619e-01,1.020199400818929680e-01,2.594101887901249098e-02,2.250044001444983233e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.525590739486605063e-01,-7.739079757259634818e-01,1.022915273627505173e-01,2.609874148062464941e-02,2.262021897643887169e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.511772911210995440e-01,-7.700842392442609485e-01,1.025258341392472627e-01,2.625723366839950723e-02,2.273969258510675218e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.497922088646621352e-01,-7.662720957735662530e-01,1.027234289065677014e-01,2.641643921948337928e-02,2.285885918432162822e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.484038332631057777e-01,-7.624727664070709565e-01,1.028849238879466776e-01,2.657630241306029040e-02,2.297771732963149183e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.470121704146531760e-01,-7.586874328005167589e-01,1.030109719396845730e-01,2.673676810637555870e-02,2.309626578392406926e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.456172264319661513e-01,-7.549172328720453740e-01,1.031022659111988232e-01,2.689778180808429647e-02,2.321450351264854606e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.442190074421178858e-01,-7.511632643476540139e-01,1.031595360491185664e-01,2.705928975093609828e-02,2.333242967888643216e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.428175195865669433e-01,-7.474265803896148475e-01,1.031835471393047071e-01,2.722123895931887610e-02,2.345004363839531536e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.414127690211299582e-01,-7.437081889674573842e-01,1.031750981398528860e-01,2.738357731618707228e-02,2.356734493400854669e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.400047619159539902e-01,-7.400090505381236028e-01,1.031350180291184387e-01,2.754625362636280161e-02,2.368433328980169283e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.385935044554904350e-01,-7.363300810283561759e-01,1.030641648169126900e-01,2.770921767628778051e-02,2.380100860516933814e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.371790028384670457e-01,-7.326721467078289862e-01,1.029634225282525378e-01,2.787242029142705108e-02,2.391737094878755021e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.357612632778612882e-01,-7.290360660457668729e-01,1.028336995787676456e-01,2.803581339002247236e-02,2.403342055193024018e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.343402920008724744e-01,-7.254226097289149244e-01,1.026759261343370283e-01,2.819935003427101114e-02,2.414915780219940578e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.329160952488951164e-01,-7.218324977944733378e-01,1.024910519449426516e-01,2.836298447769454098e-02,2.426458323665131034e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.314886792774907276e-01,-7.182664017765537645e-01,1.022800436210659081e-01,2.852667220912395371e-02,2.437969753488551106e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.300580503563610657e-01,-7.147249436527243205e-01,1.020438826718157582e-01,2.869036999302273724e-02,2.449450151218128813e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.286242147693199334e-01,-7.112086940146365999e-01,1.017835627938658005e-01,2.885403590643787972e-02,2.460899611216629812e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.271871788142661996e-01,-7.077181757243921068e-01,1.015000883610661236e-01,2.901762937252176308e-02,2.472318239964537701e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.257469488031560445e-01,-7.042538611839558316e-01,1.011944713885734731e-01,2.918111119097716380e-02,2.483706155339817090e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.243035310619742040e-01,-7.008161752309324255e-01,1.008677292524104291e-01,2.934444356362753831e-02,2.495063485885781751e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.228569319307079910e-01,-6.974054912174872944e-01,1.005208828961727946e-01,2.950759011730741052e-02,2.506390370068804496e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.214071577633178745e-01,-6.940221363300544954e-01,1.001549540967373170e-01,2.967051592289337777e-02,2.517686955521087344e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.199542149277103897e-01,-6.906663885779597001e-01,9.977096342598275325e-02,2.983318751016401579e-02,2.528953398323914858e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.184981098057098281e-01,-6.873384797746127095e-01,9.936992917308079543e-02,2.999557288050028539e-02,2.540189862241988838e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.170388487930305921e-01,-6.840385950889971145e-01,9.895286367963083973e-02,3.015764151548222180e-02,2.551396518013094861e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.155764382992484407e-01,-6.807668747416311117e-01,9.852077235893005880e-02,3.031936438109781098e-02,2.562573542595741882e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.141108847477731780e-01,-6.775234143385543417e-01,9.807465100981893613e-02,3.048071392953570144e-02,2.573721118470642111e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.126421945758196763e-01,-6.743082652802415700e-01,9.761548480785159354e-02,3.064166409674248109e-02,2.584839432891409272e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.111703742343798984e-01,-6.711214386716500835e-01,9.714424602301696687e-02,3.080219029828750940e-02,2.595928677211855606e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.096954301881950311e-01,-6.679629017991975948e-01,9.666189206931867506e-02,3.096226941996182633e-02,2.606989046169030932e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.082173689157260643e-01,-6.648325852800189839e-01,9.616936489954823475e-02,3.112187980737659426e-02,2.618020737209040272e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.067361969091264795e-01,-6.617303807954519890e-01,9.566758783333274840e-02,3.128100125112018293e-02,2.629023949851016506e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.052519206742123847e-01,-6.586561415619610615e-01,9.515746533656986583e-02,3.143961496903825703e-02,2.639998885011133889e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.037645467304356472e-01,-6.556096868737077976e-01,9.463988101850256962e-02,3.159770358648075761e-02,2.650945744373270907e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.022740816108536954e-01,-6.525908007624051654e-01,9.411569635286827495e-02,3.175525111300243897e-02,2.661864729786229677e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.007805318621018742e-01,-6.495992359440913022e-01,9.358575000141730604e-02,3.191224291726099671e-02,2.672756042654131914e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.992839040443636911e-01,-6.466347147015658603e-01,9.305085609297154836e-02,3.206866569956517976e-02,2.683619883396898831e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.977842047313435048e-01,-6.436969292590907221e-01,9.251180319849894307e-02,3.222450746116162223e-02,2.694456450875556963e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.962814405102357718e-01,-6.407855458441260588e-01,9.196935407697527654e-02,3.237975747247739261e-02,2.705265941887610714e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.947756179816976241e-01,-6.379002030050242578e-01,9.142424418208458525e-02,3.253440623919649083e-02,2.716048550648288429e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.932667437598194482e-01,-6.350405166411439151e-01,9.087718095306353272e-02,3.268844546582369559e-02,2.726804468313495655e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.917548244720952422e-01,-6.322060806119566001e-01,9.032884373483709983e-02,3.284186801819578838e-02,2.737533882537236396e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.902398667593944159e-01,-6.293964685620763166e-01,8.977988252566837524e-02,3.299466788452961458e-02,2.748236977058508002e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.887218772759319263e-01,-6.266112336426069573e-01,8.923091791397205763e-02,3.314684013415569036e-02,2.758913931276731901e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.872008626892394112e-01,-6.238499140022012446e-01,8.868254113121080973e-02,3.329838087665502877e-02,2.769564919890554200e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.856768296801356577e-01,-6.211120303036034018e-01,8.813531241803232441e-02,3.344928721828452356e-02,2.780190112546183090e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.841497849426976252e-01,-6.183970916155819841e-01,8.758976227901682510e-02,3.359955721770993553e-02,2.790789673513878744e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.826197351842306915e-01,-6.157045939812332280e-01,8.704639125952925993e-02,3.374918984246998849e-02,2.801363761409839115e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.810866871252394539e-01,-6.130340239213566411e-01,8.650566933493640531e-02,3.389818492384057830e-02,2.811912528919476850e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.795506474993976420e-01,-6.103848583994224697e-01,8.596803607274140779e-02,3.404654311063635974e-02,2.822436122563318217e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.780116230535196964e-01,-6.077565689194891485e-01,8.543390149166286973e-02,3.419426582367104256e-02,2.832934682481573763e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.764696205475297930e-01,-6.051486209887717571e-01,8.490364554665842667e-02,3.434135520991904444e-02,2.843408342270264400e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.749246467544329775e-01,-6.025604753473445818e-01,8.437761856809977701e-02,3.448781409583153573e-02,2.853857228796015666e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.733767084602854114e-01,-5.999915905408259587e-01,8.385614214649934894e-02,3.463364594144592579e-02,2.864281462064890427e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.718258124641639517e-01,-5.974414247134796074e-01,8.333950962219778547e-02,3.477885479539689906e-02,2.874681155115534348e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.702719655781371744e-01,-5.949094362146349901e-01,8.282798567443168392e-02,3.492344524912981141e-02,2.885056413945679132e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.687151746272345099e-01,-5.923950859314701667e-01,8.232180818476904360e-02,3.506742239188893884e-02,2.895407337447724605e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.671554464494171555e-01,-5.898978370639275104e-01,8.182118822003446590e-02,3.521079176738046990e-02,2.905734017389757229e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.655927878955469890e-01,-5.874171552737835755e-01,8.132631089347572284e-02,3.535355932991138955e-02,2.916036538357598129e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.640272058293580359e-01,-5.849525157616038973e-01,8.083733586734394294e-02,3.549573140212489153e-02,2.926314977800149042e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.624587071274248284e-01,-5.825033961128966986e-01,8.035439822479899241e-02,3.563731463251913062e-02,2.936569406062093379e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.608872986791325399e-01,-5.800692825951205833e-01,7.987761022298026092e-02,3.577831595590904440e-02,2.946799886384001699e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.593129873866474533e-01,-5.776496724257135229e-01,7.940706083660764580e-02,3.591874255394970905e-02,2.957006475029327341e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.577357801648857638e-01,-5.752440693874317779e-01,7.894281735352540186e-02,3.605860181638990969e-02,2.967189221354770701e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.561556839414840470e-01,-5.728519890934480330e-01,7.848492666952994190e-02,3.619790130455240867e-02,2.977348167899772635e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.545727056567677282e-01,-5.704729566872124291e-01,7.803341585513715040e-02,3.633664871656686751e-02,2.987483350522511238e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.529868522637218842e-01,-5.681065081759942847e-01,7.758829213334605956e-02,3.647485185177969796e-02,2.997594798491797774e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.513981307279594901e-01,-5.657521952299100487e-01,7.714954614971321345e-02,3.661251857849505409e-02,3.007682534677897923e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.498065480276917771e-01,-5.634095794119139411e-01,7.671715108289162144e-02,3.674965680364587067e-02,3.017746575712528245e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.482121111536969238e-01,-5.610782370880019743e-01,7.629106479463611257e-02,3.688627444245855258e-02,3.027786932139809317e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.466148271092899691e-01,-5.587577581571101515e-01,7.587123021592064032e-02,3.702237939133704719e-02,3.037803608623910190e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.450147029102913931e-01,-5.564477459550981608e-01,7.545757621352849887e-02,3.715797950038121633e-02,3.047796604124490272e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.434117455849966971e-01,-5.541478178793545206e-01,7.505001919510093100e-02,3.729308254884872165e-02,3.057765912091858773e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.418059621741456500e-01,-5.518576064957830818e-01,7.464846392792379159e-02,3.742769622207473695e-02,3.067711520675733827e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.401973597308906472e-01,-5.495767596261521559e-01,7.425280458276643791e-02,3.756182809015162771e-02,3.077633412948591607e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.385859453207668457e-01,-5.473049403881424801e-01,7.386292570230212906e-02,3.769548558810988798e-02,3.087531567139001898e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.369717260216599675e-01,-5.450418244064755280e-01,7.347870288546776152e-02,3.782867599748214460e-02,3.097405956827572471e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.353547089237763235e-01,-5.427871057008817335e-01,7.310000432897115208e-02,3.796140642972024859e-02,3.107256551184313653e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.337349011296107282e-01,-5.405404897417875887e-01,7.272669061650106903e-02,3.809368381091868955e-02,3.117083315209384087e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.321123097539158575e-01,-5.383016995321004972e-01,7.235861718030470180e-02,3.822551486767823692e-02,3.126886209929134464e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.304869419236707184e-01,-5.360704723007315708e-01,7.199563475283764225e-02,3.835690611710883269e-02,3.136665192685179071e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.288588047780496737e-01,-5.338465586081656555e-01,7.163758885959183031e-02,3.848786385496966167e-02,3.146420217310955114e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.272279054683905786e-01,-5.316297249027758243e-01,7.128432198901676897e-02,3.861839414631496742e-02,3.156151234397336913e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.255942511581639165e-01,-5.294197493489199990e-01,7.093567402516466613e-02,3.874850281811718228e-02,3.165858191488649109e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.239578490229409358e-01,-5.272164261127109119e-01,7.059148315286857545e-02,3.887819545329602305e-02,3.175541033321496176e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.223187062503623412e-01,-5.250195621016096048e-01,7.025158630978745045e-02,3.900747738567715561e-02,3.185199702052598503e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.206768300401065419e-01,-5.228289774747290064e-01,6.991582004147967044e-02,3.913635369647002621e-02,3.194834137481267211e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.190322276038583427e-01,-5.206445031389658196e-01,6.958402069337726870e-02,3.926482921142183252e-02,3.204444277231790483e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.173849061652770809e-01,-5.184659852398556534e-01,6.925602540798975193e-02,3.939290849917479603e-02,3.214030056979516603e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.157348729599646520e-01,-5.162932799302174525e-01,6.893167313045317490e-02,3.952059587150936332e-02,3.223591410664694012e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.140821352354338680e-01,-5.141262554168125609e-01,6.861080432232199988e-02,3.964789538461128970e-02,3.233128270668736426e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.124267002510773716e-01,-5.119647912264099210e-01,6.829326144439776258e-02,3.977481083965924563e-02,3.242640568019387604e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.107685752781343291e-01,-5.098087772825076192e-01,6.797889059139285151e-02,3.990134578643349250e-02,3.252128232575807187e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.091077675996596774e-01,-5.076581118069304344e-01,6.766754035962162239e-02,4.002750352704449732e-02,3.261591193184805171e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.074442845104918165e-01,-5.055127031481462252e-01,6.735906250355164215e-02,4.015328711891791996e-02,3.271029377835245611e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.057781333172199689e-01,-5.033724696427420708e-01,6.705331310601915507e-02,4.027869937997067412e-02,3.280442713837419810e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.041093213381530935e-01,-5.012373376736231068e-01,6.675015231058489817e-02,4.040374289481950265e-02,3.289831127982991710e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.024378559032870228e-01,-4.991072405325754913e-01,6.644944473791054851e-02,4.052842002057339271e-02,3.299194546666070482e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.007637443542724887e-01,-4.969821207377011496e-01,6.615105987416018685e-02,4.065273289434647580e-02,3.308532896029127146e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.990869940443830366e-01,-4.948619274038358329e-01,6.585487106730457396e-02,4.077668343941298623e-02,3.317846102106196127e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.974076123384820525e-01,-4.927466157312824224e-01,6.556075736898334116e-02,4.090027337248236528e-02,3.327134090930485844e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.957256066129916761e-01,-4.906361472006815494e-01,6.526860303658503504e-02,4.102350421295779953e-02,3.336396788643581357e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.940409842558590503e-01,-4.885304882188235553e-01,6.497829648599093288e-02,4.114637728990370308e-02,3.345634121601558375e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.923537526665245689e-01,-4.864296098998260320e-01,6.468973218584164742e-02,4.126889375006290162e-02,3.354846016447499135e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.906639192558894580e-01,-4.843334897871974798e-01,6.440280977196688461e-02,4.139105456793519544e-02,3.364032400227963127e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.889714914462828022e-01,-4.822421067899030911e-01,6.411743345708999231e-02,4.151286055282038534e-02,3.373193200451841678e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.872764766714295703e-01,-4.801554467492751277e-01,6.383351373200535361e-02,4.163431235852396717e-02,3.382328345165202532e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.855788823764169759e-01,-4.780734968055855849e-01,6.355096557013885372e-02,4.175541049233861407e-02,3.391437763038386843e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.838787160176629465e-01,-4.759962480339235968e-01,6.326970922454290547e-02,4.187615532315891453e-02,3.400521383395362918e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.821759850628824839e-01,-4.739236931172002842e-01,6.298966974624052584e-02,4.199654709013603965e-02,3.409579136281858380e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.804706969910552461e-01,-4.718558286765781040e-01,6.271077771317037919e-02,4.211658591159655324e-02,3.418610952496663469e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.787628592923925730e-01,-4.697926519560427505e-01,6.243296820560564203e-02,4.223627179389408820e-02,3.427616763661271992e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.770524794683048464e-01,-4.677341606245665750e-01,6.215618130578815653e-02,4.235560463950543281e-02,3.436596502193567937e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.753395650313679610e-01,-4.656803567143563138e-01,6.188036120293406023e-02,4.247458425528482379e-02,3.445550101381587016e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.736241235052911280e-01,-4.636312409171929327e-01,6.160545672179083138e-02,4.259321035976049857e-02,3.454477495397222375e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.719061624248831244e-01,-4.615868153327273848e-01,6.133142129405498577e-02,4.271148259178945278e-02,3.463378619299818939e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.701856893360195411e-01,-4.595470821628273583e-01,6.105821232541942117e-02,4.282940051813396987e-02,3.472253409044960271e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.684627117956096987e-01,-4.575120454189410268e-01,6.078579042844726305e-02,4.294696363984598642e-02,3.481101801508945659e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.667372373715630074e-01,-4.554817074830540546e-01,6.051412079179769449e-02,4.306417139893478629e-02,3.489923734487389573e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.650092736427564377e-01,-4.534560700686992574e-01,6.024317223044282216e-02,4.318102318671960399e-02,3.498719146670036806e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.632788281990006585e-01,-4.514351358882814913e-01,5.997291613933652954e-02,4.329751834860348647e-02,3.507487977636906118e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.615459086410068412e-01,-4.494189077083461381e-01,5.970332766310237210e-02,4.341365619031864465e-02,3.516230167888117486e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.598105225803533536e-01,-4.474073852104987203e-01,5.943438472052219729e-02,4.352943598375497158e-02,3.524945658785487712e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.580726776394524524e-01,-4.454005725256106052e-01,5.916606837697329507e-02,4.364485697262252689e-02,3.533634392586545991e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.563323814515162002e-01,-4.433984667523676038e-01,5.889836197821211178e-02,4.375991837730100475e-02,3.542296312417216875e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.545896416605240464e-01,-4.414010681419641147e-01,5.863125179207296506e-02,4.387461939989382997e-02,3.550931362208452069e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.528444659211878554e-01,-4.394083735588171225e-01,5.836472540485384414e-02,4.398895922814825832e-02,3.559539486732065661e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.510968618989195988e-01,-4.374203808132056115e-01,5.809877307512693184e-02,4.410293703891658007e-02,3.568120631528172715e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.493468372697968283e-01,-4.354370869500221453e-01,5.783338688427713814e-02,4.421655200285980886e-02,3.576674742958120823e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.475943997205290348e-01,-4.334584849490663583e-01,5.756856095254390265e-02,4.432980328782831769e-02,3.585201768107385001e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.458395569484247867e-01,-4.314845691534400229e-01,5.730429011734135269e-02,4.444269006177721526e-02,3.593701654788981070e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.440823166613565354e-01,-4.295153318957159461e-01,5.704057144393088336e-02,4.455521149495036970e-02,3.602174351515865514e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.423226865777274197e-01,-4.275507642427991417e-01,5.677740270065723127e-02,4.466736676381678761e-02,3.610619807486978305e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.405606744264380703e-01,-4.255908551374085191e-01,5.651478253659104195e-02,4.477915505117652240e-02,3.619037972521075841e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.387962879468511934e-01,-4.236355968086172319e-01,5.625271165783107818e-02,4.489057555029635910e-02,3.627428797099350732e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.370295348887590414e-01,-4.216849739977034384e-01,5.599119062093656229e-02,4.500162746610861936e-02,3.635792232305640193e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.352604230123479967e-01,-4.197389744722112015e-01,5.573022080541369105e-02,4.511231001605208596e-02,3.644128229765876692e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.334889600881657090e-01,-4.177975847125744435e-01,5.546980448039159212e-02,4.522262243152602029e-02,3.652436741693155797e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.317151538970863456e-01,-4.158607885937666837e-01,5.520994432072243857e-02,4.533256395979444464e-02,3.660717720822338483e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.299390122302762851e-01,-4.139285696433678829e-01,5.495064309561120686e-02,4.544213386360421852e-02,3.668971120384528772e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.281605428891603671e-01,-4.120009111857466100e-01,5.469190420518504903e-02,4.555133142294664345e-02,3.677196894103475500e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.263797536853872527e-01,-4.100777928284069507e-01,5.443373034343575861e-02,4.566015593387513782e-02,3.685394996149223834e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.245966524407953413e-01,-4.081591971122570661e-01,5.417612619009429947e-02,4.576860671068579134e-02,3.693565381115320734e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.228112469873781309e-01,-4.062451048594815095e-01,5.391909470081110434e-02,4.587668308605141049e-02,3.701708004060934032e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.210235451672501350e-01,-4.043354932048924821e-01,5.366263946999479117e-02,4.598438440945731842e-02,3.709822820414297451e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.192335548326121319e-01,-4.024303427235654818e-01,5.340676385752014116e-02,4.609171004810640876e-02,3.717909785988850468e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.174412838457173036e-01,-4.005296310405179194e-01,5.315147182741732784e-02,4.619865938715038423e-02,3.725968856985972244e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.156467400788354860e-01,-3.986333355682882829e-01,5.289676591869962863e-02,4.630523182907556740e-02,3.733999989947173748e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.138499314142201957e-01,-3.967414329163614917e-01,5.264264965227701681e-02,4.641142679260483461e-02,3.742003141751297224e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.120508657440723255e-01,-3.948539001897661271e-01,5.238912576717864866e-02,4.651724371410505959e-02,3.749978269598918668e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.102495509705069487e-01,-3.929707127334657324e-01,5.213619617274797835e-02,4.662268204467931443e-02,3.757925331009411285e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.084459950055177924e-01,-3.910918456605667060e-01,5.188386332801234180e-02,4.672774125068039630e-02,3.765844283775607315e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.066402057709425977e-01,-3.892172748153540640e-01,5.163212916746877379e-02,4.683242081359414999e-02,3.773735085989796678e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.048321911984281485e-01,-3.873469743585175595e-01,5.138099506248661880e-02,4.693672022881861805e-02,3.781597696007463760e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.030219592293964093e-01,-3.854809212636427507e-01,5.113046231526584273e-02,4.704063900510570967e-02,3.789432072471765611e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.012095178150079988e-01,-3.836190893330909124e-01,5.088053153183096783e-02,4.714417666373861560e-02,3.797238174302438596e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.993948749161288836e-01,-3.817614526651256779e-01,5.063120372246188855e-02,4.724733273817201956e-02,3.805015960656292218e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.975780385032944064e-01,-3.799079852224910292e-01,5.038247879525301759e-02,4.735010677364143855e-02,3.812765390926879383e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.957590165566744811e-01,-3.780586635769292991e-01,5.013435670238558517e-02,4.745249832544583268e-02,3.820486424770819234e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.939378170660390643e-01,-3.762134606209793786e-01,4.988683685991725236e-02,4.755450695915177522e-02,3.828179022096801498e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.921144480307220181e-01,-3.743723511531212855e-01,4.963991842666405641e-02,4.765613224907044865e-02,3.835843143023617841e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.902889174595871369e-01,-3.725353099578139759e-01,4.939360027003560083e-02,4.775737377824840896e-02,3.843478747922701166e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.884612333709922316e-01,-3.707023116206288882e-01,4.914788115986466061e-02,4.785823113757894237e-02,3.851085797389904863e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.866314037927538805e-01,-3.688733304158525739e-01,4.890275902297765848e-02,4.795870392534354321e-02,3.858664252253038440e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.847994367621128453e-01,-3.670483413240622772e-01,4.865823184966003301e-02,4.805879174553584487e-02,3.866214073556357711e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.829653403256978783e-01,-3.652273188853800701e-01,4.841429759055439902e-02,4.815849420890241928e-02,3.873735222578482129e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.811291225394909166e-01,-3.634102382231814699e-01,4.817095346151315655e-02,4.825781093114445142e-02,3.881227660806790269e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.792907914687918880e-01,-3.615970754385930963e-01,4.792819692656456743e-02,4.835674153306809137e-02,3.888691349976450828e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.774503551881826846e-01,-3.597878060498483910e-01,4.768602512256923942e-02,4.845528563992256010e-02,3.896126252044235794e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.756078217814919684e-01,-3.579824075233697389e-01,4.744443510506242412e-02,4.855344288158469634e-02,3.903532329225871123e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.737631993417600329e-01,-3.561808529145552016e-01,4.720342284199229022e-02,4.865121289055700932e-02,3.910909543948259404e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.719164959712024987e-01,-3.543831195946279267e-01,4.696298545803551799e-02,4.874859530236066302e-02,3.918257858839901964e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.700677197811755081e-01,-3.525891854268833336e-01,4.672311925145212347e-02,4.884558975607899978e-02,3.925577236803319825e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.682168788921395874e-01,-3.507990254145236242e-01,4.648382047223853508e-02,4.894219589284833083e-02,3.932867640965979072e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.663639814336241196e-01,-3.490126169841267778e-01,4.624508514132854436e-02,4.903841335604523094e-02,3.940129034670264430e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.645090355441917063e-01,-3.472299378783992863e-01,4.600690911201409800e-02,4.913424179028345190e-02,3.947361381518706969e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.626520493714022519e-01,-3.454509646362655118e-01,4.576928881600580978e-02,4.922968084228003527e-02,3.954564645338288775e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.607930310717773814e-01,-3.436756764411154075e-01,4.553221976273404831e-02,4.932473016025289148e-02,3.961738790208024219e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.589319888107645240e-01,-3.419040502789995606e-01,4.529569830530461721e-02,4.941938939349722199e-02,3.968883780457603261e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.570689307627009978e-01,-3.401360640734931162e-01,4.505971998366299636e-02,4.951365819318564315e-02,3.975999580642168296e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.552038651107780942e-01,-3.383716971298446063e-01,4.482428028424298438e-02,4.960753621015020348e-02,3.983086155572526099e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.533368000470053838e-01,-3.366109281021211208e-01,4.458937552328430531e-02,4.970102309685948960e-02,3.990143470321336761e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.514677437721743569e-01,-3.348537356108577945e-01,4.435500124051357240e-02,4.979411850669099998e-02,3.997171490201572031e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.495967044958226744e-01,-3.331000985429223848e-01,4.412115296859285551e-02,4.988682209342246848e-02,4.004170180781885247e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.477236904361980296e-01,-3.313499952548600858e-01,4.388782651476653279e-02,4.997913351086186867e-02,4.011139507859103892e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.458487098202219556e-01,-3.296034077545161867e-01,4.365501837048674227e-02,5.007105241482045360e-02,4.018079437510966856e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.439717708834537979e-01,-3.278603139601054539e-01,4.342272339346896215e-02,5.016257846117637420e-02,4.024989936078507413e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.420928818700546881e-01,-3.261206947999337746e-01,4.319093797212356783e-02,5.025371130582704376e-02,4.031870970143970889e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.402120510327509062e-01,-3.243845293945762931e-01,4.295965786149214677e-02,5.034445060618272794e-02,4.038722506558002356e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.383292866327981319e-01,-3.226517990880932718e-01,4.272887881068129085e-02,5.043479601966745912e-02,4.045544512419585459e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.364445969399446956e-01,-3.209224841538562689e-01,4.249859692872211270e-02,5.052474720448318218e-02,4.052336955111563999e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.345579902323956079e-01,-3.191965653943290571e-01,4.226880839800947753e-02,5.061430382004499656e-02,4.059099802263360091e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.326694747967760879e-01,-3.174740238041954266e-01,4.203950872417175355e-02,5.070346552607175178e-02,4.065833021783991086e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.307790589280949822e-01,-3.157548394664520530e-01,4.181069452931148595e-02,5.079223198290432062e-02,4.072536581826661783e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.288867509297087377e-01,-3.140389940701710514e-01,4.158236196473711482e-02,5.088060285276760353e-02,4.079210450810824007e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.269925591132845422e-01,-3.123264688059790295e-01,4.135450714455513044e-02,5.096857779832794644e-02,4.085854597422459711e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.250964917987639091e-01,-3.106172450456022305e-01,4.112712630985529416e-02,5.105615648348862945e-02,4.092468990613829516e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.231985573143263735e-01,-3.089113033505569983e-01,4.090021560611258739e-02,5.114333857272319872e-02,4.099053599588311503e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.212987639963524100e-01,-3.072086261593881540e-01,4.067377186144514184e-02,5.123012373218313598e-02,4.105608393807438916e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.193971201893874623e-01,-3.055091968050932527e-01,4.044779131467417937e-02,5.131651162956400031e-02,4.112133343030896504e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.174936342461046945e-01,-3.038129950253450895e-01,4.022227047891458601e-02,5.140250193355578451e-02,4.118628417276518072e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.155883145272687984e-01,-3.021200031032469369e-01,3.999720587118586745e-02,5.148809431438212958e-02,4.125093586792796252e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.136811694016988561e-01,-3.004302047598121361e-01,3.977259419857349237e-02,5.157328844363685133e-02,4.131528822126334100e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.117722072462318139e-01,-2.987435816134337285e-01,3.954843244165851673e-02,5.165808399514321136e-02,4.137934094095662152e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.098614364456857340e-01,-2.970601142628586655e-01,3.932471691291709870e-02,5.174248064420303855e-02,4.144309373756757120e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.079488653928228237e-01,-2.953797860296122901e-01,3.910144453815816296e-02,5.182647806738070045e-02,4.150654632411227007e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.060345024883124099e-01,-2.937025796173415859e-01,3.887861215389811637e-02,5.191007594348912707e-02,4.156969841653598285e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.041183561406945790e-01,-2.920284753699232683e-01,3.865621647056965982e-02,5.199327395276098079e-02,4.163254973301040995e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.022004347663427071e-01,-2.903574580871810529e-01,3.843425469493789298e-02,5.207607177764341649e-02,4.169509999435803693e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.002807467894267113e-01,-2.886895096368481961e-01,3.821272337754814258e-02,5.215846910245060253e-02,4.175734892421805733e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.983593006418760796e-01,-2.870246129904621690e-01,3.799161976357332687e-02,5.224046561300459135e-02,4.181929624861588368e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.964361047633429003e-01,-2.853627520577611265e-01,3.777094138244370403e-02,5.232206099842372921e-02,4.188094169641654596e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.945111676011643920e-01,-2.837039077703729095e-01,3.755068472740191038e-02,5.240325494967859599e-02,4.194228499892125872e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.925844976103263773e-01,-2.820480630705451963e-01,3.733084689941720824e-02,5.248404715904445017e-02,4.200332588969816761e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.906561032534255906e-01,-2.803952013726989101e-01,3.711142515891788923e-02,5.256443732131733243e-02,4.206406410500159732e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.887259930006330966e-01,-2.787453049959661699e-01,3.689241656307880313e-02,5.264442513339075147e-02,4.212449938352126888e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.867941753296565421e-01,-2.770983578407213432e-01,3.667381858300441250e-02,5.272401029453961663e-02,4.218463146648452899e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.848606587257031308e-01,-2.754543426266470862e-01,3.645562830891448808e-02,5.280319250663997183e-02,4.224446009770073118e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.829254516814425413e-01,-2.738132428237172733e-01,3.623784295337748040e-02,5.288197147329540965e-02,4.230398502338418298e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.809885626969692352e-01,-2.721750415760257891e-01,3.602046016860861732e-02,5.296034690113549798e-02,4.236320599236014783e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.790500002797654311e-01,-2.705397200359498378e-01,3.580347678830345104e-02,5.303831849880012023e-02,4.242212275556668244e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.771097729446636349e-01,-2.689072637838904600e-01,3.558689051338531639e-02,5.311588597713320231e-02,4.248073506641251162e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.751678892138092802e-01,-2.672776550727771760e-01,3.537069843175431189e-02,5.319304904963925024e-02,4.253904268100536235e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.732243576166232035e-01,-2.656508778067487309e-01,3.515489814546209069e-02,5.326980743185445732e-02,4.259704535759036292e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.712791866897643955e-01,-2.640269168191012872e-01,3.493948718681581800e-02,5.334616084243781753e-02,4.265474285728457216e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.693323849770918654e-01,-2.624057515704821264e-01,3.472446260910766469e-02,5.342210900205553170e-02,4.271213494317327997e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.673839610296283364e-01,-2.607873680066410405e-01,3.450982166302115489e-02,5.349765163304009280e-02,4.276922138044007005e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.654339234055213326e-01,-2.591717516147429290e-01,3.429556247396046281e-02,5.357278846075334833e-02,4.282600193737639560e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.634822806700064302e-01,-2.575588830009680752e-01,3.408168223712938066e-02,5.364751921368279136e-02,4.288247638438603682e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.615290413953694548e-01,-2.559487457172385239e-01,3.386817847791396413e-02,5.372184362202286206e-02,4.293864449383007487e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.595742141609086784e-01,-2.543413234357141950e-01,3.365504867178033960e-02,5.379576141889539503e-02,4.299450604046566315e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.576178075528972378e-01,-2.527366004503950880e-01,3.344228983862282401e-02,5.386927233890054928e-02,4.305006080138746860e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.556598301645456095e-01,-2.511345603466804688e-01,3.322989972803354342e-02,5.394237611883241634e-02,4.310530855610543166e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.537002905959633070e-01,-2.495351868780310567e-01,3.301787636058167719e-02,5.401507249873295502e-02,4.316024908632367652e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.517391974541219102e-01,-2.479384640642217641e-01,3.280621699823875470e-02,5.408736122150886727e-02,4.321488217612828309e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.497765593528163741e-01,-2.463443746349964492e-01,3.259491904519199595e-02,5.415924203149822397e-02,4.326920761170457430e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.478123849126281697e-01,-2.447529037279323749e-01,3.238398038354890068e-02,5.423071467604905632e-02,4.332322518150482638e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.458466827608864258e-01,-2.431640345832005168e-01,3.217339821502858410e-02,5.430177890461603679e-02,4.337693467637944167e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.438794615316307368e-01,-2.415777500797698651e-01,3.196317006437204844e-02,5.437243446860173107e-02,4.343033588908270515e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.419107298655730265e-01,-2.399940347836647392e-01,3.175329363829797191e-02,5.444268112168899193e-02,4.348342861462378695e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.399404964100594673e-01,-2.384128720956989189e-01,3.154376673292508260e-02,5.451251862055871145e-02,4.353621265013566943e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.379687698190326217e-01,-2.368342476493640048e-01,3.133458657430254063e-02,5.458194672374643008e-02,4.358868779511614333e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.359955587529934729e-01,-2.352581436610858834e-01,3.112575121446605614e-02,5.465096519224434124e-02,4.364085385118365301e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.340208718789632880e-01,-2.336845462265491613e-01,3.091725812009240085e-02,5.471957378983226261e-02,4.369271062208846534e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.320447178704457047e-01,-2.321134366902992685e-01,3.070910461109397846e-02,5.478777228190175042e-02,4.374425791378676598e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.300671054073883171e-01,-2.305448001669923863e-01,3.050128863401264587e-02,5.485556043600735981e-02,4.379549553390184591e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.280880431761448723e-01,-2.289786208520001587e-01,3.029380810715975522e-02,5.492293802303171990e-02,4.384642329259025306e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.261075398694370242e-01,-2.274148832607233439e-01,3.008666017943820853e-02,5.498990481554315995e-02,4.389704100185874980e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.241256041863160853e-01,-2.258535720879291664e-01,2.987984276813863277e-02,5.505646058818317035e-02,4.394734847619538032e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.221422448321247800e-01,-2.242946698112092130e-01,2.967335334975788538e-02,5.512260511792135831e-02,4.399734553174280416e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.201574705184592196e-01,-2.227381615886918909e-01,2.946718984990668103e-02,5.518833818400794428e-02,4.404703198682196064e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.181712899631304881e-01,-2.211840308052820114e-01,2.926134983371223020e-02,5.525365956809925178e-02,4.409640766161550540e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.161837118901262844e-01,-2.196322631692371186e-01,2.905583119592191482e-02,5.531856905415503262e-02,4.414547237858754136e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.141947450295727307e-01,-2.180828413464480087e-01,2.885063145782363822e-02,5.538306642834930210e-02,4.419422596201603159e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.122043981176959027e-01,-2.165357521674018126e-01,2.864574875429666198e-02,5.544715147934440452e-02,4.424266823841414564e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.102126798967836940e-01,-2.149909770011372567e-01,2.844118020458268406e-02,5.551082399799425054e-02,4.429079903619868031e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.082195991151471803e-01,-2.134485009702106195e-01,2.823692363626470470e-02,5.557408377634905083e-02,4.433861818544097555e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.062251645270823164e-01,-2.119083085123749655e-01,2.803297680988084406e-02,5.563693060926123979e-02,4.438612551831789821e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.042293848928312450e-01,-2.103703850418109089e-01,2.782933741704336078e-02,5.569936429334960970e-02,4.443332086910218304e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.022322689785442162e-01,-2.088347132720558275e-01,2.762600348928348146e-02,5.576138462790763278e-02,4.448020407386729103e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.002338255562408409e-01,-2.073012799719763655e-01,2.742297265355748970e-02,5.582299141454128560e-02,4.452677497068658896e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.982340634037716764e-01,-2.057700665582694322e-01,2.722024230485868904e-02,5.588418445637013260e-02,4.457303339956003585e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.962329913047793695e-01,-2.042410588954778616e-01,2.701781046890668753e-02,5.594496355842987906e-02,4.461897920195037615e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.942306180486604639e-01,-2.027142422537059618e-01,2.681567488563320334e-02,5.600532852839856107e-02,4.466461222174009094e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.922269524305266541e-01,-2.011895995412427696e-01,2.661383322897988707e-02,5.606527917567207669e-02,4.470993230438357058e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.902220032511660386e-01,-1.996671153627049000e-01,2.641228334575971420e-02,5.612481531178513394e-02,4.475493929709080665e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.882157793170046500e-01,-1.981467750797328287e-01,2.621102332289485318e-02,5.618393675076923527e-02,4.479963304895746568e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.862082894400676536e-01,-1.966285630879540391e-01,2.601005043339884096e-02,5.624264330864365419e-02,4.484401341109501282e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.841995424379405444e-01,-1.951124632100794121e-01,2.580936282216150096e-02,5.630093480275954326e-02,4.488808023617233967e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.821895471337307892e-01,-1.935984621876825629e-01,2.560895860330727331e-02,5.635881105402898439e-02,4.493183337891250573e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.801783123560285804e-01,-1.920865429480611009e-01,2.540883486289282137e-02,5.641627188436292711e-02,4.497527269593218358e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.781658469388685329e-01,-1.905766878036706247e-01,2.520898975430542130e-02,5.647331711743162203e-02,4.501839804505479159e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.761521597216903823e-01,-1.890688830224739048e-01,2.500942089349093844e-02,5.652994657912091547e-02,4.506120928573885687e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.741372595493007380e-01,-1.875631161920616741e-01,2.481012649380036003e-02,5.658616009737706815e-02,4.510370627987671521e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.721211552718337257e-01,-1.860593684942971615e-01,2.461110428078539317e-02,5.664195750282323416e-02,4.514588889103831604e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.701038557447121846e-01,-1.845576241142625473e-01,2.441235163488455692e-02,5.669733862703271038e-02,4.518775698384284167e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.680853698286093101e-01,-1.830578691791947266e-01,2.421386681090191215e-02,5.675230330364224030e-02,4.522931042493445464e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.660657063894090180e-01,-1.815600886797041624e-01,2.401564756127596428e-02,5.680685136873430546e-02,4.527054908273340228e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.640448742981674202e-01,-1.800642671622897928e-01,2.381769177180428562e-02,5.686098266015291586e-02,4.531147282748982374e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.620228824310735782e-01,-1.785703883700370531e-01,2.361999707961173062e-02,5.691469701754744981e-02,4.535208153080649840e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.599997396694109231e-01,-1.770784385958131812e-01,2.342256172754215993e-02,5.696799428252500436e-02,4.539237506619213658e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.579754548995178420e-01,-1.755884001964949648e-01,2.322538344234151286e-02,5.702087429925957462e-02,4.543235330857878784e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.559500370127490432e-01,-1.741002595899645011e-01,2.302845973825099271e-02,5.707333691293405703e-02,4.547201613451030133e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.539234949054359203e-01,-1.726139980823635511e-01,2.283178825490756944e-02,5.712538197009878194e-02,4.551136342195298279e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.518958374788481391e-01,-1.711296037276920368e-01,2.263536736588295586e-02,5.717700931925706609e-02,4.555039505028627977e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.498670736391540859e-01,-1.696470614896304907e-01,2.243919505838475048e-02,5.722821881199047916e-02,4.558911090119163179e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.478372122973819258e-01,-1.681663548468704528e-01,2.224326896845690502e-02,5.727901030109465441e-02,4.562751085753935509e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.458062623693803850e-01,-1.666874683208811048e-01,2.204758692130679223e-02,5.732938364122118491e-02,4.566559480371873980e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.437742327757797534e-01,-1.652103871010052738e-01,2.185214663216566294e-02,5.737933868823724692e-02,4.570336262559021590e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.417411324419524721e-01,-1.637350954190754659e-01,2.165694621311077886e-02,5.742887530017841413e-02,4.574081421064441488e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.397069702979740813e-01,-1.622615795338143985e-01,2.146198377606614929e-02,5.747799333712829556e-02,4.577794944783317721e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.376717552785839183e-01,-1.607898235842844803e-01,2.126725722311151312e-02,5.752669266168103368e-02,4.581476822799553372e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.356354963231459820e-01,-1.593198097382980127e-01,2.107276393411300006e-02,5.757497313707032882e-02,4.585127044274908137e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.335982023756095483e-01,-1.578515262452400980e-01,2.087850231302905424e-02,5.762283462862888977e-02,4.588745598549832105e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.315598823844699505e-01,-1.563849558053212740e-01,2.068446989814960724e-02,5.767027700350901148e-02,4.592332475125480706e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.295205453027292508e-01,-1.549200848265611385e-01,2.049066490148236663e-02,5.771730013047198654e-02,4.595887663632939879e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.274802000878569097e-01,-1.534568969778068170e-01,2.029708518632964559e-02,5.776390388044456980e-02,4.599411153869753521e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.254388557017504569e-01,-1.519953765383946220e-01,2.010372848844975413e-02,5.781008812579001982e-02,4.602902935737563372e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.233965211106962168e-01,-1.505355089774873534e-01,1.991059254658266234e-02,5.785585274001278644e-02,4.606362999297087257e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.213532052853296739e-01,-1.490772784086391023e-01,1.971767555653507892e-02,5.790119759860391790e-02,4.609791334738552182e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.193089172005963650e-01,-1.476206705884352544e-01,1.952497524902672282e-02,5.794612257877363587e-02,4.613187932400925617e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.172636658357122441e-01,-1.461656687673354438e-01,1.933248967407266011e-02,5.799062755934604463e-02,4.616552782752834649e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.152174601741243254e-01,-1.447122590055159586e-01,1.914021673160931636e-02,5.803471242086216675e-02,4.619885876386500834e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.131703092034713254e-01,-1.432604271180794031e-01,1.894815460959788436e-02,5.807837704591425898e-02,4.623187204071342871e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.111222219155439450e-01,-1.418101558308799970e-01,1.875630077103604756e-02,5.812162131844875873e-02,4.626456756690022210e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.090732073062456231e-01,-1.403614300697661188e-01,1.856465330381697545e-02,5.816444512353410867e-02,4.629694525230498292e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.070232743755528182e-01,-1.389142349350321326e-01,1.837321006269787907e-02,5.820684834814183334e-02,4.632900500824016499e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.049724321274755956e-01,-1.374685558145125330e-01,1.818196908247006471e-02,5.824883088069000847e-02,4.636074674736522239e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.029206895700181590e-01,-1.360243777711538926e-01,1.799092839724995146e-02,5.829039261153351470e-02,4.639217038379672142e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.008680557151390489e-01,-1.345816857603361061e-01,1.780008604755458149e-02,5.833153343280347153e-02,4.642327583288804460e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.988145395787117298e-01,-1.331404641573453451e-01,1.760943963882382821e-02,5.837225323799757892e-02,4.645406301135759364e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.967601501804849828e-01,-1.317006958175198084e-01,1.741898698014578487e-02,5.841255192143000180e-02,4.648453183673164624e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.947048965440432433e-01,-1.302623671121353466e-01,1.722872601188265088e-02,5.845242937893968538e-02,4.651468222784926887e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.926487876967668822e-01,-1.288254619597506700e-01,1.703865493376442025e-02,5.849188550802610770e-02,4.654451410477745688e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.905918326697928489e-01,-1.273899685604782950e-01,1.684877187418076794e-02,5.853092020850012706e-02,4.657402738915559781e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.885340404979746198e-01,-1.259558675252432647e-01,1.665907436681268033e-02,5.856953338116930446e-02,4.660322200382414071e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.864754202198428135e-01,-1.245231457369719991e-01,1.646956064364839614e-02,5.860772492818651369e-02,4.663209787226038072e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.844159808775653053e-01,-1.230917881279543075e-01,1.628022828696635768e-02,5.864549475328109662e-02,4.666065491979681790e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.823557315169075932e-01,-1.216617785568563231e-01,1.609107545546428417e-02,5.868284276105321245e-02,4.668889307247218734e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.802946811871931343e-01,-1.202331026046514206e-01,1.590210025064894492e-02,5.871976885848301936e-02,4.671681225775596302e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.782328389412633773e-01,-1.188057441705780426e-01,1.571330044877443724e-02,5.875627295352636725e-02,4.674441240401654429e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.761702138354382385e-01,-1.173796887671384598e-01,1.552467398081067845e-02,5.879235495569548609e-02,4.677169344095864489e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.741068149294762724e-01,-1.159549202453131200e-01,1.533621878356730694e-02,5.882801477538730089e-02,4.679865529915039346e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.720426512865346202e-01,-1.145314266511411327e-01,1.514793340452116541e-02,5.886325232562934773e-02,4.682529791076426307e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.699777319731296532e-01,-1.131091889205937395e-01,1.495981496759856040e-02,5.889806752027934578e-02,4.685162120894478255e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.679120660590968095e-01,-1.116881933829294710e-01,1.477186158211494550e-02,5.893246027371225326e-02,4.687762512753539945e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.658456626175507931e-01,-1.102684245048640710e-01,1.458407108239008140e-02,5.896643050174991962e-02,4.690330960178776265e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.637785307248458277e-01,-1.088498682818584140e-01,1.439644208179179270e-02,5.899997812221940280e-02,4.692867456796440129e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.617106794605358555e-01,-1.074325088353507290e-01,1.420897188646247714e-02,5.903310305451996837e-02,4.695371996366781708e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.596421179073343188e-01,-1.060163294593241129e-01,1.402165876232334962e-02,5.906580521837908465e-02,4.697844572705692223e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.575728551510746644e-01,-1.046013176087567664e-01,1.383450080589294050e-02,5.909808453611651441e-02,4.700285179762209720e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.555029002806702365e-01,-1.031874557145315147e-01,1.364749527807913518e-02,5.912994093017510944e-02,4.702693811597853379e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.534322623880743364e-01,-1.017747276833983472e-01,1.346064045156428537e-02,5.916137432386921885e-02,4.705070462319538849e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.513609505682403933e-01,-1.003631208316762696e-01,1.327393460538608093e-02,5.919238464274181877e-02,4.707415126167044384e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.492889739190818854e-01,-9.895262000248444823e-02,1.308737563746422902e-02,5.922297181380964393e-02,4.709727797514952630e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.472163415414325383e-01,-9.754320996386572906e-02,1.290096175509167914e-02,5.925313576554522982e-02,4.712008470830252316e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.451430625390061624e-01,-9.613487505149494183e-02,1.271469050271165427e-02,5.928287642765662363e-02,4.714257140680948033e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.430691460183568520e-01,-9.472759965562742301e-02,1.252855985418199064e-02,5.931219373017952018e-02,4.716473801712377512e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.409946010888388224e-01,-9.332136941751681403e-02,1.234256795479539946e-02,5.934108760513322894e-02,4.718658448684597828e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.389194368625665810e-01,-9.191616723244502851e-02,1.215671226379707107e-02,5.936955798497015740e-02,4.720811076433655828e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.368436624543747093e-01,-9.051197964596299772e-02,1.197099133040905108e-02,5.939760480355390365e-02,4.722931679888878187e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.347672869817779229e-01,-8.910879067824366284e-02,1.178540284273728832e-02,5.942522799652708715e-02,4.725020254094040029e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.326903195649310474e-01,-8.770658561681854426e-02,1.159994482249006309e-02,5.945242750010194810e-02,4.727076794174975283e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.306127693265889400e-01,-8.630534820066620982e-02,1.141461536789783841e-02,5.947920325217641302e-02,4.729101295350442435e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.285346453920664100e-01,-8.490506541227128834e-02,1.122941255323259800e-02,5.950555519195306620e-02,4.731093752935008889e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.264559568891981123e-01,-8.350572041249139377e-02,1.104433398579714003e-02,5.953148325970109705e-02,4.733054162362284045e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.243767129482984818e-01,-8.210729820484592711e-02,1.085937761270789302e-02,5.955698739600694819e-02,4.734982519110320642e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.222969227021216548e-01,-8.070978374068409067e-02,1.067454164102685232e-02,5.958206754322754178e-02,4.736878818782257472e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.202165952858213066e-01,-7.931316151393753844e-02,1.048982419885434621e-02,5.960672364507956650e-02,4.738743057039594975e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.181357398369105166e-01,-7.791741684859289363e-02,1.030522291380164997e-02,5.963095564657291264e-02,4.740575229665173751e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.160543654952217590e-01,-7.652253284862645111e-02,1.012073564334158710e-02,5.965476349262582856e-02,4.742375332486066797e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.139724814028665600e-01,-7.512849686628721158e-02,9.936360934148586593e-03,5.967814713050676595e-02,4.744143361450237628e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.118900967041955297e-01,-7.373529192288270917e-02,9.752096335256373039e-03,5.970110650832294308e-02,4.745879312587843457e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.098072205457580330e-01,-7.234290453802724730e-02,9.567940105783732940e-03,5.972364157538648938e-02,4.747583182021369308e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.077238620762621668e-01,-7.095131754981724714e-02,9.383889809724385925e-03,5.974575228173921448e-02,4.749254965953928487e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.056400304465344858e-01,-6.956051594429164819e-02,9.199943670298206186e-03,5.976743857849501490e-02,4.750894660651194257e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.035557348094797714e-01,-6.817048399115202550e-02,9.016099330133571163e-03,5.978870041755176418e-02,4.752502262454190718e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.014709843200409523e-01,-6.678120914771626393e-02,8.832355306031828418e-03,5.980953775228917135e-02,4.754077767819993028e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.938578813515881738e-02,-6.539267301028423018e-02,8.648709067146979901e-03,5.982995053717623385e-02,4.755621173286787018e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.730015541373175647e-02,-6.400486132075781931e-02,8.465158889939521786e-03,5.984993872736596443e-02,4.757132475424907248e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.521409531657563929e-02,-6.261776126938640996e-02,8.281702587381900224e-03,5.986950227931035068e-02,4.758611670950391725e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.312761700638348683e-02,-6.123135495497440683e-02,8.098338654525269117e-03,5.988864115089614221e-02,4.760058756652465473e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.104072964768519793e-02,-5.984562749675664123e-02,7.915064482379484748e-03,5.990735530111510054e-02,4.761473729351760054e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.895344240680749803e-02,-5.846056597034326313e-02,7.731878300019603099e-03,5.992564468925173221e-02,4.762856586008280546e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.686576445183338824e-02,-5.707615090914089362e-02,7.548777777099825899e-03,5.994350927593718875e-02,4.764207323622930979e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.477770495256206629e-02,-5.569236846309981448e-02,7.365760977182808471e-03,5.996094902218439271e-02,4.765525939234598884e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.268927308046866709e-02,-5.430920368820824767e-02,7.182825825226212338e-03,5.997796389028652031e-02,4.766812429988216970e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-8.060047800866380896e-02,-5.292664449565083473e-02,6.999970984560577487e-03,5.999455384406729430e-02,4.768066793141906845e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.851132891185348683e-02,-5.154467307981271118e-02,6.817194107232070824e-03,6.001071884924681632e-02,4.769289026052945801e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.642183496629867401e-02,-5.016327358370398226e-02,6.634493001930581550e-03,6.002645887133928326e-02,4.770479126088049693e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.433200534977503493e-02,-4.878243170036152682e-02,6.451865610122209035e-03,6.004177387703182184e-02,4.771637090715595275e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.224184924153258247e-02,-4.740213239449219546e-02,6.269310039469570134e-03,6.005666383377069456e-02,4.772762917479015377e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.015137582225546009e-02,-4.602236041228577323e-02,6.086824184815839076e-03,6.007112871036798807e-02,4.773856604010554006e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.806059427402150197e-02,-4.464309900914002760e-02,5.904406048185510333e-03,6.008516847618132484e-02,4.774918147988019834e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.596951378026195967e-02,-4.326433353008848209e-02,5.722053626621020211e-03,6.009878310185295253e-02,4.775947547152858408e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.387814352572118715e-02,-4.188604938561812235e-02,5.539765058033714433e-03,6.011197255899879133e-02,4.776944799338673775e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-6.178649269641621478e-02,-4.050823045309633746e-02,5.357537731555265907e-03,6.012473681996895192e-02,4.777909902447747270e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.969457047959648988e-02,-3.913086222449792262e-02,5.175369941914104849e-03,6.013707585707685210e-02,4.778842854447249433e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.760238606370348541e-02,-3.775392975062330514e-02,4.993259968842729025e-03,6.014898964516567775e-02,4.779743653397108827e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.550994863833031562e-02,-3.637741724073631500e-02,4.811205394477899820e-03,6.016047815920838593e-02,4.780612297420187562e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.341726739418143494e-02,-3.500130875535285846e-02,4.629204296987086444e-03,6.017154137522244539e-02,4.781448784697523990e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.132435152303221199e-02,-3.362558935233434659e-02,4.447254393567797587e-03,6.018217926952569574e-02,4.782253113471432449e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.923121021768859379e-02,-3.225024491601384546e-02,4.265354177562559988e-03,6.019239181998582711e-02,4.783025282076290852e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.713785267194667977e-02,-3.087525957128391119e-02,4.083501350679577188e-03,6.020217900557436885e-02,4.783765288927019910e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.504428808055239986e-02,-2.950061637017211899e-02,3.901693885977874313e-03,6.021154080580166240e-02,4.784473132470151713e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.295052563916110233e-02,-2.812630209817643112e-02,3.719929946605185587e-03,6.022047720120710795e-02,4.785148811227473153e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-4.085657454429714869e-02,-2.675230115589279090e-02,3.538207604266534190e-03,6.022898817385696768e-02,4.785792323817930627e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.876244399331353618e-02,-2.537859920661539662e-02,3.356524568471612284e-03,6.023707370615414419e-02,4.786403668925880428e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.666814318435148573e-02,-2.400517929440811049e-02,3.174879053393636109e-03,6.024473378164006687e-02,4.786982845308644374e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.457368131630006447e-02,-2.263202660776864050e-02,2.993268687944358915e-03,6.025196838441280317e-02,4.787529851753242749e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.247906758875574590e-02,-2.125912661386868130e-02,2.811691717030550663e-03,6.025877749948756884e-02,4.788044687157500534e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.038431120198204285e-02,-1.988646170443703065e-02,2.630145744043349901e-03,6.026516111256791791e-02,4.788527350442914887e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.828942135686907111e-02,-1.851401953296162237e-02,2.448629299798411700e-03,6.027111921038853098e-02,4.788977840615179504e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.619440725489314420e-02,-1.714178296834829862e-02,2.267139864685898017e-03,6.027665178096636156e-02,4.789396156767789514e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.409927809807633700e-02,-1.576973700367740802e-02,2.085675595429236241e-03,6.028175881220539795e-02,4.789782298022020046e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-2.200404308894610833e-02,-1.439786737261817412e-02,1.904234631074033183e-03,6.028644029399456844e-02,4.790136263601682542e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.990871143049483677e-02,-1.302615738834221083e-02,1.722814650919555265e-03,6.029069621629656872e-02,4.790458052777324949e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.781329232613943978e-02,-1.165459384057771983e-02,1.541414388233565370e-03,6.029452657088119061e-02,4.790747664899173142e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.571779497968089911e-02,-1.028315901099415182e-02,1.360031189844050897e-03,6.029793135058881398e-02,4.791005099386639321e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.362222859526389211e-02,-8.911838273879051434e-03,1.178663074470144177e-03,6.030091054802686618e-02,4.791230355682438713e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.152660237733631014e-02,-7.540617353338227927e-03,9.973080024681805263e-04,6.030346415711960312e-02,4.791423433338795057e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-9.430925530608870802e-03,-6.169480691597984366e-03,8.159640322160908749e-04,6.030559217234030683e-02,4.791584331963795740e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-7.335207260014662417e-03,-4.798413746078192786e-03,6.346294372857755675e-04,6.030729458985728542e-02,4.791713051251573763e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-5.239456770668723257e-03,-3.427399823227861558e-03,4.533019028824298202e-04,6.030857140647433862e-02,4.791809590941950026e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-3.143683267827608162e-03,-2.056424469099807882e-03,2.719793771797988338e-04,6.030942261957650263e-02,4.791873950839450336e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +-1.047895956848953446e-03,-6.854712333548786032e-04,9.065978101642293576e-05,6.030984822713496979e-02,4.791906130808503694e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +1.047895956848953446e-03,6.854747609913691394e-04,-9.065866544552002890e-05,6.030984822848513283e-02,4.791906130767911165e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.143683267827608162e-03,2.056428449478961765e-03,-2.719785003395683103e-04,6.030942262325027919e-02,4.791873950710415775e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.239456770668723257e-03,3.427403443994493822e-03,-4.533008805037072315e-04,6.030857141229080398e-02,4.791809590721641809e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.335207260014662417e-03,4.798417281660659339e-03,-6.346284481651142144e-04,6.030729459817749799e-02,4.791713050950793251e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.430925530608870802e-03,6.169485654641921994e-03,-8.159633960109273455e-04,6.030559218249947895e-02,4.791584331563709109e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.152660237733631014e-02,7.540622598948598480e-03,-9.973071293973953706e-04,6.030346416895229766e-02,4.791423432815805072e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.362222859526389211e-02,8.911843003398844595e-03,-1.178661748546046013e-03,6.030091056243134379e-02,4.791230355041420363e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.571779497968089911e-02,1.028316228729233943e-02,-1.360029514163022717e-03,6.029793136857455188e-02,4.791005098650062410e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.781329232613943978e-02,1.165459635627093260e-02,-1.541412635191230377e-03,6.029452659290954891e-02,4.790747664098173875e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.990871143049483677e-02,1.302615996569733703e-02,-1.722812738744650737e-03,6.029069624259203453e-02,4.790458051915872928e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.200404308894610833e-02,1.439786981379585079e-02,-1.904232322297100526e-03,6.028644032518190587e-02,4.790136262681881640e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.409927809807633700e-02,1.576973926270058488e-02,-2.085672765948306303e-03,6.028175884945143059e-02,4.789782297045768744e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.619440725489314420e-02,1.714178460696852077e-02,-2.267136706467506348e-03,6.027665182525876464e-02,4.789396155746517558e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.828942135686907111e-02,1.851402190561390534e-02,-2.448626042676379112e-03,6.027111926229975630e-02,4.788977839550367377e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.038431120198204285e-02,1.988646500052378835e-02,-2.630142701762513795e-03,6.026516117184953225e-02,4.788527349308006609e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.247906758875574590e-02,2.125912881813637328e-02,-2.811688582411823317e-03,6.025877756597491225e-02,4.788044685956529545e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.457368131630006447e-02,2.263202883428868870e-02,-2.993265429452636808e-03,6.025196845838190485e-02,4.787529850503589590e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.666814318435148573e-02,2.400518130956883628e-02,-3.174875571334434803e-03,6.024473386354666643e-02,4.786982844006500426e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.876244399331353618e-02,2.537860155423338551e-02,-3.356521244286623379e-03,6.023707379608985585e-02,4.786403667577117660e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.085657454429714869e-02,2.675230446225179692e-02,-3.538204224383324509e-03,6.022898827158363472e-02,4.785792322399530785e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.295052563916110233e-02,2.812630480836791555e-02,-3.719926481041102486e-03,6.022047730698060208e-02,4.785148809737831965e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.504428808055239986e-02,2.950061849150538082e-02,-3.901690119280616393e-03,6.021154091998141833e-02,4.784473130922929918e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.713785267194667977e-02,3.087526092030303329e-02,-4.083497294704225601e-03,6.020217912899699514e-02,4.783765287341206207e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.923121021768859379e-02,3.225024762909983050e-02,-4.265350150322039756e-03,6.019239195292341166e-02,4.783025280446712713e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.132435152303221199e-02,3.362559277340006897e-02,-4.447250359840389071e-03,6.018217941184580283e-02,4.782253111763973830e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.341726739418143494e-02,3.500131064575360412e-02,-4.629199951469096727e-03,6.017154152737080874e-02,4.781448782927493757e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.550994863833031562e-02,3.637741917900112754e-02,-4.811201139551473001e-03,6.016047832150963476e-02,4.780612295609048545e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.760238606370348541e-02,3.775393176035016923e-02,-4.993255713898068528e-03,6.014898981738932976e-02,4.779743651537158855e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.969457047959648988e-02,3.913086390949568200e-02,-5.175365825669199157e-03,6.013707603916203931e-02,4.778842852544841202e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.178649269641621478e-02,4.050823135640630596e-02,-5.357533553397799200e-03,6.012473701168480344e-02,4.777909900512523067e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.387814352572118715e-02,4.188604961049436498e-02,-5.539760974775389223e-03,6.011197276051709337e-02,4.776944797393812281e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.596951378026195967e-02,4.326433459197555342e-02,-5.722049922363203929e-03,6.009878331247212740e-02,4.775947545193567345e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.806059427402150197e-02,4.464310048664835162e-02,-5.904402781410535671e-03,6.008516869500769159e-02,4.774918145998121588e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.015137582225546009e-02,4.602236281820842878e-02,-6.086821341424686338e-03,6.007112893630821293e-02,4.773856601978069270e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.224184924153258247e-02,4.740213706227016516e-02,-6.269307615891953761e-03,6.005666406589427880e-02,4.772762915364283098e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.433200534977503493e-02,4.878243696327538154e-02,-6.451863321515293077e-03,6.004177411457645164e-02,4.771637088479507849e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.642183496629867401e-02,5.016327897555730719e-02,-6.634490957387815559e-03,6.002645911407106932e-02,4.770479123730140825e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.851132891185348683e-02,5.154467883640012627e-02,-6.817192387139359329e-03,6.001071909629291518e-02,4.769289023562121033e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +8.060047800866380896e-02,5.292665060381950698e-02,-6.999969452851268620e-03,5.999455409496794872e-02,4.768066790515457232e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +8.268927308046866709e-02,5.430920996338164941e-02,-7.182824219572654309e-03,5.997796414468306581e-02,4.766812427211122305e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +8.477770495256206629e-02,5.569237252942919186e-02,-7.365758828565563410e-03,5.996094928101405180e-02,4.765525936336483803e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +8.686576445183338824e-02,5.707615367913236593e-02,-7.548775174824592059e-03,5.994350954025812889e-02,4.764207320645461041e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +8.895344240680749803e-02,5.846056801945608006e-02,-7.731875233204669150e-03,5.992564496032021237e-02,4.762856582978836073e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.104072964768519793e-02,5.984563106063635340e-02,-7.915061422113265494e-03,5.990735557933184879e-02,4.761473726257019479e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.312761700638348683e-02,6.123135831775226162e-02,-8.098335585819540355e-03,5.988864143636272314e-02,4.760058753474242232e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.521409531657563929e-02,6.261776555155813828e-02,-8.281699757242515225e-03,5.986950257157396676e-02,4.758611667686226676e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.730015541373175647e-02,6.400486539448353718e-02,-8.465155550198831189e-03,5.984993902677100686e-02,4.757132472057025163e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.938578813515881738e-02,6.539267549336494345e-02,-8.648705479563796247e-03,5.982995084474065917e-02,4.755621169843527452e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.014709843200409523e-01,6.678121103292931904e-02,-8.832351530160939893e-03,5.980953806846932980e-02,4.754077764327613864e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.035557348094797714e-01,6.817048589812216286e-02,-9.016095544571563203e-03,5.978870074254026556e-02,4.752502258917404854e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.056400304465344858e-01,6.956051677978937475e-02,-9.199939475686775420e-03,5.976743891275260873e-02,4.750894657079391958e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.077238620762621668e-01,7.095131723394508538e-02,-9.383885534661080829e-03,5.974575262597582592e-02,4.749254962379025335e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.098072205457580330e-01,7.234290502458867733e-02,-9.567935802927253833e-03,5.972364192960674667e-02,4.747583178447856156e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.118900967041955297e-01,7.373529335972714427e-02,-9.752092143236504654e-03,5.970110687247916909e-02,4.745879308988799061e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.139724814028665600e-01,7.512849768855255073e-02,-9.936356608610140070e-03,5.967814750453587919e-02,4.744143357823944473e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.160543654952217590e-01,7.652253382835033646e-02,-1.012073146441640724e-02,5.965476387668151165e-02,4.742375328840672810e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.181357398369105166e-01,7.791741785084078942e-02,-1.030521876097525644e-02,5.963095604020924451e-02,4.740575225995360964e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.202165952858213066e-01,7.931316242031656583e-02,-1.048982002198305813e-02,5.960672404855076739e-02,4.738743053347736489e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.222969227021216548e-01,8.070978470654550929e-02,-1.067453780453226676e-02,5.958206795599334943e-02,4.736878815070322268e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.243767129482984818e-01,8.210730087162364244e-02,-1.085937383922006179e-02,5.955698781762158722e-02,4.734982515358166499e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.264559568891981123e-01,8.350572361939204158e-02,-1.104433030182780037e-02,5.953148369000321738e-02,4.733054158537646772e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.285346453920664100e-01,8.490506951728006568e-02,-1.122940921558185194e-02,5.950555563053082364e-02,4.731093749029268158e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.306127693265889400e-01,8.630535268830184792e-02,-1.141461253812714287e-02,5.947920369785541367e-02,4.729101291338755342e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.326903195649310474e-01,8.770658942120304613e-02,-1.159994210815449775e-02,5.945242795221254706e-02,4.727076790070115497e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.347672869817779229e-01,8.910879465888911499e-02,-1.178540019169795235e-02,5.942522845482711008e-02,4.725020249896388913e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.368436624543747093e-01,9.051198335620923296e-02,-1.197098856783196386e-02,5.939760526818574360e-02,4.722931675604810087e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.389194368625665810e-01,9.191617150323443119e-02,-1.215670954310833773e-02,5.936955845592300357e-02,4.720811072053721635e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.409946010888388224e-01,9.332137252084969636e-02,-1.234256494580932874e-02,5.934108808277663294e-02,4.718658444219787640e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.430691460183568520e-01,9.472760358246333501e-02,-1.252855719337381615e-02,5.931219421449698131e-02,4.716473797168099225e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.451430625390061624e-01,9.613487868505735112e-02,-1.271468788043745787e-02,5.928287691801660542e-02,4.714257136046221541e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.472163415414325383e-01,9.754321435376631100e-02,-1.290095960130081647e-02,5.925313626163155462e-02,4.712008466106443749e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.492889739190818854e-01,9.895262492008678745e-02,-1.308737385571736439e-02,5.922297231425409370e-02,4.709727792679114788e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.513609505682403933e-01,1.003631250454115470e-01,-1.327393252509790657e-02,5.919238514767105608e-02,4.707415121224636789e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.534322623880743364e-01,1.017747308118250871e-01,-1.346063812147490005e-02,5.916137483387539503e-02,4.705070457291373187e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.555029002806702365e-01,1.031874581038856598e-01,-1.364749246452636421e-02,5.912994144613228226e-02,4.702693806507113883e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.575728551510746644e-01,1.046013199586123682e-01,-1.383449775002701908e-02,5.909808505898361125e-02,4.700285174618417661e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.596421179073343188e-01,1.060163322481383202e-01,-1.402165601595431639e-02,5.906580574801337247e-02,4.697844567500299995e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.617106794605358555e-01,1.074325101813600053e-01,-1.420896898287674764e-02,5.903310359055119472e-02,4.695371991112275434e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.637785307248458277e-01,1.088498707868761906e-01,-1.439643918809653186e-02,5.899997866515128281e-02,4.692867451504336151e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.658456626175507931e-01,1.102684272855219971e-01,-1.458406835881698345e-02,5.896643105100837084e-02,4.690330954817072961e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.679120660590968095e-01,1.116881954729014026e-01,-1.477185839880362783e-02,5.893246082984680834e-02,4.687762507341125540e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.699777319731296532e-01,1.131091913986687209e-01,-1.495981177825260119e-02,5.889806808381281567e-02,4.685162115425001716e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.720426512865346202e-01,1.145314283785666687e-01,-1.514793020815815086e-02,5.886325289660816751e-02,4.682529785561891367e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.741068149294762724e-01,1.159549232084002707e-01,-1.533621608078051396e-02,5.882801535321919434e-02,4.679865524345880878e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.761702138354382385e-01,1.173796911935669485e-01,-1.552467135425936694e-02,5.879235553957372351e-02,4.677169338461998338e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.782328389412633773e-01,1.188057465533184087e-01,-1.571329787967534752e-02,5.875627354347870424e-02,4.674441234714233340e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.802946811871931343e-01,1.202331054181298292e-01,-1.590209791843879769e-02,5.871976945408630133e-02,4.671681220029097470e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.823557315169075932e-01,1.216617817484327846e-01,-1.609107321293050050e-02,5.868284336188336892e-02,4.668889301428590932e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.844159808775653053e-01,1.230917900903400336e-01,-1.628022581030419413e-02,5.864549535951524978e-02,4.666065486101493853e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.864754202198428135e-01,1.245231484245078180e-01,-1.646955806402488134e-02,5.860772554032116916e-02,4.663209781298175982e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.885340404979746198e-01,1.259558701680327519e-01,-1.665907184648451195e-02,5.856953399917031594e-02,4.660322194388382133e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.905918326697928489e-01,1.273899708533593189e-01,-1.684876936213177898e-02,5.853092083230820863e-02,4.657402732867715889e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.926487876967668822e-01,1.288254638299667076e-01,-1.703865243341457295e-02,5.849188613758220939e-02,4.654451404377795698e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.947048965440432433e-01,1.302623676269376585e-01,-1.722872331741932828e-02,5.845243001445341730e-02,4.651468216660459842e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.967601501804849828e-01,1.317006971979113039e-01,-1.741898426942718042e-02,5.841255256323682477e-02,4.648453177529163205e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.988145395787117298e-01,1.331404656655899299e-01,-1.760943705989782143e-02,5.837225388587238828e-02,4.645406294955885529e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.008680557151390489e-01,1.345816880349100519e-01,-1.780008370370781898e-02,5.833153408641770371e-02,4.642327577069808586e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.029206895700181590e-01,1.360243817007422973e-01,-1.799092658613661230e-02,5.829039326991071468e-02,4.639217032087088466e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.049724321274755956e-01,1.374685596987106073e-01,-1.818196739685654095e-02,5.824883154299131949e-02,4.636074668352201944e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.070232743755528182e-01,1.389142391263301357e-01,-1.837320834405982775e-02,5.820684901437123138e-02,4.632900494348609066e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.090732073062456231e-01,1.403614338537499118e-01,-1.856465152058454568e-02,5.816444579374483587e-02,4.629694518659491220e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.111222219155439450e-01,1.418101584027957052e-01,-1.875629882861011566e-02,5.812162199296221077e-02,4.626456750048629774e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.131703092034713254e-01,1.432604306848545284e-01,-1.894815281214942693e-02,5.807837772477195126e-02,4.623187197362956802e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.152174601741243254e-01,1.447122635127089940e-01,-1.914021508442551728e-02,5.803471310362891267e-02,4.619885869582952553e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.172636658357122441e-01,1.461656730467849885e-01,-1.933248798246862618e-02,5.799062824590212745e-02,4.616552775846928802e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.193089172005963650e-01,1.476206743062795368e-01,-1.952497334548912444e-02,5.794612326944416358e-02,4.613187925404407808e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.213532052853296739e-01,1.490772823792799184e-01,-1.971767367511259236e-02,5.790119829365454607e-02,4.609791327654353954e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.233965211106962168e-01,1.505355119126157992e-01,-1.991059032179470084e-02,5.785585343960384513e-02,4.606362992130673129e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.254388557017504569e-01,1.519953793096984473e-01,-2.010372579015734518e-02,5.781008883115720809e-02,4.602902928511649616e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.274802000878569097e-01,1.534569008415848368e-01,-2.029708268956399025e-02,5.776390459176871522e-02,4.599411146565559605e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.295205453027292508e-01,1.549200882458502682e-01,-2.049066222071078969e-02,5.771730084762189544e-02,4.595887656244328490e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.315598823844699505e-01,1.563849589075267110e-01,-2.068446684286013443e-02,5.767027772720312306e-02,4.592332467662965656e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.335982023756095483e-01,1.578515295941025909e-01,-2.087849893061297438e-02,5.762283535970885023e-02,4.588745591015492842e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.356354963231459820e-01,1.593198130707392246e-01,-2.107276035148326898e-02,5.757497387607745515e-02,4.585127036660529565e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.376717552785839183e-01,1.607898254506195390e-01,-2.126725332120013232e-02,5.752669340924164432e-02,4.581476815126965252e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.397069702979740813e-01,1.622615819528470116e-01,-2.146197981427999668e-02,5.747799409367922163e-02,4.577794937065199909e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.417411324419524721e-01,1.637350978520245581e-01,-2.165694200207902292e-02,5.742887606597789352e-02,4.574081413287562348e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.437742327757797534e-01,1.652103891665768209e-01,-2.185214211471383020e-02,5.737933946400335106e-02,4.570336254732791370e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.458062623693803850e-01,1.666874705304931814e-01,-2.204758222964059780e-02,5.732938442748684166e-02,4.566559472496520833e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.478372122973819258e-01,1.681663570057046198e-01,-2.224326418454930857e-02,5.727901109814505232e-02,4.562751077829211299e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.498670736391540859e-01,1.696470640134664365e-01,-2.243919036500047551e-02,5.722821961989455614e-02,4.558911082142249049e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.518958374788481391e-01,1.711296069509967155e-01,-2.263536311998770936e-02,5.717701013731647797e-02,4.555039496986086345e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.539234949054359203e-01,1.726140007734825454e-01,-2.283178395835683966e-02,5.712538279772648442e-02,4.551136334081948287e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.559500370127490432e-01,1.741002611459073479e-01,-2.302845518476683007e-02,5.707333775065390191e-02,4.547201605291559257e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.579754548995178420e-01,1.755884016455049601e-01,-2.322537893398885292e-02,5.702087514731020434e-02,4.543235322664313514e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.599997396694109231e-01,1.770784394560060149e-01,-2.342255751405434086e-02,5.696799514049347146e-02,4.539237498399874005e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.620228824310735782e-01,1.785703902483214700e-01,-2.361999306783127070e-02,5.691469788477998559e-02,4.535208144833859367e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.640448742981674202e-01,1.800642699380346212e-01,-2.381768780869261692e-02,5.686098353642176501e-02,4.531147274446328810e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.660657063894090180e-01,1.815600917557494698e-01,-2.401564373875895997e-02,5.680685225383465958e-02,4.527054899905918472e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.680853698286093101e-01,1.830578731922526681e-01,-2.421386323019787332e-02,5.675230419712483376e-02,4.522931034045885590e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.701038557447121846e-01,1.845576285401729633e-01,-2.441234816440078850e-02,5.669733952843732105e-02,4.518775689839423793e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.721211552718337257e-01,1.860593720616441349e-01,-2.461110091150186391e-02,5.664195841201129233e-02,4.514588880466903764e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.741372595493007380e-01,1.875631197849217391e-01,-2.481012344274932668e-02,5.658616101379596725e-02,4.510370619274101101e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.761521597216903823e-01,1.890688870376776576e-01,-2.500941773853820188e-02,5.652994750244640099e-02,4.506120919770955635e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.781658469388685329e-01,1.905766906632992974e-01,-2.520898641842213989e-02,5.647331804813801592e-02,4.501839795624483775e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.801783123560285804e-01,1.920865450380199457e-01,-2.540883159248652837e-02,5.641627282252113101e-02,4.497527260657428472e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.821895471337307892e-01,1.935984650753732605e-01,-2.560895539201766755e-02,5.635881199950226045e-02,4.493183328904711282e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.841995424379405444e-01,1.951124678515837318e-01,-2.580935970080866851e-02,5.630093575532454930e-02,4.488808014541809666e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.862082894400676536e-01,1.966285667750598543e-01,-2.601004698313701471e-02,5.624264426852474547e-02,4.484401331938780988e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.882157793170046500e-01,1.981467794609510225e-01,-2.621101967440763597e-02,5.618393771874497650e-02,4.479963295638212384e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.902220032511660386e-01,1.996671197572329193e-01,-2.641227986265418104e-02,5.612481628775087272e-02,4.475493920348277976e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.922269524305266541e-01,2.011896031559197440e-01,-2.661382951655558854e-02,5.606528015964880318e-02,4.470993220989826766e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.942306180486604639e-01,2.027142464716726877e-01,-2.681567097833721311e-02,5.600532952097460326e-02,4.466461212639095124e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.962329913047793695e-01,2.042410626743020319e-01,-2.701780629629586816e-02,5.594496455999183049e-02,4.461897910565180148e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +2.982340634037716764e-01,2.057700680602219945e-01,-2.722023773886143166e-02,5.588418546777970675e-02,4.457303330267901598e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.002338255562408409e-01,2.073012815480589355e-01,-2.742296805515838029e-02,5.582299243625220980e-02,4.452677487350752417e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.022322689785442162e-01,2.088347153232777142e-01,-2.762599897870952545e-02,5.576138565984863765e-02,4.448020397626564759e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.042293848928312450e-01,2.103703871487425103e-01,-2.782933306819604727e-02,5.569936533514848603e-02,4.443332077102758459e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.062251645270823164e-01,2.119083095145637974e-01,-2.803297218636766758e-02,5.563693166104280441e-02,4.438612541987747018e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.082195991151471803e-01,2.134485018703365933e-01,-2.823691900299052718e-02,5.557408483855959258e-02,4.433861808682850736e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.102126798967836940e-01,2.149909784497472387e-01,-2.844117569559226344e-02,5.551082507039734742e-02,4.429079893731609485e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.122043981176959027e-01,2.165357553516893407e-01,-2.864574465325376434e-02,5.544715256144733956e-02,4.424266813906134188e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.141947450295727307e-01,2.180828463205686518e-01,-2.885062776522989703e-02,5.538306751903194353e-02,4.419422586170219658e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.161837118901262844e-01,2.196322679155873203e-01,-2.905582741483072468e-02,5.531857015312308112e-02,4.414547227718753075e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.181712899631304881e-01,2.211840347327033718e-01,-2.926134586396791221e-02,5.525366067567348549e-02,4.409640755921143129e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.201574705184592196e-01,2.227381634549834288e-01,-2.946718569108478306e-02,5.518833930065263765e-02,4.404703188379451295e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.221422448321247800e-01,2.242946715658937495e-01,-2.967334899066173190e-02,5.512260624398033354e-02,4.399734542832726691e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.241256041863160853e-01,2.258535729950193471e-01,-2.987983802558838442e-02,5.505646172437274333e-02,4.394734837247531445e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.261075398694370242e-01,2.274148845176577616e-01,-3.008665522962067698e-02,5.498990596248877483e-02,4.389704089792426656e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.280880431761448723e-01,2.289786216750558656e-01,-3.029380284429342499e-02,5.492293918131604946e-02,4.384642318838969377e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.300671054073883171e-01,2.305448006071216249e-01,-3.050128344425365526e-02,5.485556160596154890e-02,4.379549542959070285e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.320447178704457047e-01,2.321134371676186192e-01,-3.070909958222201031e-02,5.478777346313759566e-02,4.374425780936302410e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.340208718789632880e-01,2.336845479050756458e-01,-3.091725327042185029e-02,5.471957498205245324e-02,4.369271051748098156e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.359955587529934729e-01,2.352581473627587683e-01,-3.112574677782346116e-02,5.465096639473596835e-02,4.364085374593722477e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.379687698190326217e-01,2.368342511099247039e-01,-3.133458224315384399e-02,5.458194793583626137e-02,4.358868768907586677e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.399404964100594673e-01,2.384128763964959985e-01,-3.154376231425491672e-02,5.451251984233561332e-02,4.353621254324024914e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.419107298655730265e-01,2.399940375801838111e-01,-3.175328911793701830e-02,5.444268235326474303e-02,4.348342850689054795e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.438794615316307368e-01,2.415777513318667635e-01,-3.196316515661957319e-02,5.437243571059034025e-02,4.343033578094347424e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.458466827608864258e-01,2.431640355833207812e-01,-3.217339344653392247e-02,5.430178015738422459e-02,4.337693456800085778e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.478123849126281697e-01,2.447529062290572932e-01,-3.238397614272162106e-02,5.423071593877825530e-02,4.332322507279112722e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.497765593528163741e-01,2.463443786762334053e-01,-3.259491543159840493e-02,5.415924330282332472e-02,4.326920750221330825e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.517391974541219102e-01,2.479384675416138606e-01,-3.280621345778825942e-02,5.408736250060223322e-02,4.321488206582489444e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.537002905959633070e-01,2.495351916171516249e-01,-3.301787296575132286e-02,5.401507378552007632e-02,4.316024897512913405e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.556598301645456095e-01,2.511345647133128578e-01,-3.322989644248092872e-02,5.394237741286669885e-02,4.310530844387363003e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.576178075528972378e-01,2.527366046290835389e-01,-3.344228643901920456e-02,5.386927364028224613e-02,4.305006068824637211e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.595742141609086784e-01,2.543413274864233520e-01,-3.365504534451298907e-02,5.379576272769779482e-02,4.299450592640328139e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.615290413953694548e-01,2.559487496383298089e-01,-3.386817562897619094e-02,5.372184493764461249e-02,4.293864437891308228e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.634822806700064302e-01,2.575588867537079163e-01,-3.408167950818773856e-02,5.364752053524898118e-02,4.288247626861033113e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.654339234055213326e-01,2.591717558325584148e-01,-3.429555963231729898e-02,5.357278978849697865e-02,4.282600182078036277e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.673839610296283364e-01,2.607873740505967097e-01,-3.450981881419831754e-02,5.349765296688435007e-02,4.276922126268978830e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.693323849770918654e-01,2.624057557378959848e-01,-3.472445920728649965e-02,5.342211034273044307e-02,4.271213482425313401e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.712791866897643955e-01,2.640269205218550019e-01,-3.493948376002410650e-02,5.334616219065099058e-02,4.265474273759598534e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.732243576166232035e-01,2.656508839007367673e-01,-3.515489472246697417e-02,5.326980878748070747e-02,4.259704523682206756e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.751678892138092802e-01,2.672776605509011860e-01,-3.537069476714289201e-02,5.319305041293420633e-02,4.253904255891426400e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.771097729446636349e-01,2.689072679467470750e-01,-3.558688654707199001e-02,5.311588734877265711e-02,4.248073494330588118e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.790500002797654311e-01,2.705397237135994670e-01,-3.580347276956746122e-02,5.303831987911117318e-02,4.242212263158762764e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.809885626969692352e-01,2.721750436802548578e-01,-3.602045593280902430e-02,5.296034829041187941e-02,4.236320586776475827e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.829254516814425413e-01,2.738132456890615685e-01,-3.623783875729245235e-02,5.288197287173847932e-02,4.230398489828862130e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.848606587257031308e-01,2.754543453668589215e-01,-3.645562379336526188e-02,5.280319391443789173e-02,4.224445997194619107e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.867941753296565421e-01,2.770983595369576058e-01,-3.667381385851860992e-02,5.272401171242688256e-02,4.218463134026673722e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.887259930006330966e-01,2.787453054600792512e-01,-3.689241155115186022e-02,5.264442656170708473e-02,4.212449925704095932e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.906561032534255906e-01,2.803952005184612850e-01,-3.711141968689948140e-02,5.256443876102952911e-02,4.206406397860112389e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.925844976103263773e-01,2.820480625004357367e-01,-3.733084123853901842e-02,5.248404861078097977e-02,4.200332576344978364e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.945111676011643920e-01,2.837039071700639403e-01,-3.755067907888199469e-02,5.240325641371972737e-02,4.194228487281337903e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.964361047633429003e-01,2.853627527633899597e-01,-3.777093617589073055e-02,5.232206247416559819e-02,4.188094157030751719e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +3.983593006418760796e-01,2.870246142047925830e-01,-3.799161474975294023e-02,5.224046709971414543e-02,4.181929612227784365e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.002807467894267113e-01,2.886895107290988194e-01,-3.821271838687029243e-02,5.215847059987341328e-02,4.175734879762498797e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.022004347663427071e-01,2.903574590126544175e-01,-3.843424958853332990e-02,5.207607328597316926e-02,4.169509986756030351e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.041183561406945790e-01,2.920284763996272570e-01,-3.865621171308986775e-02,5.199327547171704589e-02,4.163254960598282706e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.060345024883124099e-01,2.937025792878419361e-01,-3.887860736517938232e-02,5.191007747260024952e-02,4.156969828943128942e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.079488653928228237e-01,2.953797866139650785e-01,-3.910143993469841761e-02,5.182647960668260861e-02,4.150654619703254555e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.098614364456857340e-01,2.970601156396797116e-01,-3.932471260791051137e-02,5.174248219296453954e-02,4.144309361023295057e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.117722072462318139e-01,2.987435831191847524e-01,-3.954842830088801942e-02,5.165808555300734767e-02,4.137934081334503911e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.136811694016988561e-01,3.004302080854056589e-01,-3.977259045749894500e-02,5.157329000989293449e-02,4.131528809314354289e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.155883145272687984e-01,3.021200072973992401e-01,-3.999720212632235788e-02,5.148809588857734126e-02,4.125093573897542498e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.174936342461046945e-01,3.038129991885418213e-01,-4.022226676820302854e-02,5.140250351575156862e-02,4.118628404292204448e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.193971201893874623e-01,3.055092014432720382e-01,-4.044778778045935580e-02,5.131651321948338396e-02,4.112133329953633898e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.212987639963524100e-01,3.072086309592993447e-01,-4.067376845019737147e-02,5.123012532948320596e-02,4.105608380627682741e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.231985573143263735e-01,3.089113067592595963e-01,-4.090021215871862681e-02,5.114334017727762555e-02,4.099053586318297526e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.250964917987639091e-01,3.106172470277812625e-01,-4.112712271576896700e-02,5.105615809555211215e-02,4.092468977290040777e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.269925591132845422e-01,3.123264711964581219e-01,-4.135450369551391808e-02,5.096857941790970681e-02,4.085854584052620586e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.288867509297087377e-01,3.140389956520988624e-01,-4.158235857898116145e-02,5.088060447956009591e-02,4.079210437397080002e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.307790589280949822e-01,3.157548410056558730e-01,-4.181069120346580376e-02,5.079223361682413257e-02,4.072536568381985855e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.326694747967760879e-01,3.174740249247599566e-01,-4.203950515987493058e-02,5.070346716718149294e-02,4.065833008309717722e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.345579902323956079e-01,3.191965674283254728e-01,-4.226880448887284508e-02,5.061430546914671286e-02,4.059099788760023308e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.364445969399446956e-01,3.209224868507172812e-01,-4.249859326307561108e-02,5.052474886166578860e-02,4.052336941553946748e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.383292866327981319e-01,3.226518018969287138e-01,-4.272887559021926185e-02,5.043479768411283926e-02,4.045544498806267208e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.402120510327509062e-01,3.243845318425761515e-01,-4.295965480911387302e-02,5.034445227717728594e-02,4.038722492884824766e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.420928818700546881e-01,3.261206968463702949e-01,-4.319093526355546886e-02,5.025371298300580075e-02,4.031870956429196018e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.439717708834537979e-01,3.278603171287119977e-01,-4.342272108265751024e-02,5.016258014349757322e-02,4.024989922305000078e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.458487098202219556e-01,3.296034095957653953e-01,-4.365501589907369584e-02,5.007105410220858560e-02,4.018079423684801643e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.477236904361980296e-01,3.313499975415669652e-01,-4.388782441407201862e-02,4.997913520307821489e-02,4.011139493991481286e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.495967044958226744e-01,3.331001007159369309e-01,-4.412115078919840588e-02,4.988682379004473477e-02,4.004170166867154212e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.514677437721743569e-01,3.348537393648177707e-01,-4.435499921106216686e-02,4.979412020782219422e-02,3.997171476227737164e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.533368000470053838e-01,3.366109321286235856e-01,-4.458937363741075077e-02,4.970102480201137041e-02,3.990143456260673571e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.552038651107780942e-01,3.383717008569456230e-01,-4.482427838975283629e-02,4.960753791927513534e-02,3.983086141434062921e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.570689307627009978e-01,3.401360683384228034e-01,-4.505971796398922091e-02,4.951365990634338382e-02,3.975999566418739750e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.589319888107645240e-01,3.419040547236328331e-01,-4.529569629513546108e-02,4.941939111097271714e-02,3.968883766145316350e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.607930310717773814e-01,3.436756826124021114e-01,-4.553221810349163817e-02,4.932473188149956445e-02,3.961738775786116662e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.626520493714022519e-01,3.454509710659170452e-01,-4.576928687764118819e-02,4.922968256718097957e-02,3.954564630780707524e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.645090355441917063e-01,3.472299435497775977e-01,-4.600690688451048588e-02,4.913424351956214436e-02,3.947361366836308894e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.663639814336241196e-01,3.490126219808183938e-01,-4.624508262772696748e-02,4.903841509022083961e-02,3.940129019875292515e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.682168788921395874e-01,3.507990288455532513e-01,-4.648381779215238901e-02,4.894219763247006222e-02,3.932867626083686452e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.700677197811755081e-01,3.525891883757684697e-01,-4.672311637250975425e-02,4.884559150139650835e-02,3.925577221857436405e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.719164959712024987e-01,3.543831229776815972e-01,-4.696298241935658452e-02,4.874859705390467701e-02,3.918257843829056064e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.737631993417600329e-01,3.561808561066139855e-01,-4.720341981155043443e-02,4.865121464827580622e-02,3.910909528866675089e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.756078217814919684e-01,3.579824097525655668e-01,-4.744443163138586939e-02,4.855344464607722066e-02,3.903532314089733224e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.774503551881826846e-01,3.597878087043267503e-01,-4.768602196533974769e-02,4.845528741133026812e-02,3.896126236858876712e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.792907914687918880e-01,3.615970778244534944e-01,-4.792819359315260430e-02,4.835674331102303436e-02,3.888691334737250926e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.811291225394909166e-01,3.634102399754942092e-01,-4.817094983205585934e-02,4.825781271632413605e-02,3.881227645524552572e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.829653403256978783e-01,3.652273192713189065e-01,-4.841429374217899595e-02,4.815849600177261880e-02,3.873735207273574788e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.847994367621128453e-01,3.670483413411999574e-01,-4.865822810674159254e-02,4.805879354626042821e-02,3.866214058249939356e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.866314037927538805e-01,3.688733304047613903e-01,-4.890275513045725525e-02,4.795870573381633140e-02,3.858664236946248716e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.884612333709922316e-01,3.707023128227257547e-01,-4.914787743843732071e-02,4.785823295401177985e-02,3.851085782074317176e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.902889174595871369e-01,3.725353120863876821e-01,-4.939359685389609761e-02,4.775737560186102404e-02,3.843478732569010070e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.921144480307220181e-01,3.743723531330298004e-01,-4.963991481018003443e-02,4.765613407986846184e-02,3.835843127628906779e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.939378170660390643e-01,3.762134621588666961e-01,-4.988683310629434681e-02,4.755450879748951420e-02,3.828179006663476325e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.957590165566744811e-01,3.780586646621101887e-01,-5.013435330861220479e-02,4.745250017120748731e-02,3.820486409314593490e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.975780385032944064e-01,3.799079869102094387e-01,-5.038247574654960287e-02,4.735010862581452706e-02,3.812765375439478577e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +4.993948749161288836e-01,3.817614527341446906e-01,-5.063120056223117255e-02,4.724733459671751762e-02,3.805015945150003742e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.012095178150079988e-01,3.836190893496582710e-01,-5.088052845022020959e-02,4.714417852856186975e-02,3.797238158799053354e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.030219592293964093e-01,3.854809224430219561e-01,-5.113045895685522368e-02,4.704064087648877901e-02,3.789432056957349748e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.048321911984281485e-01,3.873469761868061356e-01,-5.138099173606525821e-02,4.693672210698698888e-02,3.781597680458533839e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.066402057709425977e-01,3.892172752082220244e-01,-5.163212578532108687e-02,4.683242269857417622e-02,3.773735070418522408e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.084459950055177924e-01,3.910918466331282373e-01,-5.188386008406962319e-02,4.672774314235435267e-02,3.765844268193413447e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.102495509705069487e-01,3.929707134835868088e-01,-5.213619288055654916e-02,4.662268394294748208e-02,3.757925315407058542e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.120508657440723255e-01,3.948539012466351639e-01,-5.238912253973882632e-02,4.651724561895854287e-02,3.749978253980901233e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.138499314142201957e-01,3.967414337092276533e-01,-5.264264653579012188e-02,4.641142870388543368e-02,3.742003126111733136e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.156467400788354860e-01,3.986333369864577536e-01,-5.289676311094386463e-02,4.630523374631888289e-02,3.733999974290371782e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.174412838457173036e-01,4.005296334716465512e-01,-5.315146939487135763e-02,4.619866130970461116e-02,3.725968841286108058e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.192335548326121319e-01,4.024303445737815732e-01,-5.340676169230915149e-02,4.609171197515120333e-02,3.717909770246818346e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.210235451672501350e-01,4.043354945256011912e-01,-5.366263702063171770e-02,4.598438634115748630e-02,3.709822804638095439e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.228112469873781309e-01,4.062451052710731592e-01,-5.391909252231467942e-02,4.587668502243939794e-02,3.701707988270983019e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.245966524407953413e-01,4.081591992274264946e-01,-5.417612422131772715e-02,4.576860865121954586e-02,3.693565365301505476e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.263797536853872527e-01,4.100777947711624627e-01,-5.443372864061693517e-02,4.566015787802157888e-02,3.685394980290402356e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.281605428891603671e-01,4.120009128850969926e-01,-5.469190230735999220e-02,4.555133337069193489e-02,3.677196878211600461e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.299390122302762851e-01,4.139285720368154342e-01,-5.495064175001453111e-02,4.544213581470143981e-02,3.668971104451239085e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.317151538970863456e-01,4.158607913493004848e-01,-5.520994333185680336e-02,4.533256591310378530e-02,3.660717704838034048e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.334889600881657090e-01,4.177975879493989186e-01,-5.546980364786075618e-02,4.522262438664244322e-02,3.652436725649137461e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.352604230123479967e-01,4.197389778999250920e-01,-5.573021971155846244e-02,4.511231197299401391e-02,3.644128213654858062e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.370295348887590414e-01,4.216849765859852495e-01,-5.599118934437052192e-02,4.500162942547023676e-02,3.635792216133204580e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.387962879468511934e-01,4.236355993518910457e-01,-5.625271032989208331e-02,4.489057751223574089e-02,3.627428780880520565e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.405606744264380703e-01,4.255908585044723691e-01,-5.651478133451430519e-02,4.477915701561959588e-02,3.619037956240178655e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.423226865777274197e-01,4.275507654963051074e-01,-5.677740112727489019e-02,4.466736873091314147e-02,3.610619791158318215e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.440823166613565354e-01,4.295153327877318516e-01,-5.704056999550081997e-02,4.455521346520760484e-02,3.602174335171165476e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.458395569484247867e-01,4.314845705692794686e-01,-5.730428891136405550e-02,4.444269203450911671e-02,3.593701638419984912e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.475943997205290348e-01,4.334584862458090715e-01,-5.756855961156158441e-02,4.432980526310983244e-02,3.585201751711387663e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.493468372697968283e-01,4.354370880295653778e-01,-5.783338550921716253e-02,4.421655398076076993e-02,3.576674726539405547e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.510968618989195988e-01,4.374203824705562016e-01,-5.809877153584919185e-02,4.410293901969621760e-02,3.568120615083006375e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.528444659211878554e-01,4.394083742036398177e-01,-5.836472368901809288e-02,4.398896121208812487e-02,3.559539470260864036e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.545896416605240464e-01,4.414010686857537391e-01,-5.863125030859205750e-02,4.387462138711954729e-02,3.550931345731139777e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.563323814515162002e-01,4.433984680397362044e-01,-5.889836104269941669e-02,4.375992036677706781e-02,3.542296295918270221e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.580726776394524524e-01,4.454005734421970231e-01,-5.916606722216993103e-02,4.364485896408865084e-02,3.533634376068727212e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.598105225803533536e-01,4.474073870412204612e-01,-5.943438356479461421e-02,4.352943797752316379e-02,3.524945642238912491e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.615459086410068412e-01,4.494189075595399485e-01,-5.970332654270105177e-02,4.341365818627989509e-02,3.516230151321872999e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.632788281990006585e-01,4.514351350553761910e-01,-5.997291500996410063e-02,4.329752034674941297e-02,3.507487961085111183e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.650092736427564377e-01,4.534560691593650383e-01,-6.024317127103302749e-02,4.318102518695769027e-02,3.498719130132533772e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.667372373715630074e-01,4.554817062504090686e-01,-6.051412038513049457e-02,4.306417340051087173e-02,3.489923717973532069e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.684627117956096987e-01,4.575120454045062957e-01,-6.078579019849569831e-02,4.294696564193431487e-02,3.481101785007027494e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.701856893360195411e-01,4.595470817084197357e-01,-6.105821211364634293e-02,4.282940252071514714e-02,3.472253392544348283e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.719061624248831244e-01,4.615868142024618370e-01,-6.133142149361384132e-02,4.271148459438930262e-02,3.463378602816428176e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.736241235052911280e-01,4.636312400057323591e-01,-6.160545699223669847e-02,4.259321236184267223e-02,3.454477478934086521e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.753395650313679610e-01,4.656803565782285936e-01,-6.188036161088621839e-02,4.247458625678498384e-02,3.445550084929746570e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.770524794683048464e-01,4.677341609999856598e-01,-6.215618210349008188e-02,4.235560663984804658e-02,3.436596485736859163e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.787628592923925730e-01,4.697926515221416688e-01,-6.243296931918430881e-02,4.223627379239994900e-02,3.427616747205160519e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.804706969910552461e-01,4.718558290370136743e-01,-6.271077927356895343e-02,4.211658790758852217e-02,3.418610936044649828e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.821759850628824839e-01,4.739236945904936049e-01,-6.298967175751184200e-02,4.199654908265058884e-02,3.409579119810171588e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.838787160176629465e-01,4.759962494931453403e-01,-6.326971126371429277e-02,4.187615731180362177e-02,3.400521366894915798e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.855788823764169759e-01,4.780734977657087881e-01,-6.355096802417309343e-02,4.175541247676966411e-02,3.391437746513875084e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.872764766714295703e-01,4.801554472531168183e-01,-6.383351648750827667e-02,4.163431433794967385e-02,3.382328328628222969e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.889714914462828022e-01,4.822421070838862622e-01,-6.411743662744344430e-02,4.151286252668109217e-02,3.373193183905776049e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.906639192558894580e-01,4.843334894988757244e-01,-6.440281337742123635e-02,4.139105653534302931e-02,3.364032383683963068e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.923537526665245689e-01,4.864296109144067848e-01,-6.468973637798366982e-02,4.126889571014409619e-02,3.354845999897778652e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.940409842558590503e-01,4.885304892201637883e-01,-6.497830097363072044e-02,4.114637924165876620e-02,3.345634105029983152e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.957256066129916761e-01,4.906361487283490974e-01,-6.526860772426355817e-02,4.102350615617594148e-02,3.336396772051403170e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.974076123384820525e-01,4.927466181401373158e-01,-6.556076273915309449e-02,4.090027530618855350e-02,3.327134074298986888e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +5.990869940443830366e-01,4.948619295898656345e-01,-6.585487650211194199e-02,4.077668536287595152e-02,3.317846085430888881e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.007637443542724887e-01,4.969821222484950840e-01,-6.615106542330990991e-02,4.065273480756710633e-02,3.308532879317999664e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.024378559032870228e-01,4.991072409747891458e-01,-6.644945066730481575e-02,4.052842192302218149e-02,3.299194529936666509e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.041093213381530935e-01,5.012373372916789638e-01,-6.675015829670903622e-02,4.040374478605510827e-02,3.289831111253931906e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.057781333172199689e-01,5.033724694214705142e-01,-6.705331905171564577e-02,4.027870126007635637e-02,3.280442697115393269e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.074442845104918165e-01,5.055127028203741935e-01,-6.735906855558479456e-02,4.015328898786054562e-02,3.271029361116729595e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.091077675996596774e-01,5.076581115257730037e-01,-6.766754682460786319e-02,4.002750538436858352e-02,3.261591176474242237e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.107685752781343291e-01,5.098087782218558850e-01,-6.797889741465308611e-02,3.990134763135860102e-02,3.252128215859749760e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.124267002510773716e-01,5.119647933063201650e-01,-6.829326847903076370e-02,3.977481267169681589e-02,3.242640551274659777e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.140821352354338680e-01,5.141262575035050464e-01,-6.861081131832938440e-02,3.964789720362111441e-02,3.233128253882587844e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.157348729599646520e-01,5.162932808429946396e-01,-6.893168051572440280e-02,3.952059767731740114e-02,3.223591393850576137e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.173849061652770809e-01,5.184659854124719081e-01,-6.925603309477645908e-02,3.939291029093108798e-02,3.214030040156165557e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.190322276038583427e-01,5.206445027448431961e-01,-6.958402843530207826e-02,3.926483098903413294e-02,3.204444260410531653e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.206768300401065419e-01,5.228289769844582935e-01,-6.991582841161107786e-02,3.913635545926952286e-02,3.194834120669579614e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.223187062503623412e-01,5.250195621291970927e-01,-7.025159521030246990e-02,3.900747913258149208e-02,3.185199685245140855e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.239578490229409358e-01,5.272164264253427213e-01,-7.059149254955698893e-02,3.887819718339511482e-02,3.175541016510813885e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.255942511581639165e-01,5.294197496179207096e-01,-7.093568390803743628e-02,3.874850453056740557e-02,3.165858174670997949e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.272279054683905786e-01,5.316297245292809226e-01,-7.128433253710628192e-02,3.861839584010193227e-02,3.156151217582238155e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.288588047780496737e-01,5.338465597230808157e-01,-7.163760017584967776e-02,3.848786552878025874e-02,3.146420200491202857e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.304869419236707184e-01,5.360704737739510062e-01,-7.199564655685802839e-02,3.835690776978491201e-02,3.136665175838818653e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.321123097539158575e-01,5.383017020530858066e-01,-7.235862935439421606e-02,3.822551649852459765e-02,3.126886193049878138e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.337349011296107282e-01,5.405404926469538074e-01,-7.272670271254677576e-02,3.809368541960585913e-02,3.117083298275814540e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.353547089237763235e-01,5.427871080196632336e-01,-7.310001639118959837e-02,3.796140801661986031e-02,3.107256534206452314e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.369717260216599675e-01,5.450418273082359155e-01,-7.347871537781375251e-02,3.782867756210599869e-02,3.097405939800989549e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.385859453207668457e-01,5.473049431884848826e-01,-7.386293822611199322e-02,3.769548713011064422e-02,3.087531550062373453e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.401973597308906472e-01,5.495767637203776701e-01,-7.425281746712260322e-02,3.756182960928269171e-02,3.077633395810271955e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.418059621741456500e-01,5.518576107638960559e-01,-7.464847717128299254e-02,3.742769771762374797e-02,3.067711503459620292e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.434117455849966971e-01,5.541478216451929395e-01,-7.505003260501191820e-02,3.729308402043820386e-02,3.057765894803944340e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.450147029102913931e-01,5.564477501078528565e-01,-7.545759006306990146e-02,3.715798094754516057e-02,3.047796586767104743e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.466148271092899691e-01,5.587577626134198061e-01,-7.587124443141295138e-02,3.702238081330499336e-02,3.037803591187585583e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.482121111536969238e-01,5.610782411599096742e-01,-7.629107934078786790e-02,3.688627583873173299e-02,3.027786914627431103e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.498065480276917771e-01,5.634095829572111080e-01,-7.671716586075677524e-02,3.674965817367036036e-02,3.017746558130968704e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.513981307279594901e-01,5.657521979394031231e-01,-7.714956088727882577e-02,3.661251992220770912e-02,3.007682517041446180e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.529868522637218842e-01,5.681065103850876374e-01,-7.758830666223245909e-02,3.647485316940984185e-02,2.997594780810870496e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.545727056567677282e-01,5.704729578811196244e-01,-7.803343033693936615e-02,3.633665000853042448e-02,2.987483332811556314e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.561556839414840470e-01,5.728519899950996752e-01,-7.848494135434204433e-02,3.619790257060098021e-02,2.977348150171361119e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.577357801648857638e-01,5.752440700050738265e-01,-7.894283181392228177e-02,3.605860305663364379e-02,2.967189203611932946e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.593129873866474533e-01,5.776496721240030885e-01,-7.940707511454113410e-02,3.591874376880489078e-02,2.957006457283401502e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.608872986791325399e-01,5.800692816995922652e-01,-7.987762438410656030e-02,3.577831714574203348e-02,2.946799868649030985e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.624587071274248284e-01,5.825033941340636012e-01,-8.035441249508833150e-02,3.563731579731078958e-02,2.936569388351730758e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.640272058293580359e-01,5.849525141895652869e-01,-8.083734998665999261e-02,3.549573254197271877e-02,2.926314960124939413e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.655927878955469890e-01,5.874171540600549024e-01,-8.132632509702358847e-02,3.535356044497139916e-02,2.916036520702540158e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.671554464494171555e-01,5.898978351290398159e-01,-8.182120266363614169e-02,3.521079285738578507e-02,2.905733999765415243e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.687151746272345099e-01,5.923950856816394550e-01,-8.232182283434524850e-02,3.506742345644328629e-02,2.895407319843302796e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.702719655781371744e-01,5.949094363515259332e-01,-8.282800019394162550e-02,3.492344628822762614e-02,2.885056396339894524e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.718258124641639517e-01,5.974414248169126473e-01,-8.333952422360936529e-02,3.477885580922965292e-02,2.874681137507923423e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.733767084602854114e-01,5.999915907314075092e-01,-8.385615719598679685e-02,3.463364692953439794e-02,2.864281444455246128e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.749246467544329775e-01,6.025604762212254384e-01,-8.437763374137846606e-02,3.448781505764925842e-02,2.853857211177853737e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.764696205475297930e-01,6.051486226065502239e-01,-8.490366078294693530e-02,3.434135614548174120e-02,2.843408324630068984e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.780116230535196964e-01,6.077565704387106749e-01,-8.543391701123252691e-02,3.419426673266166838e-02,2.832934664812590264e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.795506474993976420e-01,6.103848594575920217e-01,-8.596805167746755794e-02,3.404654399277256410e-02,2.822436104872829143e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.810866871252394539e-01,6.130340243858507510e-01,-8.650568470238483021e-02,3.389818577927885407e-02,2.811912511214487709e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.826197351842306915e-01,6.157045938853850098e-01,-8.704640646714111030e-02,3.374919067174330517e-02,2.801363743703967901e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.841497849426976252e-01,6.183970922348999144e-01,-8.758977761154743136e-02,3.359955802077495973e-02,2.790789655803483926e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.856768296801356577e-01,6.211120311030335905e-01,-8.813532754548576709e-02,3.344928799530063473e-02,2.780190094823500879e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.872008626892394112e-01,6.238499152247789503e-01,-8.868255626943963299e-02,3.329838162786011302e-02,2.769564902150655206e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.887218772759319263e-01,6.266112354238076998e-01,-8.923093298054071698e-02,3.314684085961513288e-02,2.758913913512136551e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.902398667593944159e-01,6.293964708620514159e-01,-8.977989745344845673e-02,3.299466858447899081e-02,2.748236959257385204e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.917548244720952422e-01,6.322060824242952126e-01,-9.032885847484395381e-02,3.284186869298399286e-02,2.737533864701660047e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.932667437598194482e-01,6.350405186505813093e-01,-9.087719548065173458e-02,3.268844611580437598e-02,2.726804450445419192e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.947756179816976241e-01,6.379002046300789042e-01,-9.142425844422769066e-02,3.253440686488437428e-02,2.716048532749567590e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.962814405102357718e-01,6.407855476736032951e-01,-9.196936831623490904e-02,3.237975807413635138e-02,2.705265923959768171e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.977842047313435048e-01,6.436969305431599064e-01,-9.251181739183318709e-02,3.222450803890829218e-02,2.694456432921057409e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +6.992839040443636911e-01,6.466347162398403681e-01,-9.305087027912192776e-02,3.206866625344144406e-02,2.683619865419816786e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.007805318621018742e-01,6.495992371036047874e-01,-9.358576383703845480e-02,3.191224344763778781e-02,2.672756024652648277e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.022740816108536954e-01,6.525908012342157560e-01,-9.411571006947772045e-02,3.175525162037014237e-02,2.661864711771446679e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.037645467304356472e-01,6.556096864696080440e-01,-9.463989451587123680e-02,3.159770407109799978e-02,2.650945726357640253e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.052519206742123847e-01,6.586561408936743023e-01,-9.515747860147094617e-02,3.143961543138151793e-02,2.639998867005751149e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.067361969091264795e-01,6.617303800744970266e-01,-9.566760077047015143e-02,3.128100169165104472e-02,2.629023931857010776e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.082173689157260643e-01,6.648325850240789370e-01,-9.616937755732904436e-02,3.112188022672434523e-02,2.618020719224042892e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.096954301881950311e-01,6.679629012093780682e-01,-9.666190454420069134e-02,3.096226981846563961e-02,2.606989028188508306e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.111703742343798984e-01,6.711214376531613457e-01,-9.714425804060315695e-02,3.080219067656959908e-02,2.595928659247244696e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.126421945758196763e-01,6.743082649434755105e-01,-9.761549675632875955e-02,3.064166445532164398e-02,2.584839414936955793e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.141108847477731780e-01,6.775234136001371255e-01,-9.807466276670777361e-02,3.048071426856754571e-02,2.573721100525028227e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.155764382992484407e-01,6.807668748036541650e-01,-9.852078405318087650e-02,3.031936470095623892e-02,2.562573524657069113e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.170388487930305921e-01,6.840385950661254100e-01,-9.895287539864867921e-02,3.015764181610538358e-02,2.551396500071571039e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.184981098057098281e-01,6.873384793625200162e-01,-9.936994067688005583e-02,2.999557316217135380e-02,2.540189844305328348e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.199542149277103897e-01,6.906663878938401613e-01,-9.977097481603595941e-02,2.983318777313047343e-02,2.528953380395204675e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.214071577633178745e-01,6.940221354976774393e-01,-1.001549650439275962e-01,2.967051616768029421e-02,2.517686937606080644e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.228569319307079910e-01,6.974054902957468327e-01,-1.005208938427540055e-01,2.950759034440905704e-02,2.506390352166617541e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.243035310619742040e-01,7.008161739893316033e-01,-1.008677404065546762e-01,2.934444377278144431e-02,2.495063468002371998e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.257469488031560445e-01,7.042538597442451476e-01,-1.011944822819595691e-01,2.918111138226757786e-02,2.483706137476471565e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.271871788142661996e-01,7.077181735798332429e-01,-1.015000991126854130e-01,2.901762954639479017e-02,2.472318222131396626e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.286242147693199334e-01,7.112086918022164639e-01,-1.017835731462146953e-01,2.885403606323066680e-02,2.460899593417587572e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.300580503563610657e-01,7.147249406773320546e-01,-1.020438923980546281e-01,2.869037013375016348e-02,2.449450133461488766e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.314886792774907276e-01,7.182663992158943023e-01,-1.022800531182562478e-01,2.852667233446495865e-02,2.437969735777003322e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.329160952488951164e-01,7.218324953378624231e-01,-1.024910612822366451e-01,2.836298458800611683e-02,2.426458305993432207e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.343402920008724744e-01,7.254226077569050357e-01,-1.026759353772163930e-01,2.819935012973777064e-02,2.414915762583806635e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.357612632778612882e-01,7.290360640486789334e-01,-1.028337085542377904e-01,2.803581347098929241e-02,2.403342037587637425e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.371790028384670457e-01,7.326721443335779682e-01,-1.029634311652376094e-01,2.787242035838781093e-02,2.391737077307536097e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.385935044554904350e-01,7.363300783190246390e-01,-1.030641732016141787e-01,2.770921772981322154e-02,2.380100842987298015e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.400047619159539902e-01,7.400090483762525873e-01,-1.031350261634373100e-01,2.754625366677564829e-02,2.368433311488701842e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.414127690211299582e-01,7.437081861839328623e-01,-1.031751060598700698e-01,2.738357734403735685e-02,2.356734475947150353e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.428175195865669433e-01,7.474265772261855512e-01,-1.031835551296509113e-01,2.722123897456520114e-02,2.345004346433454123e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.442190074421178858e-01,7.511632608028170344e-01,-1.031595434025794206e-01,2.705928975414408771e-02,2.333242950534920035e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.456172264319661513e-01,7.549172288656820218e-01,-1.031022732564072786e-01,2.689778179984140399e-02,2.321450333969798940e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.470121704146531760e-01,7.586874280653286284e-01,-1.030109789788696967e-01,2.673676808683593531e-02,2.309626561166410186e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.484038332631057777e-01,7.624727623425942014e-01,-1.028849308769993337e-01,2.657630238266945819e-02,2.297771715807201964e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.497922088646621352e-01,7.662720919935934738e-01,-1.027234358789196889e-01,2.641643917817752124e-02,2.285885901335698300e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.511772911210995440e-01,7.700842355092928360e-01,-1.025258408570767360e-01,2.625723361648134302e-02,2.273969241473053349e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.525590739486605063e-01,7.739079721632565745e-01,-1.022915338911943167e-01,2.609874141846025425e-02,2.262021880662682116e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.539375512780798205e-01,7.777420411407867107e-01,-1.020199464047478743e-01,2.594101880691538145e-02,2.250043984517275109e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.553127170546113378e-01,7.815851457344857867e-01,-1.017105549083185401e-01,2.578412242810899924e-02,2.238035740218556613e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.566845652380541631e-01,7.854359535137794834e-01,-1.013628830776838047e-01,2.562810927128847741e-02,2.225997356877952860e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.580530898027795228e-01,7.892930974928649146e-01,-1.009765026153456452e-01,2.547303658672158896e-02,2.213929065881844738e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.594182847377570766e-01,7.931551793979473297e-01,-1.005510357831364554e-01,2.531896180227380042e-02,2.201831121198382368e-01,-0.000000000000000000e+00,-0.000000000000000000e+00 +7.607801440465815634e-01,7.970207716947490528e-01,-1.000861566825645282e-01,2.516594243741059750e-02,2.189703799624795055e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.621386617474984471e-01,8.008884171552363984e-01,-9.958159208779819582e-02,2.501403601706739949e-02,2.177547401029090168e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.634938318734313389e-01,8.047566338116259876e-01,-9.903712345900499148e-02,2.486329998419596621e-02,2.165362248548943769e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.648456484720069781e-01,8.086239164499903209e-01,-9.845258810006730921e-02,2.471379161055385631e-02,2.153148688723071913e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.661941056055823207e-01,8.124887357573871149e-01,-9.782787944092867505e-02,2.456556790794054562e-02,2.140907091628746761e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.675391973512698529e-01,8.163495452861698753e-01,-9.716294915360354500e-02,2.441868553843024731e-02,2.128637850964787692e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.688809178009646805e-01,8.202047807697686599e-01,-9.645780722003206853e-02,2.427320072386809566e-02,2.116341384063343545e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.702192610613689538e-01,8.240528612554008925e-01,-9.571252294660219373e-02,2.412916915610947149e-02,2.104018131926398416e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.715542212540194011e-01,8.278921952142621610e-01,-9.492722587060130335e-02,2.398664590647559686e-02,2.091668559177201347e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.728857925153117536e-01,8.317211800273783817e-01,-9.410210582387362743e-02,2.384568533591081146e-02,2.079293153975554087e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.742139689965273908e-01,8.355382045255229650e-01,-9.323741384990455461e-02,2.370634100554401200e-02,2.066892427917769171e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.755387448638585424e-01,8.393416525214556501e-01,-9.233346191752912768e-02,2.356866558770935247e-02,2.054466915887447342e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.768601142984342678e-01,8.431299050050713451e-01,-9.139062346989734742e-02,2.343271077850435369e-02,2.042017175854342892e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.781780714963458800e-01,8.469013415156376734e-01,-9.040933361921636713e-02,2.329852721021365147e-02,2.029543788657025283e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.794926106686721479e-01,8.506543438656616329e-01,-8.939008831495655338e-02,2.316616436619996916e-02,2.017047357735032020e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.808037260415053860e-01,8.543872961037932701e-01,-8.833344511648412445e-02,2.303567049651573592e-02,2.004528508841793288e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.821114118559759909e-01,8.580985903763292955e-01,-8.724002270775023382e-02,2.290709253478704147e-02,1.991987889712936999e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.834156623682784204e-01,8.617866271078226115e-01,-8.611050011115704839e-02,2.278047601684096701e-02,1.979426169680747749e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.847164718496960623e-01,8.654498174353639772e-01,-8.494561556611178454e-02,2.265586500271364601e-02,1.966844039285099155e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.860138345866267695e-01,8.690865845147446311e-01,-8.374616734998772960e-02,2.253330199928579164e-02,1.954242209844556100e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.873077448806070633e-01,8.726953673686587143e-01,-8.251301181681902630e-02,2.241282788537984558e-02,1.941621413007115216e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.885981970483383341e-01,8.762746234660607758e-01,-8.124706318510863112e-02,2.229448183945256443e-02,1.928982400234191263e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.898851854217111557e-01,8.798228286791766051e-01,-7.994929195628681928e-02,2.217830126996013160e-02,1.916325942290910411e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.911687043478299319e-01,8.833384819174915359e-01,-7.862072450477693153e-02,2.206432174800776816e-02,1.903652828690999832e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.924487481890386542e-01,8.868201046956466493e-01,-7.726244095145523572e-02,2.195257694338618309e-02,1.890963867120712683e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.937253113229446599e-01,8.902662446003122287e-01,-7.587557530599473021e-02,2.184309856278007084e-02,1.878259882845775830e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.949983881424437238e-01,8.936754769404373633e-01,-7.446131236397190989e-02,2.173591629135546155e-02,1.865541718081921430e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.962679730557449265e-01,8.970464060177898435e-01,-7.302088761037732467e-02,2.163105773791988012e-02,1.852810231348974712e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.975340604863950800e-01,9.003776664241633920e-01,-7.155558476072519658e-02,2.152854838256312159e-02,1.840066296818876901e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +7.987966448733025970e-01,9.036679274869553469e-01,-7.006673483709040251e-02,2.142841152812960392e-02,1.827310803618196200e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.000557206707632485e-01,9.069158910719848921e-01,-6.855571354767926728e-02,2.133066825490989582e-02,1.814544655122229400e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.013112823484830338e-01,9.101202949846104451e-01,-6.702394079631417145e-02,2.123533737886154626e-02,1.801768768256547526e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.025633243916036053e-01,9.132799140307144015e-01,-6.547287744248810049e-02,2.114243541276508362e-02,1.788984072758169064e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.038118413007259155e-01,9.163935616123491945e-01,-6.390402375326691142e-02,2.105197653262915955e-02,1.776191510438167864e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.050568275919344208e-01,9.194600908498824499e-01,-6.231891792029237848e-02,2.096397254616567604e-02,1.763392034420411814e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.062982777968216164e-01,9.224783954151157817e-01,-6.071913359266969956e-02,2.087843286541740673e-02,1.750586608384324949e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.075361864625111297e-01,9.254474102066732044e-01,-5.910627778714810238e-02,2.079536448316768663e-02,1.737776205793958584e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.087705481516828110e-01,9.283661118182801042e-01,-5.748198890271945943e-02,2.071477195301530891e-02,1.724961809140161950e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.100013574425958263e-01,9.312335219892210469e-01,-5.584793435117629823e-02,2.063665737317264132e-02,1.712144409149209956e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.112286089291126379e-01,9.340487044876554235e-01,-5.420580856720586677e-02,2.056102037392053619e-02,1.699325004004036410e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.124522972207226523e-01,9.368107694830801746e-01,-5.255733017817123404e-02,2.048785810912695862e-02,1.686504598571443025e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.136724169425666453e-01,9.395188711169134566e-01,-5.090424061094959290e-02,2.041716525122880266e-02,1.673684203603684750e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.148889627354590770e-01,9.421722079597700805e-01,-4.924830084028083282e-02,2.034893398979333717e-02,1.660864834993144024e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.161019292559127392e-01,9.447700258217968106e-01,-4.759128974480203444e-02,2.028315403432722624e-02,1.648047512982032392e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.173113111761618477e-01,9.473116152396394707e-01,-4.593500172355011180e-02,2.021981261980827088e-02,1.635233261401564941e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.185171031841850242e-01,9.497963136798592831e-01,-4.428124405682394571e-02,2.015889451636412519e-02,1.622423106907503187e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.197192999837294991e-01,9.522235031368122549e-01,-4.263183433777100600e-02,2.010038204283153690e-02,1.609618078236089156e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.209178962943334268e-01,9.545926122165051542e-01,-4.098859901811028467e-02,2.004425508330760428e-02,1.596819205464486691e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.221128868513501997e-01,9.569031144261247457e-01,-3.935337050636497636e-02,1.999049110679539110e-02,1.584027519281810403e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.233042664059700977e-01,9.591545293140031347e-01,-3.772798488027004349e-02,1.993906519071965075e-02,1.571244050268204295e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.244920297252450458e-01,9.613464196355895330e-01,-3.611427902699804182e-02,1.988995004810981820e-02,1.558469828199568474e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.256761715921098199e-01,9.634783938209041665e-01,-3.451408978456752680e-02,1.984311605734499426e-02,1.545705881361748579e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.268566868054064711e-01,9.655501037455226365e-01,-3.292925018650311869e-02,1.979853129479005810e-02,1.532953235868485253e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.280335701799065307e-01,9.675612441438147249e-01,-3.136158832797301260e-02,1.975616157140762733e-02,1.520212915010002719e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.292068165463334362e-01,9.695115506980487297e-01,-2.981292457308781091e-02,1.971597047100227817e-02,1.507485938629687106e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.303764207513859574e-01,9.714008024288285359e-01,-2.828506940082096965e-02,1.967791939273616161e-02,1.494773322501126156e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.315423776577602899e-01,9.732288177685716191e-01,-2.677982169962204659e-02,1.964196759510066642e-02,1.482076077722551677e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.327046821441730362e-01,9.749954547016222994e-01,-2.529896582459697715e-02,1.960807224357612871e-02,1.469395210151084874e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.338633291053832997e-01,9.767006093221006768e-01,-2.384427075840802115e-02,1.957618846016032088e-02,1.456731719848932960e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.350183134522153328e-01,9.783442157557986629e-01,-2.241748730900664302e-02,1.954626937499190364e-02,1.444086600545492760e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.361696301115808527e-01,9.799262425782738761e-01,-2.102034607111445685e-02,1.951826618122443763e-02,1.431460839137420338e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.373172740265014680e-01,9.814466957561227556e-01,-1.965455625660952541e-02,1.949212819129043234e-02,1.418855415194846536e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.384612401561303274e-01,9.829056122068932089e-01,-1.832180284011759169e-02,1.946780289551328785e-02,1.406271300495195720e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.396015234757753243e-01,9.843030628792531767e-01,-1.702374551186844678e-02,1.944523602307820370e-02,1.393709458599079742e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.407381189769198571e-01,9.856391482182258468e-01,-1.576201691496220691e-02,1.942437160391647516e-02,1.381170844420094301e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.418710216672458113e-01,9.869139988344564074e-01,-1.453822062075811779e-02,1.940515203291762539e-02,1.368656403852049896e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.430002265706553199e-01,9.881277724718008360e-01,-1.335392959653193198e-02,1.938751813573789951e-02,1.356167073390226741e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.441257287272921905e-01,9.892806536137056517e-01,-1.221068503541743525e-02,1.937140923584345850e-02,1.343703779808225474e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.452475231935638877e-01,9.903728519451465706e-01,-1.110999452866419032e-02,1.935676322269190217e-02,1.331267439824788401e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.463656050421637378e-01,9.914045999919394170e-01,-1.005333037105025223e-02,1.934351662179413858e-02,1.318858959825946753e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.474799693620920227e-01,9.923761516344690170e-01,-9.042128858582456838e-03,1.933160466550105580e-02,1.306479235594432686e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.485906112586771854e-01,9.932877798390985502e-01,-8.077788605279702036e-03,1.932096136441179002e-02,1.294129152088744072e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.496975258535985898e-01,9.941397776930420704e-01,-7.161669077954335542e-03,1.931151958048153011e-02,1.281809583215210435e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.508007082849067260e-01,9.949324544642846879e-01,-6.295090074850725158e-03,1.930321110010961919e-02,1.269521391633030016e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.519001537070450825e-01,9.956661340652838144e-01,-5.479329448136387171e-03,1.929596670863076638e-02,1.257265428593490331e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.529958572908716841e-01,9.963411537180969502e-01,-4.715623912816583121e-03,1.928971626474565437e-02,1.245042533804521512e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.540878142236794091e-01,9.969578629945350379e-01,-4.005166352393294341e-03,1.928438877497468465e-02,1.232853535305636489e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.551760197092181937e-01,9.975166213510142654e-01,-3.349105582121427241e-03,1.927991246981725776e-02,1.220699249372519252e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.562604689677154601e-01,9.980177968796165588e-01,-2.748545614890790870e-03,1.927621487880052525e-02,1.208580480443741251e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.573411572358974331e-01,9.984617642963503625e-01,-2.204544544422837229e-03,1.927322290637201868e-02,1.196498021070378087e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.584180797670095675e-01,9.988489032637625220e-01,-1.718113878810464349e-03,1.927086290762117238e-02,1.184452651891263475e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.594912318308379762e-01,9.991795982460821568e-01,-1.290218123069769747e-03,1.926906076393333778e-02,1.172445141618015385e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.605606087137299687e-01,9.994542342894839893e-01,-9.217739159798195260e-04,1.926774195825151428e-02,1.160476247047031034e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.616262057186147016e-01,9.996731983511577546e-01,-6.136496582662007366e-04,1.926683165034547701e-02,1.148546713098005378e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.626880181650236068e-01,9.998368760684646084e-01,-3.666648464766913648e-04,1.926625475150443856e-02,1.136657272847052397e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.637460413891117073e-01,9.999456502927994705e-01,-1.815897527083772751e-04,1.926593599897312090e-02,1.124808647610561763e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.648002707436770464e-01,9.999999006709333615e-01,-5.914459054924125770e-05,1.926580002996886726e-02,1.113001547017107989e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.658507015981822263e-01,1.000000000000000000e+00,-0.000000000000000000e+00,1.926577145501411417e-02,1.101236669123313167e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.668973293387736145e-01,9.999463171389191274e-01,-4.776073287077151232e-06,1.926577493022299178e-02,1.089514700519804563e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.679401493683025492e-01,9.998392111367156998e-01,-7.404257801837900357e-05,1.926573522941560368e-02,1.077836316452941839e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.689791571063447684e-01,9.996790312192816996e-01,-2.083183497540764021e-04,1.926557731490765105e-02,1.066202180995547144e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.700143479892213927e-01,9.994661183546836858e-01,-4.080713781027198997e-04,1.926522640806421424e-02,1.054612947186404498e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.710457174700181326e-01,9.992008001037426812e-01,-6.737186980437973689e-04,1.926460805828533393e-02,1.043069257200594235e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.720732610186056055e-01,9.988833925137882153e-01,-1.005626443851530532e-03,1.926364821138197916e-02,1.031571742536665920e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.730969741216594304e-01,9.985141964784909341e-01,-1.404109412041024954e-03,1.926227327665753358e-02,1.020121024203553828e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.741168522826797682e-01,9.980934992311518705e-01,-1.869431411827702540e-03,1.926041019350568581e-02,1.008717712930092825e-01,-0.000000000000000000e+00,0.000000000000000000e+00 +8.751328910220110835e-01,9.976215714429843562e-01,-2.401805671825128861e-03,1.925798649582474709e-02,9.973624093614823216e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.761450858768620176e-01,9.970986669408742342e-01,-3.001394241540672990e-03,1.925493037599641105e-02,9.860557042934503524e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.771534324013248174e-01,9.965250229409825522e-01,-3.668308443382305595e-03,1.925117074751385438e-02,9.747981788800723357e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.781579261663950975e-01,9.959008574560112015e-01,-4.402609034683878202e-03,1.924663730642485818e-02,9.635904048704428682e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.791585627599906028e-01,9.952263696291376860e-01,-5.204306721132236080e-03,1.924126059098706445e-02,9.524329448409390997e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.801553377869717476e-01,9.945017384965378904e-01,-6.073361689506399236e-03,1.923497204045769571e-02,9.413263524397669224e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.811482468691598235e-01,9.937271236730134127e-01,-7.009684463401542366e-03,1.922770405251602449e-02,9.302711726195327102e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.821372856453569833e-01,9.929026629424043682e-01,-8.013136038260097044e-03,1.921939003887503106e-02,9.192679418840014882e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.831224497713646704e-01,9.920284728804011909e-01,-9.083527676853714664e-03,1.920996448007147514e-02,9.083171885360163955e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.841037349200038253e-01,9.911046487333220467e-01,-1.022062233801185153e-02,1.919936297841837863e-02,8.974194329208426413e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.850811367811323160e-01,9.901312634563490533e-01,-1.142413428825472116e-02,1.918752230905704584e-02,8.865751876651234431e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.860546510616654770e-01,9.891083668174416310e-01,-1.269372958274587147e-02,1.917438047030746787e-02,8.757849579314265498e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.870242734855938727e-01,9.880359869150859842e-01,-1.402902618191949162e-02,1.915987673218805948e-02,8.650492416543427432e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.879899997940023937e-01,9.869141283299138756e-01,-1.542959484733634132e-02,1.914395168378076398e-02,8.543685297801620337e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.889518257450892413e-01,9.857427733102797074e-01,-1.689495969326294811e-02,1.912654727815681424e-02,8.437433065027093171e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.899097471141842464e-01,9.845218809813742755e-01,-1.842459807017562262e-02,1.910760687657425683e-02,8.331740494923077500e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.908637596937675207e-01,9.832513875273138915e-01,-2.001794144293862418e-02,1.908707529081083570e-02,8.226612301237407054e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.918138592934876652e-01,9.819312064266144668e-01,-2.167437569575871720e-02,1.906489882410446202e-02,8.122053136969183129e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.927600417401808652e-01,9.805612278975907037e-01,-2.339324169679227183e-02,1.904102531030902809e-02,8.018067596529614149e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.937023028778884326e-01,9.791413205959619281e-01,-2.517383583926234916e-02,1.901540415178818236e-02,7.914660217842954537e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.946406385678755679e-01,9.776713311456217692e-01,-2.701541098301853128e-02,1.898798635506162777e-02,7.811835484220777670e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.955750446886495686e-01,9.761510826840030797e-01,-2.891717605355393142e-02,1.895872456554883317e-02,7.709597826442719792e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.965055171359771480e-01,9.745803789256104466e-01,-3.087829739564015158e-02,1.892757310062098339e-02,7.607951624538544788e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.974320518229038646e-01,9.729590017772963284e-01,-3.289789896237604178e-02,1.889448798085950620e-02,7.506901209449207135e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.983546446797705531e-01,9.712867124071563607e-01,-3.497506307180424379e-02,1.885942695988633219e-02,7.406450864771811637e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +8.992732916542326427e-01,9.695632527595571259e-01,-3.710883111410048785e-02,1.882234955244188254e-02,7.306604828263167650e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.001879887112764766e-01,9.677883453177524853e-01,-3.929820378752852533e-02,1.878321706092240390e-02,7.207367293248825857e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.010987318332384088e-01,9.659616938947968023e-01,-4.154214185651876334e-02,1.874199260072034745e-02,7.108742409983577482e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.020055170198213457e-01,9.640829843783419451e-01,-4.383956698354311149e-02,1.869864112341130050e-02,7.010734286859446329e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.029083402881132869e-01,9.621518857061663876e-01,-4.618936180137834385e-02,1.865312943897530495e-02,6.913346991517377305e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.038071976726038681e-01,9.601680499707101468e-01,-4.859037153119333957e-02,1.860542623590432862e-02,6.816584551821680549e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.047020852252026790e-01,9.581311136688172025e-01,-5.104140373359482979e-02,1.855550209987363863e-02,6.720450956801905795e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.055929990152560283e-01,9.560406995763781612e-01,-5.354122956249111193e-02,1.850332953100309805e-02,6.624950157309723864e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.064799351295638186e-01,9.538964163924987805e-01,-5.608858382919922059e-02,1.844888295953313676e-02,6.530086066646914600e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.073628896723984205e-01,9.516978609713606652e-01,-5.868216635802124548e-02,1.839213876002196088e-02,6.435862561014502048e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.082418587655194386e-01,9.494446177695128330e-01,-6.132064211514817731e-02,1.833307526375772128e-02,6.342283479880188313e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.091168385481928071e-01,9.471362610201509158e-01,-6.400264209531325577e-02,1.827167277010159629e-02,6.249352626234434133e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.099878251772063331e-01,9.447723550692912475e-01,-6.672676421770878941e-02,1.820791355587520616e-02,6.157073766681225857e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.108548148268875710e-01,9.423524557102829213e-01,-6.949157402398371819e-02,1.814178188331203034e-02,6.065450631454690628e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.117178036891198101e-01,9.398761127164926288e-01,-7.229560535929541176e-02,1.807326400643329695e-02,5.974486914205406202e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.125767879733592824e-01,9.373428694711127829e-01,-7.513736092084370710e-02,1.800234817614286553e-02,5.884186271639354343e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.134317639066518169e-01,9.347522641593823955e-01,-7.801531342562058324e-02,1.792902464371255722e-02,5.794552323182786124e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.142827277336491587e-01,9.321038319599610489e-01,-8.092790623958588669e-02,1.785328566250543939e-02,5.705588650376981763e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.151296757166257345e-01,9.293971048480033081e-01,-8.387355350548972299e-02,1.777512548882535445e-02,5.617298796247331000e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.159726041354947501e-01,9.266316147725572883e-01,-8.685064207695379623e-02,1.769454038100604781e-02,5.529686264474963087e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.168115092878250660e-01,9.238068936161748512e-01,-8.985753187912497164e-02,1.761152859648202479e-02,5.442754518431822119e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.176463874888565186e-01,9.209224750864097864e-01,-9.289255643943870877e-02,1.752609038810231876e-02,5.356506980128668272e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.184772350715175726e-01,9.179778952440973416e-01,-9.595402408487881118e-02,1.743822799872245949e-02,5.270947029056696764e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.193040483864395318e-01,9.149726951887851012e-01,-9.904021858870698525e-02,1.734794565440300751e-02,5.186078000879148359e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.201268238019744139e-01,9.119064206573402886e-01,-1.021494002248568428e-01,1.725524955613263328e-02,5.101903186007123586e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.209455577042096053e-01,9.087786243711833967e-01,-1.052798065741077094e-01,1.716014787011402309e-02,5.018425828153764284e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.217602464969846254e-01,9.055888683232087288e-01,-1.084296537012590517e-01,1.706265071652764143e-02,4.935649122613924855e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.225708866019060039e-01,9.023367232100459878e-01,-1.115971366501387479e-01,1.696277015689769477e-02,4.853576214527022953e-02,-0.000000000000000000e+00,0.000000000000000000e+00 +9.233774744583637117e-01,8.990217708265549890e-01,-1.147804303693413103e-01,1.686052018045527609e-02,4.772210197092283113e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.241800065235469264e-01,8.956436059437988817e-01,-1.179776915973768242e-01,1.675591668849609264e-02,4.691554109568871894e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.249784792724585758e-01,8.922018365457452527e-01,-1.211870587099932450e-01,1.664897747740705369e-02,4.611610935233899367e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.257728891979321029e-01,8.886960850233658338e-01,-1.244066531139852716e-01,1.653972222101623285e-02,4.532383599315210204e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.265632328106458981e-01,8.851259908284025979e-01,-1.276345808829380812e-01,1.642817245063241463e-02,4.453874966776041888e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.273495066391393982e-01,8.814912108199702834e-01,-1.308689326978724754e-01,1.631435153428568985e-02,4.376087839978604760e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.281317072298272963e-01,8.777914207688606307e-01,-1.341077857116945993e-01,1.619828465468597239e-02,4.299024956358142580e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.289098311470160851e-01,8.740263165763200170e-01,-1.373492045271957063e-01,1.607999878544369085e-02,4.222688985958960445e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.296838749729178231e-01,8.701956154278288613e-01,-1.405912422122561067e-01,1.595952266610126125e-02,4.147082528977363153e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.304538353076660107e-01,8.662990578716871504e-01,-1.438319409762223722e-01,1.583688677613149381e-02,4.072208113146511682e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.312197087693302455e-01,8.623364079848374608e-01,-1.470693340796929427e-01,1.571212330750039812e-02,3.998068191130759591e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.319814919939308773e-01,8.583074552447698125e-01,-1.503014466985738429e-01,1.558526613560271695e-02,3.924665137846689023e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.327391816354543286e-01,8.542120149591384548e-01,-1.535262969509309239e-01,1.545635078929719769e-02,3.852001247780679344e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.334927743658669730e-01,8.500499306168413094e-01,-1.567418970214980167e-01,1.532541441970079396e-02,3.780078732211502451e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.342422668751307890e-01,8.458210735931714908e-01,-1.599462548562944897e-01,1.519249576764177163e-02,3.708899716434908150e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.349876558712167940e-01,8.415253453201917422e-01,-1.631373751056079124e-01,1.505763512967114338e-02,3.638466236965120365e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.357289380801203649e-01,8.371626775592521108e-01,-1.663132601630356311e-01,1.492087432329312902e-02,3.568780238706020791e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.364661102458753383e-01,8.327330347964900792e-01,-1.694719116315850371e-01,1.478225665089440664e-02,3.499843572081289683e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.371991691305681105e-01,8.282364134120293597e-01,-1.726113318132085073e-01,1.464182686242693557e-02,3.431657990155521920e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.379281115143518477e-01,8.236728432854422133e-01,-1.757295248452885317e-01,1.449963111683772073e-02,3.364225145849762677e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.386529341954613637e-01,8.190423896621102928e-01,-1.788244978482302905e-01,1.435571694268690524e-02,3.297546589036323694e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.393736339902262200e-01,8.143451528624557900e-01,-1.818942624696848476e-01,1.421013319754125745e-02,3.231623763676478439e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.400902077330849371e-01,8.095812694321929959e-01,-1.849368362373129404e-01,1.406293002631577548e-02,3.166458005016530020e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.408026522765996491e-01,8.047509129181580345e-01,-1.879502439622230681e-01,1.391415881845770031e-02,3.102050536764965061e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.415109644914688714e-01,7.998542943062041388e-01,-1.909325187519576028e-01,1.376387216439362189e-02,3.038402468339861801e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.422151412665417114e-01,7.948916639451082400e-01,-1.938817039652303564e-01,1.361212381083958672e-02,2.975514792083217702e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.429151795088319687e-01,7.898633106976153950e-01,-1.967958541573104070e-01,1.345896861511061866e-02,2.913388380528327687e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.436110761435310135e-01,7.847695629992565980e-01,-1.996730368571377201e-01,1.330446249865762168e-02,2.852023983776654989e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.443028281140214419e-01,7.796107898460621355e-01,-2.025113337000755687e-01,1.314866239952408068e-02,2.791422226855918426e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.449904323818909546e-01,7.743874006679237754e-01,-2.053088417999410775e-01,1.299162622432421636e-02,2.731583607157910334e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.456738859269452346e-01,7.690998461006670617e-01,-2.080636755792538706e-01,1.283341279904895611e-02,2.672508491948669129e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.463531857472214925e-01,7.637486182444631622e-01,-2.107739676297410714e-01,1.267408181934180991e-02,2.614197115928674695e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.470283288590011228e-01,7.583342513932864737e-01,-2.134378708943901748e-01,1.251369379992015030e-02,2.556649578865853589e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.476993122968236927e-01,7.528573216193672479e-01,-2.160535595856034607e-01,1.235231002316428996e-02,2.499865843295575804e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.483661331134992656e-01,7.473184470748575059e-01,-2.186192307453441730e-01,1.218999248734308101e-02,2.443845732364670342e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.490287883801215019e-01,7.417182889535380852e-01,-2.211331056294966457e-01,1.202680385409988888e-02,2.388588927685350929e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.496872751860808703e-01,7.360575506665816281e-01,-2.235934313105904236e-01,1.186280739540528789e-02,2.334094967317505362e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.503415906390768608e-01,7.303369784055709912e-01,-2.259984818994265621e-01,1.169806693996142816e-02,2.280363243849667029e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.509917318651310847e-01,7.245573612936184738e-01,-2.283465605642405749e-01,1.153264681907175196e-02,2.227393002567330110e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.516376960085998205e-01,7.187195310141859528e-01,-2.306360002144922650e-01,1.136661181195016378e-02,2.175183339707282074e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.522794802321865593e-01,7.128243609121449609e-01,-2.328651650078394919e-01,1.120002709124164574e-02,2.123733200905185761e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.529170817169542174e-01,7.068727674437458308e-01,-2.350324525053984481e-01,1.103295816752033684e-02,2.073041379685389499e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.535504976623380147e-01,7.008657086631771005e-01,-2.371362942166388710e-01,1.086547083380845838e-02,2.023106516077523287e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.541797252861573542e-01,6.948041851926221035e-01,-2.391751576238655419e-01,1.069763110982308377e-02,1.973927095348661231e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.548047618246282564e-01,6.886892381488879389e-01,-2.411475468676008704e-01,1.052950518609615388e-02,1.925501446871030753e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.554256045323753499e-01,6.825219502054206133e-01,-2.430520050467054327e-01,1.036115936792538460e-02,1.877827743155377829e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.560422506824440836e-01,6.763034448578990609e-01,-2.448871148569138345e-01,1.019266001896929237e-02,1.830903998924942114e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.566546975663126062e-01,6.700348849906350157e-01,-2.466514998528986091e-01,1.002407350538354197e-02,1.784728070410276229e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.572629424939035347e-01,6.637174734102787577e-01,-2.483438262053130063e-01,9.855466139578562476e-03,1.739297654756436287e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.578669827935962777e-01,6.573524520679466798e-01,-2.499628037062967267e-01,9.686904124164884625e-03,1.694610289523602181e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.584668158122380266e-01,6.509411006220376938e-01,-2.515071872476648540e-01,9.518453495941962481e-03,1.650663352381518886e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.590624389151560791e-01,6.444847366859275395e-01,-2.529757776909021816e-01,9.350180070215763362e-03,1.607454060929139339e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.596538494861691637e-01,6.379847144510718548e-01,-2.543674233952093466e-01,9.182149385316295057e-03,1.564979472639608377e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.602410449275984305e-01,6.314424242479453886e-01,-2.556810211212074413e-01,9.014426647427584496e-03,1.523236484972699013e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.608240226602797751e-01,6.248592915211844678e-01,-2.569155174627831828e-01,8.847076675754624725e-03,1.482221835607127648e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.614027801235747184e-01,6.182367756596600650e-01,-2.580699096725390107e-01,8.680163848102726357e-03,1.441932102851632702e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.619773147753815090e-01,6.115763694602583955e-01,-2.591432469167220809e-01,8.513752046968863993e-03,1.402363706183706177e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.625476240921463367e-01,6.048795973192359066e-01,-2.601346310783252402e-01,8.347904606179226347e-03,1.363512906953726067e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.631137055688746562e-01,5.981480157605795211e-01,-2.610432182238467957e-01,8.182684258034301356e-03,1.325375809225142268e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.636755567191421790e-01,5.913832116491449797e-01,-2.618682190814686228e-01,8.018153081056892187e-03,1.287948360689294969e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.642331750751055308e-01,5.845867997732209220e-01,-2.626089002185493793e-01,7.854372448592389019e-03,1.251226353852034727e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.647865581875129104e-01,5.777604231226770581e-01,-2.632645846325327166e-01,7.691402978067890528e-03,1.215205427315115341e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.653357036257157464e-01,5.709057510224572196e-01,-2.638346529574213117e-01,7.529304481139659916e-03,1.179881067206763404e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.658806089776785786e-01,5.640244791950640435e-01,-2.643185439990956564e-01,7.368135914514469843e-03,1.145248608710037336e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.664212718499893828e-01,5.571183264847947569e-01,-2.647157555578610877e-01,7.207955331906926687e-03,1.111303237777728180e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.669576898678714505e-01,5.501890353759992802e-01,-2.650258450768114282e-01,7.048819836775519106e-03,1.078039992988921725e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.674898606751921593e-01,5.432383687316317822e-01,-2.652484302692814100e-01,6.890785536269569515e-03,1.045453767537066203e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.680177819344745194e-01,5.362681105791432534e-01,-2.653831898622292518e-01,6.733907496023095775e-03,1.013539311378801322e-02,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.685414513269068326e-01,5.292800635640213525e-01,-2.654298638671922506e-01,6.578239696278171886e-03,9.822912334369189938e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.690608665523531284e-01,5.222760475352420384e-01,-2.653882545132350557e-01,6.423834988965721610e-03,9.517040040315255522e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.695760253293631559e-01,5.152578984756136515e-01,-2.652582259608430859e-01,6.270745056194317740e-03,9.217719573796703814e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.700869253951824867e-01,5.082274666090489390e-01,-2.650397054193972535e-01,6.119020369971453835e-03,8.924892942602950796e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.705935645057625072e-01,5.011866153352110276e-01,-2.647326830662217745e-01,5.968710153069649305e-03,8.638500847828523010e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.710959404357700775e-01,4.941372200601413245e-01,-2.643372125053026767e-01,5.819862341413887044e-03,8.358482712685461405e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.715940509785973012e-01,4.870811658810803579e-01,-2.638534108614351559e-01,5.672523547727237325e-03,8.084776712487405995e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.720878939463715174e-01,4.800203467940873936e-01,-2.632814588915360909e-01,5.526739026773554858e-03,7.817319806087165424e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.725774671699647378e-01,4.729566639991428234e-01,-2.626216013808759242e-01,5.382552641898703111e-03,7.556047767844780577e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.730627684990029724e-01,4.658920238642198508e-01,-2.618741468268164785e-01,5.240006833156383471e-03,7.300895221331099236e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.735437958018759996e-01,4.588283375820351728e-01,-2.610394676253708912e-01,5.099142587014896845e-03,7.051795673429952886e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.740205469657464699e-01,4.517675183466238198e-01,-2.601179999637960316e-01,4.959999407584654053e-03,6.808681549775899740e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.744930198965594537e-01,4.447114813627381924e-01,-2.591102437888659904e-01,4.822615289438544027e-03,6.571484230863162621e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.749612125190515455e-01,4.376621405394703901e-01,-2.580167623833641288e-01,4.687026692110080571e-03,6.340134089171577982e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.754251227767596344e-01,4.306214087191920226e-01,-2.568381822820939298e-01,4.553268516295307794e-03,6.114560527271521370e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.758847486320304521e-01,4.235911948762904888e-01,-2.555751930721387732e-01,4.421374081655824080e-03,5.894692016275223344e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.763400880660293435e-01,4.165734029777506953e-01,-2.542285470490306953e-01,4.291375106292050733e-03,5.680456135769600817e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.767911390787489267e-01,4.095699317892852465e-01,-2.527990583810819092e-01,4.163301688096772597e-03,5.471779613511989875e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.772378996890179748e-01,4.025826714043255206e-01,-2.512876032125871961e-01,4.037182287817540904e-03,5.268588366263179772e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.776803679345105191e-01,3.956135029405784542e-01,-2.496951185389463257e-01,3.913043713830307094e-03,5.070807541419178679e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.781185418717538438e-01,3.886642971879929465e-01,-2.480226020637658624e-01,3.790911108881104327e-03,4.878361558751103874e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.785524195761373667e-01,3.817369131748142852e-01,-2.462711116820275881e-01,3.670807938294675787e-03,4.691174152690927822e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.789819991419210776e-01,3.748331959373837896e-01,-2.444417642529713719e-01,3.552755980259469839e-03,4.509168415262271648e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.794072786822439758e-01,3.679549763191504730e-01,-2.425357351329280597e-01,3.436775317903052827e-03,4.332266839423082674e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.798282563291319525e-01,3.611040694680652252e-01,-2.405542574382494025e-01,3.322884333160550704e-03,4.160391362234742464e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.802449302335066728e-01,3.542822726113916687e-01,-2.384986213528675347e-01,3.211099702339722649e-03,3.993463408888053671e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.806572985651929031e-01,3.474913651764596767e-01,-2.363701726282041726e-01,3.101436393692125724e-03,3.831403936643183739e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.810653595129272819e-01,3.407331063344263278e-01,-2.341703121144498856e-01,2.993907666824447301e-03,3.674133478931482580e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.814691112843658694e-01,3.340092346305775228e-01,-2.319004946679121826e-01,2.888525073793307262e-03,3.521572189779216371e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.818685521060921406e-01,3.273214665400974166e-01,-2.295622279567800239e-01,2.785298462121501509e-03,3.373639887986652416e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.822636802236245357e-01,3.206714952586468548e-01,-2.271570715480916158e-01,2.684235979597397650e-03,3.230256101601118070e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.826544939014246749e-01,3.140609897850027976e-01,-2.246866356380771701e-01,2.585344080839678971e-03,3.091340112164967131e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.830409914229045754e-01,3.074915935888100083e-01,-2.221525797667555646e-01,2.488627535741951002e-03,2.956810999052143351e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.834231710904342005e-01,3.009649243083239445e-01,-2.195566118267209543e-01,2.394089439623119011e-03,2.826587683457823085e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.838010312253490097e-01,2.944825720885778098e-01,-2.169004865909298085e-01,2.301731225129521798e-03,2.700588972256694285e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.841745701679575076e-01,2.880460989910417768e-01,-2.141860043507949618e-01,2.211552675945753672e-03,2.578733601726049349e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.845437862775482385e-01,2.816570379520530465e-01,-2.114150097270665885e-01,2.123551942187120421e-03,2.460940280917393950e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.849086779323972252e-01,2.753168922983854627e-01,-2.085893901700653363e-01,2.037725557456328707e-03,2.347127734705941552e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.852692435297746298e-01,2.690271346310518230e-01,-2.057110743958751042e-01,1.954068457654808359e-03,2.237214746401119253e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.856254814859526370e-01,2.627892060555589437e-01,-2.027820311513960361e-01,1.872574001396551174e-03,2.131120200131783778e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.859773902362116704e-01,2.566045161837176014e-01,-1.998042676157826547e-01,1.793233992003792458e-03,2.028763122533686156e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.863249682348473879e-01,2.504744415879648356e-01,-1.967798278400860190e-01,1.716038701136472257e-03,1.930062724025472572e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.866682139551777864e-01,2.444003258164933190e-01,-1.937107912737459081e-01,1.640976893941364662e-03,1.834938439672508404e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.870071258895497524e-01,2.383834785904542508e-01,-1.905992708925149937e-01,1.568035855769301518e-03,1.743309969283287058e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.873417025493456123e-01,2.324251754456435892e-01,-1.874474118778071963e-01,1.497201420388952686e-03,1.655097317085077284e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.876719424649894608e-01,2.265266578026785549e-01,-1.842573898613520667e-01,1.428457999565355564e-03,1.570220830460790189e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.879978441859542659e-01,2.206891315086809968e-01,-1.810314092757005777e-01,1.361788614123999545e-03,1.488601238252430358e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.883194062807676428e-01,2.149137674854672553e-01,-1.777717016657240912e-01,1.297174926317175422e-03,1.410159688336749166e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.886366273370181812e-01,2.092017008823671698e-01,-1.744805237815529320e-01,1.234597273616096181e-03,1.334817784332657060e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.889495059613619965e-01,2.035540311021139248e-01,-1.711601562602134696e-01,1.174034703699162758e-03,1.262497621693116703e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.892580407795287245e-01,1.979718217091636645e-01,-1.678129015526701739e-01,1.115465010677265442e-03,1.193121822847685934e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.895622304363274058e-01,1.924561001883687839e-01,-1.644410822233616232e-01,1.058864772573402677e-03,1.126613571608742811e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.898620735956522587e-01,1.870078579590642998e-01,-1.610470391965624914e-01,1.004209389922269867e-03,1.062896646688287578e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.901575689404893410e-01,1.816280501584153217e-01,-1.576331300792758738e-01,9.514731254469584740e-04,1.001895454418344576e-03,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.904487151729213235e-01,1.763175960055825831e-01,-1.542017272418902407e-01,9.006291447837718985e-04,9.435350605328140161e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.907355110141340404e-01,1.710773785762529087e-01,-1.507552159869575414e-01,8.516495582450427527e-04,8.877412210611181380e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.910179552044214857e-01,1.659082452834030885e-01,-1.472959929407560986e-01,8.045054634944752859e-04,8.344404123072289266e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.912960465031916968e-01,1.608110076276083600e-01,-1.438264641072506123e-01,7.591669890924263725e-04,7.835598598882150727e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.915697836889719730e-01,1.557864417723089367e-01,-1.403490429609775825e-01,7.156033389556931430e-04,7.350275669065113940e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.918391655594147593e-01,1.508352888291406568e-01,-1.368661489387057728e-01,6.737828375435815208e-04,6.887723410355214761e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.921041909313020879e-01,1.459582548852799977e-01,-1.333802053243934860e-01,6.336729757837673187e-04,6.447238207728437200e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.923648586405513505e-01,1.411560115762782175e-01,-1.298936375147165534e-01,5.952404577352713713e-04,6.028125006981265022e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.926211675422206282e-01,1.364291965607618284e-01,-1.264088713876743431e-01,5.584512478152940662e-04,5.629697557554445234e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.928731165105130207e-01,1.317784138443682251e-01,-1.229283313296926500e-01,5.232706186030288776e-04,5.251278645394440647e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.931207044387818650e-01,1.272042341330509307e-01,-1.194544384437413731e-01,4.896631991979678985e-04,4.892200317168311557e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.933639302395360637e-01,1.227071958116330574e-01,-1.159896089061733110e-01,4.575930240007236170e-04,4.551804093766353629e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.936027928444443047e-01,1.182878050245562973e-01,-1.125362520730845972e-01,4.270235819012832969e-04,4.229441174676080366e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.938372912043398344e-01,1.139465366892765952e-01,-1.090967687889933257e-01,3.979178658259076252e-04,3.924472632812972951e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.940674242892252321e-01,1.096838347740866221e-01,-1.056735496355247356e-01,3.702384225579105929e-04,3.636269599735822118e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.942931910882769619e-01,1.055001132057585694e-01,-1.022689731475187047e-01,3.439474028089155549e-04,3.364213441900452224e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.945145906098497024e-01,1.013957565779560754e-01,-9.888540415883158297e-02,3.190066114637266992e-04,3.107695927027255686e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.947316218814804545e-01,9.737112076205527156e-02,-9.552519202806478149e-02,2.953775579490420990e-04,2.866119381688512374e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.949442839498933155e-01,9.342653372886219887e-02,-9.219066905940714873e-02,2.730215066722901972e-04,2.638896839931390991e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.951525758810034761e-01,8.956229643259450146e-02,-8.888414878477109149e-02,2.518995274550453337e-04,2.425452182773220420e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.953564967599211055e-01,8.577868336635521840e-02,-8.560792431931835145e-02,2.319725459479941908e-04,2.225220269238076471e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.955560456909556821e-01,8.207594368511443583e-02,-8.236426680570722858e-02,2.132013939249019420e-04,2.037647058976779530e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.957512217976196567e-01,7.845430211422453615e-02,-7.915542368787166494e-02,1.955468594458051876e-04,1.862189725825138587e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.959420242226324493e-01,7.491395939103087653e-02,-7.598361725760736929e-02,1.789697368122343676e-04,1.698316764038358088e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.961284521279242243e-01,7.145509367466267026e-02,-7.285104310809356609e-02,1.634308762486073906e-04,1.545508086373625174e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.963105046946395538e-01,6.807786100286285225e-02,-6.975986849534375478e-02,1.488912332888289367e-04,1.403255114288442975e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.964881811231409703e-01,6.478239676820098103e-02,-6.671223098954297270e-02,1.353119178044188563e-04,1.271060860933821647e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.966614806330126308e-01,6.156881626005367841e-02,-6.371023692661778592e-02,1.226542425952754196e-04,1.148440006160353398e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.968304024630633142e-01,5.843721550922410801e-02,-6.075595988036914391e-02,1.108797715603823095e-04,1.034918965883095524e-04,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.969949458713304180e-01,5.538767269071070282e-02,-5.785143951839467891e-02,9.995036731504464915e-05,9.300359536061058728e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.971551101350828450e-01,5.242024863887391473e-02,-5.499867991317972798e-02,8.982823826369523994e-05,8.333410360671326814e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.973108945508237788e-01,4.953498820009627490e-02,-5.219964840410036555e-02,8.047598507533643738e-05,7.443961824041793593e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.974622984342947918e-01,4.673192082497519551e-02,-4.945627418246351104e-02,7.185664648309384882e-05,6.627753076575849511e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.976093211204778433e-01,4.401106191004359319e-02,-4.677044699620896279e-02,6.393374440327223151e-05,5.880643105040632564e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.977519619635988324e-01,4.137241349935354140e-02,-4.414401593698928650e-02,5.667132829817355852e-05,5.198611056528026572e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.978902203371300406e-01,3.881596544692126477e-02,-4.157878817199470861e-02,5.003401875221357017e-05,4.577756512043829799e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.980240956337933511e-01,3.634169614326071551e-02,-3.907652780480704025e-02,4.398705021753047592e-05,4.014299713394507756e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.981535872655622477e-01,3.394957369305470762e-02,-3.663895471000266912e-02,3.849631287671176762e-05,3.504581745032870081e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.982786946636650338e-01,3.163955673589996520e-02,-3.426774331157977915e-02,3.352839360788794981e-05,3.045064673708219429e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.983994172785870536e-01,2.941159543825177733e-02,-3.196452165488521818e-02,2.905061598542451768e-05,2.632331649309775315e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.985157545800730228e-01,2.726563233959382673e-02,-2.973087019249734744e-02,2.503107928951218270e-05,2.263086971002612255e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.986277060571295827e-01,2.520160348553246096e-02,-2.756832083353624729e-02,2.143869649686146409e-05,1.934156117835164151e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.987352712180274095e-01,2.321943887259480740e-02,-2.547835600152593277e-02,1.824323119467269108e-05,1.642485752017056801e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.988384495903031013e-01,2.131906375473221690e-02,-2.346240766823669624e-02,1.541533339447914900e-05,1.385143694628517232e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.989372407207620652e-01,1.950039929579198972e-02,-2.152185631958932269e-02,1.292657422373838088e-05,1.159318874640455211e-05,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.990316441754796273e-01,1.776336343708760993e-02,-1.965803022173175096e-02,1.074947945018531636e-05,9.623212582839185762e-06,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.991216595398032529e-01,1.610787169907426306e-02,-1.787220453880981191e-02,8.857561800573144034e-06,7.915817585203902251e-06,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.992072864183545455e-01,1.453383788596881651e-02,-1.616560053434751981e-02,7.225352059492744630e-06,6.446521288241811982e-06,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.992885244350309115e-01,1.304117500987677335e-02,-1.453938476326934424e-02,5.828428912749328355e-06,5.192048429960178583e-06,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.993653732330070039e-01,1.162979593491942319e-02,-1.299466836022187405e-02,4.643447511988093959e-06,4.130329623014402594e-06,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.994378324747364983e-01,1.029961395589267664e-02,-1.153250636107346419e-02,3.648166727571013466e-06,3.240499962254564536e-06,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.995059018419534258e-01,9.050543701528804968e-03,-1.015389704188604525e-02,2.821475069492598036e-06,2.502897546875883304e-06,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.995695810356738376e-01,7.882501556410540997e-03,-8.859781266412604384e-03,2.143415253843864883e-06,1.899061970583074253e-06,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.996288697761966935e-01,6.795406518818971654e-03,-7.651041963717603901e-03,1.595207390458170825e-06,1.411732780436549428e-06,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.996837678031056384e-01,5.789180442666316470e-03,-6.528503522169459318e-03,1.159270770830220314e-06,1.024847944881575350e-06,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.997342748752695574e-01,4.863748869942286814e-03,-5.492931305676303222e-03,8.192442410758792242e-07,7.235423367395296259e-07,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.997803907708440185e-01,4.019041310057394796e-03,-4.545031184425194458e-03,5.600051356597276294e-07,4.941462526420997024e-07,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.998221152872722728e-01,3.254991928089291664e-03,-3.685449096886342629e-03,3.676867599349924234e-07,3.241839843193468079e-07,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.998594482412856976e-01,2.571539743547577965e-03,-2.914770676815493095e-03,2.296944026450090722e-07,2.023724620578544039e-07,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.998923894689052405e-01,1.968629246967516268e-03,-2.233520864261702259e-03,1.347198669911694154e-07,1.186199804921017435e-07,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.999209388254417519e-01,1.446210514967800030e-03,-1.642163587066675090e-03,7.275450678347964172e-08,6.402502392043930728e-08,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.999450961854965403e-01,1.004239749596159753e-03,-1.141101496327595177e-03,3.510075546144303800e-08,3.087519999927952015e-08,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.999648614429623716e-01,6.426793257140208087e-04,-7.306757078514341175e-04,1.438213799073529622e-08,1.264629453566377874e-08,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.999802345110231361e-01,3.614982470054549629e-04,-4.111655952132400350e-04,4.551757823718516219e-09,4.001455080885748678e-09,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.999912153221552913e-01,1.606721108149795842e-04,-1.827886389983132188e-04,8.992503022602981804e-10,7.905125455121505901e-10,-0.000000000000000000e+00,-0.000000000000000000e+00 +9.999978038281271964e-01,4.018346884133498867e-05,-4.570026495150424442e-05,5.619751219360055816e-11,4.944655220443084589e-11,-0.000000000000000000e+00,-0.000000000000000000e+00 +1.000000000000000000e+00,2.172345245846269443e-08,6.227729555015112737e-09,-6.406842235841267287e-16,6.014764460522365031e-17,-0.000000000000000000e+00,0.000000000000000000e+00 diff --git a/examples/TS_channel/TSwave_cheb_3D.csv b/examples/TS_channel/TSwave_cheb_3D.csv new file mode 100644 index 00000000000..25b087afa86 --- /dev/null +++ b/examples/TS_channel/TSwave_cheb_3D.csv @@ -0,0 +1,1500 @@ +-1.000000000000000000e+00,-8.532433453662170043e-09,-7.825595599627150187e-09,0.000000000000000000e+00,-0.000000000000000000e+00,-1.599831272561656676e-08,-1.467299174930090495e-08 +-9.999978038281271964e-01,6.339081250191365459e-06,4.338122083586022897e-06,4.527002409099353646e-12,1.195183183545239621e-11,-8.543270903398036051e-06,-3.317684627220741917e-07 +-9.999912153221552913e-01,2.538213128841344148e-05,1.737455491554113006e-05,7.345343160167814840e-11,1.900893980644579340e-10,-3.412460745523900372e-05,-1.285659877269323089e-06 +-9.999802345110231361e-01,5.712169081436035717e-05,3.909765612915819575e-05,3.726714591305153097e-10,9.612157521767487419e-10,-7.675772989925633467e-05,-2.883374579398063985e-06 +-9.999648614429623716e-01,1.015590864256511353e-04,6.950096197886711095e-05,1.178195977011979408e-09,3.036536800268931052e-09,-1.364397148525431532e-04,-5.136777931779325563e-06 +-9.999450961854965403e-01,1.586966408299545850e-04,1.085752883723027586e-04,2.875450803325525671e-09,7.411464482616838463e-09,-2.131656119904592008e-04,-8.062652415276418802e-06 +-9.999209388254417519e-01,2.285371189537993228e-04,1.563087678154905446e-04,5.958253586266729833e-09,1.536531941236317529e-08,-3.069294977659811586e-04,-1.168254652617643607e-05 +-9.998923894689052405e-01,3.110841581777411334e-04,2.126870951428934061e-04,1.102755811897632658e-08,2.846096669216762880e-08,-4.177236882276211586e-04,-1.602220886560661701e-05 +-9.998594482412856976e-01,4.063417112922833242e-04,2.776928930431409124e-04,1.878986997050827461e-08,4.854435876488042881e-08,-5.455398073093925635e-04,-2.111264932353566488e-05 +-9.998221152872722728e-01,5.143146210111909124e-04,3.513064318400511998e-04,3.005542619497379947e-08,7.774404255073667120e-08,-6.903677377934228174e-04,-2.698863777746396184e-05 +-9.997803907708440185e-01,6.350085839080511991e-04,4.335045815812777945e-04,4.573605253367640730e-08,1.184704800507748562e-07,-8.521957214234845718e-04,-3.369049402519871077e-05 +-9.997342748752695574e-01,7.684298599762195200e-04,5.242619024709374315e-04,6.684276389445010206e-08,1.734153604989223678e-07,-1.031010940303483687e-03,-4.126184518309180432e-05 +-9.996837678031056384e-01,9.145854153890290404e-04,6.235494765022742050e-04,9.448312592680328410e-08,2.455508216345021490e-07,-1.226799290360050228e-03,-4.975159527158962825e-05 +-9.996288697761966935e-01,1.073483310379447480e-03,7.313357270682651354e-04,1.298582651091697411e-07,3.381285481609111646e-07,-1.439544699513272460e-03,-5.921214435924348026e-05 +-9.995695810356738376e-01,1.245132113591762655e-03,8.475856592106073084e-04,1.742597291774336788e-07,4.546787552878928164e-07,-1.669230275523829292e-03,-6.970054611419045001e-05 +-9.995059018419534258e-01,1.429541350681043117e-03,9.722612530129576790e-04,2.290659532491314310e-07,5.990092281459728080e-07,-1.915837518886828348e-03,-8.127748039561823830e-05 +-9.994378324747364983e-01,1.626721909915838883e-03,1.105321058796416558e-03,2.957386186352766587e-07,7.752039665797756229e-07,-2.179345620629599429e-03,-9.400769978359452489e-05 +-9.993653732330070039e-01,1.836684663428785318e-03,1.246720106636086019e-03,3.758185879942542045e-07,9.876220703254661533e-07,-2.459734109998472883e-03,-1.079598641566882112e-04 +-9.992885244350309115e-01,2.059442512119403178e-03,1.396409998839586277e-03,4.709217263387399620e-07,1.240896374045567649e-06,-2.756979087020847239e-03,-1.232060097763611705e-04 +-9.992072864183545455e-01,2.295008962530312255e-03,1.554338672274459786e-03,5.827344509056003786e-07,1.539931751730508899e-06,-3.071055961785336321e-03,-1.398216150356972997e-04 +-9.991216595398032529e-01,2.543398548145819199e-03,1.720450063329689470e-03,7.130089865858985968e-07,1.889903773573420901e-06,-3.401938739576017799e-03,-1.578858266203851484e-04 +-9.990316441754796273e-01,2.804627156594336531e-03,1.894684450779379314e-03,8.635584581139484337e-07,2.296256848214360425e-06,-3.749599486515301297e-03,-1.774803925259928715e-04 +-9.989372407207620652e-01,3.078711674195466416e-03,2.076977617509164712e-03,1.036251699199413526e-06,2.764702535312161669e-06,-4.114009079217138126e-03,-1.986907878399675638e-04 +-9.988384495903031013e-01,3.365670624817492743e-03,2.267261737265849949e-03,1.233007910565025811e-06,3.301217475111975442e-06,-4.495136094082994270e-03,-2.216040843887388909e-04 +-9.987352712180274095e-01,3.665523431511381273e-03,2.465463722099767337e-03,1.455791084751685575e-06,3.912041436102811109e-06,-4.892948282758434984e-03,-2.463115600262525112e-04 +-9.986277060571295827e-01,3.978291314330954613e-03,2.671506672128005303e-03,1.706604012619967810e-06,4.603675095285029593e-06,-5.307410983524543310e-03,-2.729054697457588825e-04 +-9.985157545800730228e-01,4.303996515533350421e-03,2.885308499362131129e-03,1.987482625197020287e-06,5.382877778638580915e-06,-5.738488672583380405e-03,-3.014810942465334111e-04 +-9.983994172785870536e-01,4.642663278104359852e-03,3.106782279923024203e-03,2.300489383479645072e-06,6.256665029829621174e-06,-6.186143231385597165e-03,-3.321355268245334376e-04 +-9.982786946636650338e-01,4.994317116353828336e-03,3.335836217316592682e-03,2.647707335797211850e-06,7.232305954558219682e-06,-6.650335419804160458e-03,-3.649671687915827657e-04 +-9.981535872655622477e-01,5.358985339969861428e-03,3.572372796990599692e-03,3.031233193922529319e-06,8.317320650854867283e-06,-7.131024002344052599e-03,-4.000767207359412752e-04 +-9.980240956337933511e-01,5.736696921859368685e-03,3.816289465332934130e-03,3.453170931272371001e-06,9.519477282415188916e-06,-7.628166107917069838e-03,-4.375652951892959153e-04 +-9.978902203371300406e-01,6.127482744531542388e-03,4.067477382584993965e-03,3.915624579552764966e-06,1.084678916320121843e-05,-8.141716882760252280e-03,-4.775361206614616469e-04 +-9.977519619635988324e-01,6.531375794491861043e-03,4.325822775716871814e-03,4.420691616412480435e-06,1.230751144047500538e-05,-8.671629243610255375e-03,-5.200913510022429822e-04 +-9.976093211204778433e-01,6.948410482726272500e-03,4.591204291733111398e-03,4.970455485757362102e-06,1.391013792565739864e-05,-9.217855271970002567e-03,-5.653363544483002963e-04 +-9.974622984342947918e-01,7.378623493675015750e-03,4.863495513702270623e-03,5.566978208283417180e-06,1.566339789831929291e-05,-9.780344744754978017e-03,-6.133743033506157578e-04 +-9.973108945508237788e-01,7.822054063482486788e-03,5.142562866324609100e-03,6.212293468065257354e-06,1.757625206087777084e-05,-1.035904473702789659e-02,-6.643093895793436912e-04 +-9.971551101350828450e-01,8.278742775890782571e-03,5.428265825474629450e-03,6.908398568859482315e-06,1.965788908375299849e-05,-1.095390200590806370e-02,-7.182457013845617091e-04 +-9.969949458713304180e-01,8.748733253779318597e-03,5.720456999613215618e-03,7.657247012810218355e-06,2.191772173572425551e-05,-1.156485994652256023e-02,-7.752863221644666842e-04 +-9.968304024630633142e-01,9.232071099341650905e-03,6.018981708339644389e-03,8.460740816071564953e-06,2.436538248036250096e-05,-1.219186070782152416e-02,-8.355333541357465093e-04 +-9.966614806330126308e-01,9.728804226681379147e-03,6.323677576188095270e-03,9.320722714674573218e-06,2.701071963998305902e-05,-1.283484469830253362e-02,-8.990878957758016449e-04 +-9.964881811231409703e-01,1.023898340371178113e-02,6.634374406595785860e-03,1.023896824639097340e-05,2.986379272787295029e-05,-1.349374969956493135e-02,-9.660494766437511269e-04 +-9.963105046946395538e-01,1.076266175294141730e-02,6.950893797856658000e-03,1.121717778131020265e-05,3.293486783092153199e-05,-1.416851193178225374e-02,-1.036515959070000289e-03 +-9.961284521279242243e-01,1.129989496760361312e-02,7.273049243184456260e-03,1.225696866152347086e-05,3.623441302721597712e-05,-1.485906577731362956e-02,-1.110582515601232214e-03 +-9.959420242226324493e-01,1.185074174631991530e-02,7.600645207823826348e-03,1.335986715536346793e-05,3.977309330728262944e-05,-1.556534309342686925e-02,-1.188342508308591399e-03 +-9.957512217976196567e-01,1.241526319472179354e-02,7.933477459207615920e-03,1.452730032257286609e-05,4.356176547892419654e-05,-1.628727446030235268e-02,-1.269886002876266734e-03 +-9.955560456909556821e-01,1.299352359168766229e-02,8.271332343495406761e-03,1.576058804792291850e-05,4.761147305124416778e-05,-1.702478786820387988e-02,-1.355300242948894547e-03 +-9.953564967599211055e-01,1.358559015004373073e-02,8.613987045967930883e-03,1.706093503574931386e-05,5.193344034716563791e-05,-1.777780928714702349e-02,-1.444668264806571499e-03 +-9.951525758810034761e-01,1.419153250484345152e-02,8.961208266401326367e-03,1.842942272890574966e-05,5.653906741635552008e-05,-1.854626374391964844e-02,-1.538070469526965156e-03 +-9.949442839498933155e-01,1.481142432616712121e-02,9.312753200257976871e-03,1.986700099943787922e-05,6.143992383889711173e-05,-1.933007241144263366e-02,-1.635581857765946480e-03 +-9.947316218814804545e-01,1.544534144618715972e-02,9.668368522869195958e-03,2.137448088540450406e-05,6.664774258380052208e-05,-2.012915622899656351e-02,-1.737272995423964168e-03 +-9.945145906098497024e-01,1.609336351122677908e-02,1.002778975591368978e-02,2.295252594017421405e-05,7.217441434654130726e-05,-2.094343290743564540e-02,-1.843210249535410855e-03 +-9.942931910882769619e-01,1.675557322492633033e-02,1.039074179961434891e-02,2.460164472290550224e-05,7.803198031871484848e-05,-2.177281844461415386e-02,-1.953453825909282765e-03 +-9.940674242892252321e-01,1.743205611836361885e-02,1.075693773220816503e-02,2.632218276345744056e-05,8.423262659777318220e-05,-2.261722764545039469e-02,-2.068059043737128632e-03 +-9.938372912043398344e-01,1.812290182250941503e-02,1.112607927517476371e-02,2.811431491771163740e-05,9.078867668153055087e-05,-2.347657181691172740e-02,-2.187074475451878611e-03 +-9.936027928444443047e-01,1.882820233445253386e-02,1.149785591863125740e-02,2.997803774362083721e-05,9.771258499916576998e-05,-2.435076209050840088e-02,-2.310542588006000726e-03 +-9.933639302395360637e-01,1.954805373059874049e-02,1.187194490825461100e-02,3.191316203475359992e-05,1.050169298428087059e-04,-2.523970627159163871e-02,-2.438498758499811018e-03 +-9.931207044387818650e-01,2.028255532429806135e-02,1.224801047160161901e-02,3.391930516900325878e-05,1.127144057849433043e-04,-2.614331046915623069e-02,-2.570971706341594609e-03 +-9.928731165105130207e-01,2.103180999179347227e-02,1.262570429124358282e-02,3.599588404432022812e-05,1.208178160758218469e-04,-2.706147852278731336e-02,-2.707981578629066466e-03 +-9.926211675422206282e-01,2.179592318080299240e-02,1.300466413463748951e-02,3.814210795519065315e-05,1.293400656713939736e-04,-2.799411388628083794e-02,-2.849541483319385088e-03 +-9.923648586405513505e-01,2.257500464780257815e-02,1.338451420470505512e-02,4.035697123174238749e-05,1.382941533238617323e-04,-2.894111638039659901e-02,-2.995655788401834982e-03 +-9.921041909313020879e-01,2.336916717426754456e-02,1.376486480546426985e-02,4.263924679951363569e-05,1.476931632555217961e-04,-2.990238459425427486e-02,-3.146319698408358775e-03 +-9.918391655594147593e-01,2.417852738266126900e-02,1.414531156755700209e-02,4.498747952029019627e-05,1.575502569851042114e-04,-3.087781433265205169e-02,-3.301519649844882921e-03 +-9.915697836889719730e-01,2.500320401968175385e-02,1.452543513447553288e-02,4.739997925943132063e-05,1.678786655637114786e-04,-3.186730179389837131e-02,-3.461232837393989938e-03 +-9.912960465031916968e-01,2.584332042707298063e-02,1.490480170193564380e-02,4.987481517512545772e-05,1.786916814652482726e-04,-3.287073887616812073e-02,-3.625425135726688831e-03 +-9.910179552044214857e-01,2.669900269168625981e-02,1.528296098090464912e-02,5.240980950696569085e-05,1.900026487918755446e-04,-3.388801656411465774e-02,-3.794053848086727937e-03 +-9.907355110141340404e-01,2.757037993340019644e-02,1.565944753535259901e-02,5.500253119560607622e-05,2.018249558976194387e-04,-3.491902428432327316e-02,-3.967064124217310229e-03 +-9.904487151729213235e-01,2.845758481313346092e-02,1.603377954542316297e-02,5.765029135994474815e-05,2.141720256638691641e-04,-3.596364882393122270e-02,-4.144390203439442776e-03 +-9.901575689404893410e-01,2.936075240143082646e-02,1.640545798421597784e-02,6.035013667880650702e-05,2.270573072058961568e-04,-3.702177629742345322e-02,-4.325955880942583889e-03 +-9.898620735956522587e-01,3.028002132747548841e-02,1.677396783997493757e-02,6.309884500315441453e-05,2.404942665648487042e-04,-3.809328981028502203e-02,-4.511671138588709169e-03 +-9.895622304363274058e-01,3.121553265218334969e-02,1.713877617163659522e-02,6.589292072330536266e-05,2.544963773625200814e-04,-3.917807136099064891e-02,-4.701434713798947895e-03 +-9.892580407795287245e-01,3.216743030567546097e-02,1.749933243152796553e-02,6.872858969167958425e-05,2.690771117853014462e-04,-4.027600077904260056e-02,-4.895132419154998860e-03 +-9.889495059613619965e-01,3.313586056760429999e-02,1.785506790451459741e-02,7.160179499116875565e-05,2.842499310366469571e-04,-4.138695642522150636e-02,-5.092637121382568612e-03 +-9.886366273370181812e-01,3.412097222655550988e-02,1.820539574949833414e-02,7.450819304149697091e-05,3.000282761650310370e-04,-4.251081458424782467e-02,-5.293807594634122524e-03 +-9.883194062807676428e-01,3.512291629551924504e-02,1.854971002832950244e-02,7.744314996647098752e-05,3.164255580430944343e-04,-4.364744965377550384e-02,-5.498489277366367355e-03 +-9.879978441859542659e-01,3.614184551332111361e-02,1.888738593082298425e-02,8.040173800224135450e-05,3.334551483441465622e-04,-4.479673469667688757e-02,-5.706512792498483026e-03 +-9.876719424649894608e-01,3.717791490803266236e-02,1.921777889312571361e-02,8.337873268699655068e-05,3.511303692876851287e-04,-4.595853996223948912e-02,-5.917694549913188155e-03 +-9.873417025493456123e-01,3.823128025929557749e-02,1.954022474042269031e-02,8.636860978055597505e-05,3.694644843714076326e-04,-4.713273530503027386e-02,-6.131835436851433042e-03 +-9.870071258895497524e-01,3.930209930895558340e-02,1.985403891758314929e-02,8.936554336847572495e-05,3.884706887076839533e-04,-4.831918740683962404e-02,-6.348721228321403329e-03 +-9.866682139551777864e-01,4.039053053423431044e-02,2.015851625996231342e-02,9.236340306292175326e-05,4.081620986826525982e-04,-4.951776152299706968e-02,-6.568121995975817126e-03 +-9.863249682348473879e-01,4.149673320844026597e-02,2.045293097783184438e-02,9.535575306287639150e-05,4.285517424327936259e-04,-5.072832076664049450e-02,-6.789791129105464916e-03 +-9.859773902362116704e-01,4.262086668636293207e-02,2.073653565070117735e-02,9.833585019947824256e-05,4.496525501464932319e-04,-5.195072679667349369e-02,-7.013466226278887439e-03 +-9.856254814859526370e-01,4.376309075126367382e-02,2.100856140207872300e-02,1.012966432117172873e-04,4.714773441279366357e-04,-5.318483846316421348e-02,-7.238867789388727519e-03 +-9.852692435297746298e-01,4.492356428505680371e-02,2.126821710454293926e-02,1.042307712901618483e-04,4.940388290882507735e-04,-5.443051354250300761e-02,-7.465699753139687887e-03 +-9.849086779323972252e-01,4.610244605194276785e-02,2.151469050519261988e-02,1.071305652334384497e-04,5.173495824415531678e-04,-5.568760645346442634e-02,-7.693646432895476575e-03 +-9.845437862775482385e-01,4.729989314008424539e-02,2.174714525298038215e-02,1.099880467966151174e-04,5.414220441299077979e-04,-5.695597030588122472e-02,-7.922377177156257957e-03 +-9.841745701679575076e-01,4.851606131756559803e-02,2.196472337209056519e-02,1.127949288337432542e-04,5.662685074919658569e-04,-5.823545529939327947e-02,-8.151540831312831595e-03 +-9.838010312253490097e-01,4.975110385305477750e-02,2.216654265844925087e-02,1.155426168826498740e-04,5.919011093116681530e-04,-5.952590993791671836e-02,-8.380769744921780229e-03 +-9.834231710904342005e-01,5.100517187947736092e-02,2.235169824127722013e-02,1.182222094333607168e-04,6.183318206326557549e-04,-6.082717928596018120e-02,-8.609675995928515665e-03 +-9.830409914229045754e-01,5.227841300488857146e-02,2.251926102550367084e-02,1.208245008938678129e-04,6.455724365666243953e-04,-6.213910644418603468e-02,-8.837853491708298759e-03 +-9.826544939014246749e-01,5.357097112321640431e-02,2.266827797920013474e-02,1.233399823330610110e-04,6.736345679701807561e-04,-6.346153170581454728e-02,-9.064876641220818987e-03 +-9.822636802236245357e-01,5.488298595753000192e-02,2.279777159093675579e-02,1.257588447404162731e-04,7.025296313918743532e-04,-6.479429214296811290e-02,-9.290300616339612627e-03 +-9.818685521060921406e-01,5.621459217333476394e-02,2.290674010330361401e-02,1.280709811550743266e-04,7.322688403269344507e-04,-6.613722192553256329e-02,-9.513660192893862513e-03 +-9.814691112843658694e-01,5.756591844294607629e-02,2.299415670311923121e-02,1.302659909872192737e-04,7.628631963049282766e-04,-6.749015265590524448e-02,-9.734470585340051010e-03 +-9.810653595129272819e-01,5.893708763011993451e-02,2.305896934837438003e-02,1.323331820344478672e-04,7.943234805587595518e-04,-6.885291152543214499e-02,-9.952227127481397956e-03 +-9.806572985651929031e-01,6.032821571966719837e-02,2.310010114531297482e-02,1.342615758514484694e-04,8.266602435678495949e-04,-7.022532174395104687e-02,-1.016640396411116511e-02 +-9.802449302335066728e-01,6.173940954802516201e-02,2.311644945552231503e-02,1.360399111565009059e-04,8.598837987603961302e-04,-7.160720513567277179e-02,-1.037645516906132541e-02 +-9.798282563291319525e-01,6.317076880550044626e-02,2.310688670647256804e-02,1.376566502272675619e-04,8.940042137109045512e-04,-7.299837664172550977e-02,-1.058181271715767820e-02 +-9.794072786822439758e-01,6.462238291528132894e-02,2.307025786634851749e-02,1.390999820643740539e-04,9.290313007638682135e-04,-7.439864834299082796e-02,-1.078189076143932189e-02 +-9.789819991419210776e-01,6.609433074679510467e-02,2.300538386224007872e-02,1.403578284306360462e-04,9.649746111547599994e-04,-7.580782808085947955e-02,-1.097607882034450436e-02 +-9.785524195761373667e-01,6.758668044408429332e-02,2.291105860743943706e-02,1.414178514805443214e-04,1.001843426220722652e-03,-7.722571777294325035e-02,-1.116374700465235133e-02 +-9.781185418717538438e-01,6.909948785427987017e-02,2.278604980627060503e-02,1.422674579722290537e-04,1.039646749931121949e-03,-7.865211426253873594e-02,-1.134424422166426319e-02 +-9.776803679345105191e-01,7.063279522373049069e-02,2.262909912830214643e-02,1.428938069179949352e-04,1.078393301541915542e-03,-8.008680957353520036e-02,-1.151689762512471057e-02 +-9.772378996890179748e-01,7.218662947421108922e-02,2.243892178545597166e-02,1.432838162182558425e-04,1.118091511428223866e-03,-8.152959185848712709e-02,-1.168101325158865667e-02 +-9.767911390787489267e-01,7.376100442763727782e-02,2.221420710795192141e-02,1.434241704879421378e-04,1.158749512482745627e-03,-8.298023884772429137e-02,-1.183587485432350574e-02 +-9.763400880660293435e-01,7.535591588071266500e-02,2.195361845333989453e-02,1.433013299690825775e-04,1.200375132691042452e-03,-8.443852460799314286e-02,-1.198074406168983357e-02 +-9.758847486320304521e-01,7.697134275014592963e-02,2.165579147911947042e-02,1.429015374656361441e-04,1.242975891235105360e-03,-8.590421482076562865e-02,-1.211486368173496884e-02 +-9.754251227767596344e-01,7.860724442235214604e-02,2.131933656916533068e-02,1.422108252305346319e-04,1.286558991382327505e-03,-8.737706907819360602e-02,-1.223745330009645173e-02 +-9.749612125190515455e-01,8.026356008697661804e-02,2.094283866834539126e-02,1.412150275362695519e-04,1.331131317740626933e-03,-8.885683935951853296e-02,-1.234770982344519756e-02 +-9.744930198965594537e-01,8.194020859099702958e-02,2.052485480721886676e-02,1.398997867524000142e-04,1.376699429113059647e-03,-9.034326743085663303e-02,-1.244481244396368827e-02 +-9.740205469657464699e-01,8.363708544974660097e-02,2.006391856491121506e-02,1.382505638006889688e-04,1.423269553703729144e-03,-9.183608747463499478e-02,-1.252791468911570311e-02 +-9.735437958018759996e-01,8.535406135183004617e-02,1.955853645746043895e-02,1.362526500485540298e-04,1.470847587160727978e-03,-9.333502581654296026e-02,-1.259615170986597490e-02 +-9.730627684990029724e-01,8.709098342949093963e-02,1.900718984893332367e-02,1.338911742286550452e-04,1.519439087165323169e-03,-9.483979536564042523e-02,-1.264863731700711924e-02 +-9.725774671699647378e-01,8.884766982161737547e-02,1.840833527620330812e-02,1.311511150833555024e-04,1.569049269300737837e-03,-9.635010252911657691e-02,-1.268446409984810667e-02 +-9.720878939463715174e-01,9.062391145199816256e-02,1.776040437881249573e-02,1.280173109423428282e-04,1.619683007442122239e-03,-9.786564049721009628e-02,-1.270270439819044805e-02 +-9.715940509785973012e-01,9.241946953170139534e-02,1.706180469186589777e-02,1.244744718967820006e-04,1.671344826423061015e-03,-9.938609044442801854e-02,-1.270240977384311937e-02 +-9.710959404357700775e-01,9.423407292959511128e-02,1.631091962626158437e-02,1.205071903818965168e-04,1.724038903383682099e-03,-1.009111228778968999e-01,-1.268261212864679049e-02 +-9.705935645057625072e-01,9.606741817877316458e-02,1.550610904613833156e-02,1.160999531229603589e-04,1.777769063923689483e-03,-1.024403939441605182e-01,-1.264232383075991790e-02 +-9.700869253951824867e-01,9.791916647452671152e-02,1.464571021992967272e-02,1.112371523865701109e-04,1.832538782195425710e-03,-1.039735472789300313e-01,-1.258053727363914014e-02 +-9.695760253293631559e-01,9.978894291392484406e-02,1.372803780852394247e-02,1.059030994693818743e-04,1.888351178721697299e-03,-1.055102115572893462e-01,-1.249622637917423458e-02 +-9.690608665523531284e-01,1.016763341846702029e-01,1.275138469074016748e-02,1.000820347174405058e-04,1.945209021508443291e-03,-1.070500008569096884e-01,-1.238834667506926585e-02 +-9.685414513269068326e-01,1.035808876867649331e-01,1.171402366388016661e-02,9.375814330469044004e-05,2.003114723392627767e-03,-1.085925122421780992e-01,-1.225583388132284873e-02 +-9.680177819344745194e-01,1.055021077390924655e-01,1.061420562705819165e-02,8.691556507846687847e-05,2.062070345550963326e-03,-1.101373287240085819e-01,-1.209760924704114844e-02 +-9.674898606751921593e-01,1.074394569767094521e-01,9.450164064849865542e-03,7.953840909600595527e-05,2.122077595894832601e-03,-1.116840123996863388e-01,-1.191257323561555580e-02 +-9.669576898678714505e-01,1.093923507613987905e-01,8.220112202099063084e-03,7.161076690846525437e-05,2.183137831290245766e-03,-1.132321106086291024e-01,-1.169961311879048373e-02 +-9.664212718499893828e-01,1.113601584163542851e-01,6.922246903151404313e-03,6.311672452135370264e-05,2.245252059883943389e-03,-1.147811492120719284e-01,-1.145759809827150318e-02 +-9.658806089776785786e-01,1.133421994798880367e-01,5.554747975847653188e-03,5.404037755389454509e-05,2.308420942323753146e-03,-1.163306351351548473e-01,-1.118538322278752294e-02 +-9.653357036257157464e-01,1.153377429027850343e-01,4.115780712907721878e-03,4.436584457489788519e-05,2.372644793935488241e-03,-1.178800533169546000e-01,-1.088180742060824470e-02 +-9.647865581875129104e-01,1.173460030966886752e-01,2.603495148811581922e-03,3.407728165404866160e-05,2.437923589624730377e-03,-1.194288694967443193e-01,-1.054569788222177451e-02 +-9.642331750751055308e-01,1.193661412778955766e-01,1.016028433705744695e-03,2.315889341053508756e-05,2.504256966306355522e-03,-1.209765230082980314e-01,-1.017586879653580717e-02 +-9.636755567191421790e-01,1.213972601632023890e-01,-6.484930119898796698e-04,1.159494939926679154e-05,2.571644225380283980e-03,-1.225224319815429452e-01,-9.771120702497049573e-03 +-9.631137055688746562e-01,1.234384032188551583e-01,-2.391953288269149051e-03,-6.302012641761244584e-07,2.640084340023286878e-03,-1.240659899554875240e-01,-9.330243698597247975e-03 +-9.625476240921463367e-01,1.254885529162892421e-01,-4.216245394119134439e-03,-1.353211724974295525e-05,2.709575959627177726e-03,-1.256065643081415995e-01,-8.852018221497079051e-03 +-9.619773147753815090e-01,1.275466291455691903e-01,-6.123268264388156620e-03,-2.712624898507656217e-05,2.780117412647261331e-03,-1.271434943523253036e-01,-8.335213531932865114e-03 +-9.614027801235747184e-01,1.296114846954688482e-01,-8.114927300118137884e-03,-4.142792459845931366e-05,2.851706715108802762e-03,-1.286760949004074339e-01,-7.778592980152662130e-03 +-9.608240226602797751e-01,1.316819072844345739e-01,-1.019313024760963582e-02,-5.645233620511518784e-05,2.924341574734434530e-03,-1.302036475228771084e-01,-7.180910785710034779e-03 +-9.602410449275984305e-01,1.337566118423960948e-01,-1.235978731884243217e-02,-7.221452386967757429e-05,2.998019398566345497e-03,-1.317254100715871679e-01,-6.540917011420724000e-03 +-9.596538494861691637e-01,1.358342445918990504e-01,-1.461680784249417414e-02,-8.872936363592705798e-05,3.072737302248501699e-03,-1.332406040731584362e-01,-5.857356279759609770e-03 +-9.590624389151560791e-01,1.379133762017422105e-01,-1.696609807323058369e-02,-1.060115497297408531e-04,3.148492112326703625e-03,-1.347484226146740127e-01,-5.128968914681326582e-03 +-9.584668158122380266e-01,1.399925024658392470e-01,-1.940956017564130637e-02,-1.240755816739413746e-04,3.225280379625655638e-03,-1.362480241327871378e-01,-4.354494545278702819e-03 +-9.578669827935962777e-01,1.420700411888447534e-01,-2.194908857886574902e-02,-1.429357493718884755e-04,3.303098381938352444e-03,-1.377385333387027011e-01,-3.532671044498428194e-03 +-9.572629424939035347e-01,1.441443299290512958e-01,-2.458656819817440706e-02,-1.626061176436361874e-04,3.381942137252448317e-03,-1.392190405703763179e-01,-2.662237239004729659e-03 +-9.566546975663126062e-01,1.462136248531504534e-01,-2.732387226593441590e-02,-1.831005127333256918e-04,3.461807409095463573e-03,-1.406885991062374608e-01,-1.741935162457770771e-03 +-9.560422506824440836e-01,1.482760973585003916e-01,-3.016285836622525224e-02,-2.044325074092497865e-04,3.542689719005846088e-03,-1.421462267231041465e-01,-7.705092215160078530e-04 +-9.554256045323753499e-01,1.503298333013140808e-01,-3.310536684876352193e-02,-2.266154049264973327e-04,3.624584355234732828e-03,-1.435909024390203681e-01,2.532898912403119861e-04 +-9.548047618246282564e-01,1.523728315677661260e-01,-3.615321740697659963e-02,-2.496622262441052062e-04,3.707486382777594198e-03,-1.450215645725705382e-01,1.330704436400347544e-03 +-9.541797252861573542e-01,1.544030008141429322e-01,-3.930820585141819173e-02,-2.735856935320307042e-04,3.791390650588334459e-03,-1.464371123333620683e-01,2.462968206118040446e-03 +-9.535504976623380147e-01,1.564181573971507588e-01,-4.257210197906368537e-02,-2.983982176544526868e-04,3.876291808179497308e-03,-1.478364052936991802e-01,3.651302749502055838e-03 +-9.529170817169542174e-01,1.584160263243420408e-01,-4.594664448616789243e-02,-3.241118827155768222e-04,3.962184311518181581e-03,-1.492182573252384681e-01,4.896918839533842722e-03 +-9.522794802321865593e-01,1.603942359823657682e-01,-4.943353953968033371e-02,-3.507384312457669222e-04,4.049062436107707212e-03,-1.505814423456380835e-01,6.201010775439897912e-03 +-9.516376960085998205e-01,1.623503195695589207e-01,-5.303445585531232909e-02,-3.782892519422740869e-04,4.136920287968386643e-03,-1.519246876541001656e-01,7.564756927693293449e-03 +-9.509917318651310847e-01,1.642817112965618276e-01,-5.675102112127287241e-02,-4.067753626519535594e-04,4.225751814925627660e-03,-1.532466772533131183e-01,8.989317450385022820e-03 +-9.503415906390768608e-01,1.661857463236760812e-01,-6.058481937143549850e-02,-4.362073984047558882e-04,4.315550819011346184e-03,-1.545460483586932332e-01,1.047582990235714634e-02 +-9.496872751860808703e-01,1.680596586908466306e-01,-6.453738546512671759e-02,-4.665955969201369502e-04,4.406310968318266803e-03,-1.558213918831817535e-01,1.202540998128639495e-02 +-9.490287883801215019e-01,1.699005804237720962e-01,-6.861020215050452065e-02,-4.979497842739065861e-04,4.498025808610971697e-03,-1.570712509469393559e-01,1.363914710107308470e-02 +-9.483661331134992656e-01,1.717055397999081578e-01,-7.280469514227209948e-02,-5.302793610274272830e-04,4.590688777166633885e-03,-1.582941212114150897e-01,1.531810338006424008e-02 +-9.476993122968236927e-01,1.734714615535648263e-01,-7.712222959955844503e-02,-5.635932889002794955e-04,4.684293213353115520e-03,-1.594884478463890476e-01,1.706330969268635839e-02 +-9.470283288590011228e-01,1.751951643408979631e-01,-8.156410489632369420e-02,-5.979000771424593632e-04,4.778832372028940859e-03,-1.606526279235608590e-01,1.887576461287778298e-02 +-9.463531857472214925e-01,1.768733615431523076e-01,-8.613155106993695964e-02,-6.332077701327876174e-04,4.874299435454646640e-03,-1.617850068602053326e-01,2.075642990380140163e-02 +-9.456738859269452346e-01,1.785026590285739845e-01,-9.082572241842234995e-02,-6.695239333718990774e-04,4.970687526335774986e-03,-1.628838808957835671e-01,2.270623104978902740e-02 +-9.449904323818909546e-01,1.800795559238147769e-01,-9.564769444282092414e-02,-7.068556397992764324e-04,5.067989720775119451e-03,-1.639474942775719923e-01,2.472605121640881337e-02 +-9.443028281140214419e-01,1.816004433158170950e-01,-1.005984574000055171e-01,-7.452094572049589374e-04,5.166199061390032606e-03,-1.649740407005163556e-01,2.681673127113141236e-02 +-9.436110761435310135e-01,1.830616055690400534e-01,-1.056789130145340122e-01,-7.845914356259237445e-04,5.265308568672509050e-03,-1.659616602392730011e-01,2.897906359207263333e-02 +-9.429151795088319687e-01,1.844592186917951016e-01,-1.108898668635011492e-01,-8.250070949270178495e-04,5.365311253056664377e-03,-1.669084422319580818e-01,3.121379370803492592e-02 +-9.422151412665417114e-01,1.857893512129650260e-01,-1.162320258442573473e-01,-8.664614107825388545e-04,5.466200128416539607e-03,-1.678124238985028160e-01,3.352161213444132537e-02 +-9.415109644914688714e-01,1.870479639106026049e-01,-1.217059905163423300e-01,-9.089588059684429932e-04,5.567968226324858556e-03,-1.686715915767269536e-01,3.590315555558232230e-02 +-9.408026522765996491e-01,1.882309126808823785e-01,-1.273122501573889231e-01,-9.525031333841775614e-04,5.670608605855296212e-03,-1.694838765585883755e-01,3.835900266383377583e-02 +-9.400902077330849371e-01,1.893339462526960060e-01,-1.330511779679014184e-01,-9.970976668310861830e-04,5.774114365283190310e-03,-1.702471611030131893e-01,4.088966947683841252e-02 +-9.393736339902262200e-01,1.903527097984835936e-01,-1.389230248969411385e-01,-1.042745089333524389e-03,5.878478655947371422e-03,-1.709592739290833885e-01,4.349560701044936223e-02 +-9.386529341954613637e-01,1.912827444572770541e-01,-1.449279130617271982e-01,-1.089447481479418082e-03,5.983694691510803819e-03,-1.716179939337661164e-01,4.617719949682961428e-02 +-9.379281115143518477e-01,1.921194899207331053e-01,-1.510658312509766399e-01,-1.137206308974161558e-03,6.089755761933746646e-03,-1.722210487502347787e-01,4.893475849465249439e-02 +-9.371991691305681105e-01,1.928582857000026574e-01,-1.573366271452593113e-01,-1.186022412844509781e-03,6.196655242354378451e-03,-1.727661163873714101e-01,5.176852297057897567e-02 +-9.364661102458753383e-01,1.934943733890863715e-01,-1.637400029042138105e-01,-1.235895994645939923e-03,6.304386605997050648e-03,-1.732508256325145102e-01,5.467865290171525922e-02 +-9.357289380801203649e-01,1.940228992309550038e-01,-1.702755088078475587e-01,-1.286826610672445583e-03,6.412943432248294391e-03,-1.736727565416646413e-01,5.766522637975392968e-02 +-9.349876558712167940e-01,1.944389157010743552e-01,-1.769425362060048357e-01,-1.338813157959254079e-03,6.522319419144288234e-03,-1.740294434508982413e-01,6.072823788657272237e-02 +-9.342422668751307890e-01,1.947373860032365067e-01,-1.837403119854928712e-01,-1.391853865346994986e-03,6.632508392169446723e-03,-1.743183732291485877e-01,6.386759362061553147e-02 +-9.334927743658669730e-01,1.949131858030098696e-01,-1.906678923010986237e-01,-1.445946281242605630e-03,6.743504313143705402e-03,-1.745369894351761764e-01,6.708310812414666757e-02 +-9.327391816354543286e-01,1.949611078703568923e-01,-1.977241563377720168e-01,-1.501087266725395367e-03,6.855301290103832726e-03,-1.746826917669493939e-01,7.037450079803946046e-02 +-9.319814919939308773e-01,1.948758654746728936e-01,-2.049077994618526966e-01,-1.557272981786356810e-03,6.967893583633121175e-03,-1.747528386191859739e-01,7.374139353524562790e-02 +-9.312197087693302455e-01,1.946520961984959386e-01,-2.122173285303016188e-01,-1.614498880207301161e-03,7.081275616292521079e-03,-1.747447496459680061e-01,7.718330430991547642e-02 +-9.304538353076660107e-01,1.942843666748714504e-01,-2.196510527020491543e-01,-1.672759696185111304e-03,7.195441980170671527e-03,-1.746557074020257116e-01,8.069964923218483910e-02 +-9.296838749729178231e-01,1.937671778090696850e-01,-2.272070807178978857e-01,-1.732049436041577239e-03,7.310387444373840464e-03,-1.744829589061654695e-01,8.428973253873639493e-02 +-9.289098311470160851e-01,1.930949701565493892e-01,-2.348833122443878818e-01,-1.792361371159763642e-03,7.426106958611002236e-03,-1.742237177606281162e-01,8.795274781112018270e-02 +-9.281317072298272963e-01,1.922621285872382624e-01,-2.426774324638932279e-01,-1.853688027347430130e-03,7.542595660317084026e-03,-1.738751684752829763e-01,9.168777324070617740e-02 +-9.273495066391393982e-01,1.912629890920836129e-01,-2.505869061969957512e-01,-1.916021175976444311e-03,7.659848878755373507e-03,-1.734344676582890987e-01,9.549376794058297979e-02 +-9.265632328106458981e-01,1.900918441714042895e-01,-2.586089724498749076e-01,-1.979351828644914692e-03,7.777862141109427065e-03,-1.728987487644521870e-01,9.936956766932451834e-02 +-9.257728891979321029e-01,1.887429513468196207e-01,-2.667406370241863778e-01,-2.043670227606438940e-03,7.896631173554602423e-03,-1.722651218995861211e-01,1.033138844366232323e-01 +-9.249784792724585758e-01,1.872105374296359204e-01,-2.749786679675104706e-01,-2.108965837971860276e-03,8.016151905378252410e-03,-1.715306825065588126e-01,1.073253010736729823e-01 +-9.241800065235469264e-01,1.854888088482680997e-01,-2.833195889654090416e-01,-2.175227338539278705e-03,8.136420472657520872e-03,-1.706925096269123199e-01,1.114022699975212305e-01 +-9.233774744583637117e-01,1.835719581371324793e-01,-2.917596758792107559e-01,-2.242442617016243093e-03,8.257433215827923029e-03,-1.697476723050866110e-01,1.155431064566103533e-01 +-9.225708866019060039e-01,1.814541722668228663e-01,-3.002949491858878384e-01,-2.310598763721740442e-03,8.379186681834476305e-03,-1.686932334931813271e-01,1.197459898862039546e-01 +-9.217602464969846254e-01,1.791296412936855265e-01,-3.089211704775539635e-01,-2.379682064163948178e-03,8.501677623943609471e-03,-1.675262543152449823e-01,1.240089581262052154e-01 +-9.209455577042096053e-01,1.765925679090185607e-01,-3.176338363761705974e-01,-2.449677993139766537e-03,8.624902999731324030e-03,-1.662437976067385192e-01,1.283299070094761074e-01 +-9.201268238019744139e-01,1.738371755370201888e-01,-3.264281749652989673e-01,-2.520571208504236108e-03,8.748859970855744211e-03,-1.648429351382936503e-01,1.327065858056715175e-01 +-9.193040483864395318e-01,1.708577205937095445e-01,-3.352991399905030923e-01,-2.592345544274002409e-03,8.873545899565767406e-03,-1.633207479998534384e-01,1.371365974762065176e-01 +-9.184772350715175726e-01,1.676485006205602202e-01,-3.442414099566178942e-01,-2.664984007298136070e-03,8.998958342382273184e-03,-1.616743356785107777e-01,1.416173904286272822e-01 +-9.176463874888565186e-01,1.642038659294889069e-01,-3.532493809575694144e-01,-2.738468777060978909e-03,9.125095047914423899e-03,-1.599008195053389370e-01,1.461462627582885421e-01 +-9.168115092878250660e-01,1.605182302367740843e-01,-3.623171640406794292e-01,-2.812781193956283635e-03,9.251953950107954378e-03,-1.579973489420164190e-01,1.507203587795932220e-01 +-9.159726041354947501e-01,1.565860828660111947e-01,-3.714385833209493670e-01,-2.887901759035163291e-03,9.379533161149825049e-03,-1.559611058581482634e-01,1.553366649998065285e-01 +-9.151296757166257345e-01,1.524019989188800661e-01,-3.806071722727104034e-01,-2.963810129396427483e-03,9.507830964675893087e-03,-1.537893135370998199e-01,1.599920104168069401e-01 +-9.142827277336491587e-01,1.479606539122732267e-01,-3.898161720010296372e-01,-3.040485116968172450e-03,9.636845807242604145e-03,-1.514792382081188715e-01,1.646830640700649162e-01 +-9.134317639066518169e-01,1.432568332536452438e-01,-3.990585287597890884e-01,-3.117904683776362689e-03,9.766576287816774657e-03,-1.490282011370191173e-01,1.694063350284857605e-01 +-9.125767879733592824e-01,1.382854487689762768e-01,-4.083268938626608002e-01,-3.196045943552851525e-03,9.897021149975987128e-03,-1.464335783493253829e-01,1.741581689759427176e-01 +-9.117178036891198101e-01,1.330415490573529369e-01,-4.176136201353647937e-01,-3.274885157035880776e-03,1.002817926321624385e-02,-1.436928127575598790e-01,1.789347524262900058e-01 +-9.108548148268875710e-01,1.275203337879254606e-01,-4.269107647871199784e-01,-3.354397731561421531e-03,1.016004961907617833e-02,-1.408034196947035299e-01,1.837321061072461348e-01 +-9.099878251772063331e-01,1.217171686332258612e-01,-4.362100871225241816e-01,-3.434558223170946015e-03,1.029263131066709969e-02,-1.377629920183931767e-01,1.885460892838678459e-01 +-9.091168385481928071e-01,1.156275975006962609e-01,-4.455030507126322425e-01,-3.515340336791752517e-03,1.042592352380280855e-02,-1.345692110068386349e-01,1.933723970466771913e-01 +-9.082418587655194386e-01,1.092473596647220158e-01,-4.547808217789031660e-01,-3.596716924228671695e-03,1.055992551679786617e-02,-1.312198487503824229e-01,1.982065660863299994e-01 +-9.073628896723984205e-01,1.025724014467600947e-01,-4.640342758072383833e-01,-3.678659985727711617e-03,1.069463660674466188e-02,-1.277127814037881393e-01,2.030439664823544543e-01 +-9.064799351295638186e-01,9.559889455895401178e-02,-4.732539953439169889e-01,-3.761140677543506165e-03,1.083005614984260644e-02,-1.240459905334104912e-01,2.078798115299818616e-01 +-9.055929990152560283e-01,8.832324791479873038e-02,-4.824302764749165973e-01,-3.844129308111431716e-03,1.096618352199770635e-02,-1.202175772490920308e-01,2.127091528477396132e-01 +-9.047020852252026790e-01,8.074212506276119694e-02,-4.915531302984154083e-01,-3.927595349442463551e-03,1.110301810319961648e-02,-1.162257662708316569e-01,2.175268864920962830e-01 +-9.038071976726038681e-01,7.285246006511471306e-02,-5.006122881643578770e-01,-4.011507431579057308e-03,1.124055925222976031e-02,-1.120689133269130722e-01,2.223277536833408174e-01 +-9.029083402881132869e-01,6.465147035582076429e-02,-5.095972086054116890e-01,-4.095833357886129698e-03,1.137880628738096624e-02,-1.077455185668855647e-01,2.271063400887987638e-01 +-9.020055170198213457e-01,5.613667537047584144e-02,-5.184970802698641856e-01,-4.180540106038459214e-03,1.151775847068124692e-02,-1.032542294325582144e-01,2.318570843829403649e-01 +-9.010987318332384088e-01,4.730591396793326631e-02,-5.273008308016615686e-01,-4.265593836603725347e-03,1.165741497108667771e-02,-9.859384600140155830e-02,2.365742774874354526e-01 +-9.001879887112764766e-01,3.815735397259163025e-02,-5.359971336118987129e-01,-4.350959898791680568e-03,1.179777484774401260e-02,-9.376334123225094075e-02,2.412520676431678113e-01 +-8.992732916542326427e-01,2.868951536371498398e-02,-5.445744168294223142e-01,-4.436602844584945811e-03,1.193883702713077061e-02,-8.876185566929647996e-02,2.458844633198631335e-01 +-8.983546446797705531e-01,1.890128088682855451e-02,-5.530208705700540017e-01,-4.522486432295488509e-03,1.208060026972740050e-02,-8.358871568979382716e-02,2.504653412441356641e-01 +-8.974320518229038646e-01,8.791914373197384086e-03,-5.613244593559856277e-01,-4.608573640799310672e-03,1.222306315339839224e-02,-7.824343716905968193e-02,2.549884467756986517e-01 +-8.965055171359771480e-01,-1.638922798471393726e-03,-5.694729313455330066e-01,-4.694826679289238608e-03,1.236622403543406572e-02,-7.272573240440764752e-02,2.594474023047920186e-01 +-8.955750446886495686e-01,-1.239115928307870859e-02,-5.774538314922215143e-01,-4.781207006085214789e-03,1.251008103114650480e-02,-6.703552429678778790e-02,2.638357103543755300e-01 +-8.946406385678755679e-01,-2.346429431701339136e-02,-5.852545107396379676e-01,-4.867675334745366102e-03,1.265463198234218975e-02,-6.117294692091271996e-02,2.681467662155948006e-01 +-8.937023028778884326e-01,-3.485738642752045369e-02,-5.928621445970475579e-01,-4.954191653630683582e-03,1.279987442756869848e-02,-5.513836086838582590e-02,2.723738551182620093e-01 +-8.927600417401808652e-01,-4.656903463223596024e-02,-6.002637425549116612e-01,-5.040715242599817271e-03,1.294580557321806796e-02,-4.893235387096749739e-02,2.765101687196920288e-01 +-8.918138592934876652e-01,-5.859736750749862388e-02,-6.074461665802743937e-01,-5.127204688296340719e-03,1.309242225745382154e-02,-4.255575542659431609e-02,2.805488067257361218e-01 +-8.908637596937675207e-01,-7.094002796125911670e-02,-6.143961451359509773e-01,-5.213617902737309635e-03,1.323972093047570166e-02,-3.600964250471282851e-02,2.844827890710450724e-01 +-8.899097471141842464e-01,-8.359415639588563263e-02,-6.211002925570126010e-01,-5.299912145298656292e-03,1.338769761313280543e-02,-2.929534124747842222e-02,2.883050602240953553e-01 +-8.889518257450892413e-01,-9.655638132008446950e-02,-6.275451244309839804e-01,-5.386044043041715579e-03,1.353634786485697315e-02,-2.241444153137139156e-02,2.920085031530553765e-01 +-8.879899997940023937e-01,-1.098228043352850319e-01,-6.337170784731525064e-01,-5.471969614326008605e-03,1.368566675242535897e-02,-1.536879975052591642e-02,2.955859451475560951e-01 +-8.870242734855938727e-01,-1.233889901429867098e-01,-6.396025314826730979e-01,-5.557644286887658200e-03,1.383564881938684904e-02,-8.160549657936298773e-03,2.990301731341628022e-01 +-8.860546510616654770e-01,-1.372499519472402119e-01,-6.451878257928546079e-01,-5.643022930527581346e-03,1.398628805435148809e-02,-7.921030880959171192e-04,3.023339333143670871e-01 +-8.850811367811323160e-01,-1.514001420853510471e-01,-6.504592825677335544e-01,-5.728059883390757868e-03,1.413757785208917886e-02,6.733841131703820551e-03,3.054899575620711460e-01 +-8.841037349200038253e-01,-1.658334419376389091e-01,-6.554032309613219143e-01,-5.812708971058678543e-03,1.428951098302943569e-02,1.441429136712410984e-02,3.084909621400429014e-01 +-8.831224497713646704e-01,-1.805431526797863506e-01,-6.600060282370917575e-01,-5.896923546854922552e-03,1.444207955980715552e-02,2.224595766003291050e-02,3.113296653855469054e-01 +-8.821372856453569833e-01,-1.955219863087063747e-01,-6.642540844697107927e-01,-5.980656517425071532e-03,1.459527500616319452e-02,3.022524770899175442e-02,3.139987987741189035e-01 +-8.811482468691598235e-01,-2.107620576254061462e-01,-6.681338859282089926e-01,-6.063860374913013739e-03,1.474908802083750928e-02,3.834826319844745340e-02,3.164911223684933606e-01 +-8.801553377869717476e-01,-2.262548784175912875e-01,-6.716320235718300502e-01,-6.146487229605496899e-03,1.490350854538802748e-02,4.661079427930787250e-02,3.187994325288991959e-01 +-8.791585627599906028e-01,-2.419913517407753234e-01,-6.747352159926299775e-01,-6.228488848475890148e-03,1.505852573286066594e-02,5.500831627298097964e-02,3.209165818103280610e-01 +-8.781579261663950975e-01,-2.579617667170737705e-01,-6.774303398068931559e-01,-6.309816687996699751e-03,1.521412791737892199e-02,6.353598794407708250e-02,3.228354865866152057e-01 +-8.771534324013248174e-01,-2.741557940252501435e-01,-6.797044537480029236e-01,-6.390421937803749015e-03,1.537030257618527025e-02,7.218865114614517187e-02,3.245491480880584034e-01 +-8.761450858768620176e-01,-2.905624871989522240e-01,-6.815448299037527580e-01,-6.470255551299648571e-03,1.552703630197150610e-02,8.096082237389171099e-02,3.260506615489636717e-01 +-8.751328910220110835e-01,-3.071702776351515896e-01,-6.829389817778329652e-01,-6.549268299262913512e-03,1.568431477652696901e-02,8.984669903103077726e-02,3.273332327092682847e-01 +-8.741168522826797682e-01,-3.239669767783736054e-01,-6.838746911749289392e-01,-6.627410800561461986e-03,1.584212273322579967e-02,9.884015528790891791e-02,3.283901978063777927e-01 +-8.730969741216594304e-01,-3.409397779889623559e-01,-6.843400420379195381e-01,-6.704633569286138836e-03,1.600044393634734302e-02,1.079347416949416327e-01,3.292150316925539499e-01 +-8.720732610186056055e-01,-3.580752571451557564e-01,-6.843234491240344219e-01,-6.780887064700561080e-03,1.615926114677194297e-02,1.171236904480671137e-01,3.298013666583395898e-01 +-8.710457174700181326e-01,-3.753593797692912348e-01,-6.838136870282343072e-01,-6.856121736787647997e-03,1.631855609665647938e-02,1.263999118098000918e-01,3.301430114933339155e-01 +-8.700143479892213927e-01,-3.927775048878344166e-01,-6.827999227576524444e-01,-6.930288067788545336e-03,1.647830946365921520e-02,1.357560001473826772e-01,3.302339646281327590e-01 +-8.689791571063447684e-01,-4.103143946017844268e-01,-6.812717474978408072e-01,-7.003336629521221372e-03,1.663850085012998081e-02,1.451842328565029761e-01,3.300684293740484554e-01 +-8.679401493683025492e-01,-4.279542190663344070e-01,-6.792192062554747167e-01,-7.075218130797322931e-03,1.679910875839571785e-02,1.546765815779193587e-01,3.296408335613300045e-01 +-8.668973293387736145e-01,-4.456805702939587999e-01,-6.766328303462065819e-01,-7.145883470622175354e-03,1.696011056364686923e-02,1.642247106414666258e-01,3.289458440863133881e-01 +-8.658507015981822263e-01,-4.634764727832346720e-01,-6.735036688434207219e-01,-7.215283790217001496e-03,1.712148249966935834e-02,1.738199853117377292e-01,3.279783834693743017e-01 +-8.648002707436770464e-01,-4.813243976409127267e-01,-6.698233195933296358e-01,-7.283370531979805641e-03,1.728319963682318355e-02,1.834534774044441208e-01,3.267336470999037124e-01 +-8.637460413891117073e-01,-4.992062781411992223e-01,-6.655839593188326297e-01,-7.350095486495815920e-03,1.744523587002529477e-02,1.931159721901186677e-01,3.252071218679576936e-01 +-8.626880181650236068e-01,-5.171035250336512012e-01,-6.607783784846456010e-01,-7.415410856873969232e-03,1.760756390017588144e-02,2.027979798041064374e-01,3.233945953438847054e-01 +-8.616262057186147016e-01,-5.349970462044365549e-01,-6.554000076785487794e-01,-7.479269318127655490e-03,1.777015521628060382e-02,2.124897425717015842e-01,3.212921801315878345e-01 +-8.605606087137299687e-01,-5.528672695233899370e-01,-6.494429498905217590e-01,-7.541624071548763686e-03,1.793298008960327733e-02,2.221812404703638055e-01,3.188963262142982535e-01 +-8.594912318308379762e-01,-5.706941618588499132e-01,-6.429020095224730058e-01,-7.602428903864518438e-03,1.809600757052858694e-02,2.318622079092272081e-01,3.162038382606248699e-01 +-8.584180797670095675e-01,-5.884572532911368503e-01,-6.357727237135454290e-01,-7.661638251377497939e-03,1.825920547278998829e-02,2.415221449032168566e-01,3.132118871933267656e-01 +-8.573411572358974331e-01,-6.061356638991538537e-01,-6.280513871506836487e-01,-7.719207261620519150e-03,1.842254037230403596e-02,2.511503275631696841e-01,3.099180323470460685e-01 +-8.562604689677154601e-01,-6.237081301048829074e-01,-6.197350829172788567e-01,-7.775091848043348686e-03,1.858597760219883252e-02,2.607358235413216319e-01,3.063202304439084922e-01 +-8.551760197092181937e-01,-6.411530349451791677e-01,-6.108217097838628540e-01,-7.829248766065666221e-03,1.874948125662488294e-02,2.702675042127267169e-01,3.024168491419343963e-01 +-8.540878142236794091e-01,-6.584484370279904963e-01,-6.013100029861132567e-01,-7.881635667143531074e-03,1.891301419061047762e-02,2.797340633826466694e-01,2.982066904557959530e-01 +-8.529958572908716841e-01,-6.755721055654412277e-01,-5.911995646472667509e-01,-7.932211156335028032e-03,1.907653802860077505e-02,2.891240286052846886e-01,2.936889935129112228e-01 +-8.519001537070450825e-01,-6.925015511963173998e-01,-5.804908858858679466e-01,-7.980934869071020302e-03,1.924001317339306058e-02,2.984257843348062100e-01,2.888634500744834410e-01 +-8.508007082849067260e-01,-7.092140632898102925e-01,-5.691853674684226316e-01,-8.027767530532985488e-03,1.940339881065949665e-02,3.076275867792879759e-01,2.837302197156937322e-01 +-8.496975258535985898e-01,-7.256867474396775375e-01,-5.572853410876268399e-01,-8.072671017526462933e-03,1.956665292557853431e-02,3.167175821555582305e-01,2.782899405198006604e-01 +-8.485906112586771854e-01,-7.418965642965752449e-01,-5.447940905733493766e-01,-8.115608421428914185e-03,1.972973232153786560e-02,3.256838260814969299e-01,2.725437363427984927e-01 +-8.474799693620920227e-01,-7.578203680497427319e-01,-5.317158677452513693e-01,-8.156544120339951492e-03,1.989259263819209353e-02,3.345143071446772809e-01,2.664932303290475923e-01 +-8.463656050421637378e-01,-7.734349499869201949e-01,-5.180559068545700363e-01,-8.195443825078824879e-03,2.005518837043798741e-02,3.431969643264827230e-01,2.601405570386526866e-01 +-8.452475231935638877e-01,-7.887170807361874036e-01,-5.038204465667958099e-01,-8.232274657410000931e-03,2.021747290000576458e-02,3.517197101166243955e-01,2.534883561685913334e-01 +-8.441257287272921905e-01,-8.036435534911906542e-01,-4.890167323085324314e-01,-8.267005215490761533e-03,2.037939851826127066e-02,3.600704548301454921e-01,2.465397985953597659e-01 +-8.430002265706553199e-01,-8.181912311569096152e-01,-4.736530318084679059e-01,-8.299605616392103855e-03,2.054091646077817190e-02,3.682371264310440040e-01,2.392985829602000403e-01 +-8.418710216672458113e-01,-8.323370899123723188e-01,-4.577386445005093840e-01,-8.330047567982598600e-03,2.070197693775357659e-02,3.762076997283484991e-01,2.317689388283442342e-01 +-8.407381189769198571e-01,-8.460582711088474595e-01,-4.412839070369448113e-01,-8.358304433580973636e-03,2.086252917146996094e-02,3.839702123779023535e-01,2.239556319996521272e-01 +-8.396015234757753243e-01,-8.593321259608118767e-01,-4.243001933726903707e-01,-8.384351284966813991e-03,2.102252144706586584e-02,3.915127965995208181e-01,2.158639746569905038e-01 +-8.384612401561303274e-01,-8.721362643129949577e-01,-4.067999215176897887e-01,-8.408164950586719671e-03,2.118190113970208621e-02,3.988237052037980068e-01,2.074998174809089546e-01 +-8.373172740265014680e-01,-8.844486095692961625e-01,-3.887965513012069385e-01,-8.429724087205634625e-03,2.134061477409905302e-02,4.058913277554735410e-01,1.988695529082594171e-01 +-8.361696301115808527e-01,-8.962474409583441881e-01,-3.703045789083202477e-01,-8.449009222008313155e-03,2.149860806943872105e-02,4.127042318935673637e-01,1.899801185676395776e-01 +-8.350183134522153328e-01,-9.075114523919968201e-01,-3.513395348492067138e-01,-8.466002810085755670e-03,2.165582598278698381e-02,4.192511746625624536e-01,1.808389882496316547e-01 +-8.338633291053832997e-01,-9.182198001729144909e-01,-3.319179721342602196e-01,-8.480689283680733007e-03,2.181221278425300378e-02,4.255211355712635313e-01,1.714541750733536429e-01 +-8.327046821441730362e-01,-9.283521529771564529e-01,-3.120574573284637943e-01,-8.493055096281421426e-03,2.196771210061302521e-02,4.315033458898989149e-01,1.618342229622768136e-01 +-8.315423776577602899e-01,-9.378887458045690462e-01,-2.917765570622485871e-01,-8.503088770798737997e-03,2.212226698186292931e-02,4.371873106847850021e-01,1.519882002383713127e-01 +-8.303764207513859574e-01,-9.468104294023325096e-01,-2.710948213769805992e-01,-8.510780946613821091e-03,2.227581995911694090e-02,4.425628391799191763e-01,1.419256926593806534e-01 +-8.292068165463334362e-01,-9.550987244607508364e-01,-2.500327629345306391e-01,-8.516124413647198008e-03,2.242831311954535806e-02,4.476200656622482676e-01,1.316567976451799571e-01 +-8.280335701799065307e-01,-9.627358678949333726e-01,-2.286118383274572119e-01,-8.519114150759093951e-03,2.257968817081184784e-02,4.523494843598187543e-01,1.211921079445384547e-01 +-8.268566868054064711e-01,-9.697048682307862766e-01,-2.068544211252372200e-01,-8.519747364767663381e-03,2.272988651452391776e-02,4.567419660191611719e-01,1.105427041406049499e-01 +-8.256761715921098199e-01,-9.759895499087677395e-01,-1.847837756612068782e-01,-8.518023518576155542e-03,2.287884932314212388e-02,4.607887936588033195e-01,9.972013909576143975e-02 +-8.244920297252450458e-01,-9.815746057227390242e-01,-1.624240263900425807e-01,-8.513944363895338194e-03,2.302651760726632732e-02,4.644816810890148306e-01,8.873642403592450767e-02 +-8.233042664059700977e-01,-9.864456435883066643e-01,-1.398001246803742548e-01,-8.507513962593388213e-03,2.317283230432328964e-02,4.678127997002138327e-01,7.760401280516163181e-02 +-8.221128868513501997e-01,-9.905892326477692134e-01,-1.169378136400601176e-01,-8.498738711041470967e-03,2.331773435528809724e-02,4.707748037454725920e-01,6.633578322025734941e-02 +-8.209178962943334268e-01,-9.939929481189155869e-01,-9.386358851680356996e-02,-8.487627353013938586e-03,2.346116478839956401e-02,4.733608548400110028e-01,5.494502020491219552e-02 +-8.197192999837294991e-01,-9.966454157437840689e-01,-7.060465896637618810e-02,-8.474191000052895897e-03,2.360306479862665091e-02,4.755646436833757540e-01,4.344538900138882748e-02 +-8.185171031841850242e-01,-9.985363545287795350e-01,-4.718890076770562020e-02,-8.458443144696422764e-03,2.374337584134108420e-02,4.773804110686625246e-01,3.185092186763630040e-02 +-8.173113111761618477e-01,-9.996566147687609982e-01,-2.364481208040133606e-02,-8.440399655697332706e-03,2.388203971497237024e-02,4.788029734310821350e-01,2.017599009895832199e-02 +-8.161019292559127392e-01,-9.999982193062991742e-01,-1.466039290599369866e-06,-8.420078792841236351e-03,2.401899865184375579e-02,4.798277376438284669e-01,8.435276927030565391e-03 +-8.148889627354590770e-01,-9.995543962513298020e-01,2.371154456375888092e-02,-8.397501200793602155e-03,2.415419540930800746e-02,4.804507267605278020e-01,-3.356240544277015302e-03 +-8.136724169425666453e-01,-9.983196150547294367e-01,4.746415724904606837e-02,-8.372689896163805490e-03,2.428757335147952584e-02,4.806685940321133166e-01,-1.518332501564592998e-02 +-8.124522972207226523e-01,-9.962896188071073134e-01,7.122589471773602132e-02,-8.345670271890976225e-03,2.441907655196347629e-02,4.804786382005866163e-01,-2.703050882584450906e-02 +-8.112286089291126379e-01,-9.934614494060887147e-01,9.496592788279527297e-02,-8.316470076905021699e-03,2.454864987898703471e-02,4.798788260095348535e-01,-3.888211356019877218e-02 +-8.100013574425958263e-01,-9.898334778659937383e-01,1.186531310210999363e-01,-8.285119387250773626e-03,2.467623908583995579e-02,4.788677985748612809e-01,-5.072228922318531424e-02 +-8.087705481516828110e-01,-9.854054256514822585e-01,1.422561435594618950e-01,-8.251650597306810914e-03,2.480179091197395880e-02,4.774448877582532225e-01,-6.253504545243683543e-02 +-8.075361864625111297e-01,-9.801783833987534056e-01,1.657434361665048872e-01,-8.216098387902692410e-03,2.492525316883444780e-02,4.756101302960473176e-01,-7.430427880522021133e-02 +-8.062982777968216164e-01,-9.741548293789535906e-01,1.890833743452035309e-01,-8.178499689492493147e-03,2.504657482338549027e-02,4.733642749693253715e-01,-8.601380852102358254e-02 +-8.050568275919344208e-01,-9.673386471025832156e-01,2.122442840384957952e-01,-8.138893647267815210e-03,2.516570610375541001e-02,4.707087836612453557e-01,-9.764741192149845039e-02 +-8.038118413007259155e-01,-9.597351278046067913e-01,2.351945206786512643e-01,-8.097321580595971680e-03,2.528259858850418276e-02,4.676458527770980811e-01,-1.091888563668418077e-01 +-8.025633243916036053e-01,-9.513509834312565694e-01,2.579025364651632013e-01,-8.053826933576292701e-03,2.539720527671632602e-02,4.641784067462442764e-01,-1.206219371289684317e-01 +-8.013112823484830338e-01,-9.421943504027250338e-01,2.803369496760872437e-01,-8.008455233013614741e-03,2.550948069762547579e-02,4.603101003727200213e-01,-1.319305136106802834e-01 +-8.000557206707632485e-01,-9.322747849223692596e-01,3.024666178508710335e-01,-7.961254028209775735e-03,2.561938098477681627e-02,4.560453283938359359e-01,-1.430985400098599425e-01 +-7.987966448733025970e-01,-9.216032638714933833e-01,3.242607060362152072e-01,-7.912272822614571655e-03,2.572686395868314588e-02,4.513892157527723548e-01,-1.541101064744835325e-01 +-7.975340604863950800e-01,-9.101921758420347119e-01,3.456887550058900804e-01,-7.861563027153692720e-03,2.583188921692561660e-02,4.463476174058076396e-01,-1.649494810549443358e-01 +-7.962679730557449265e-01,-8.980553080845486980e-01,3.667207608159198662e-01,-7.809177885438944866e-03,2.593441821039297809e-02,4.409271167140625391e-01,-1.756011306523950211e-01 +-7.949983881424437238e-01,-8.852078336014195248e-01,3.873272365397030237e-01,-7.755172388405916217e-03,2.603441431547148799e-02,4.351350144010134868e-01,-1.860497751194815919e-01 +-7.937253113229446599e-01,-8.716662917786611731e-01,4.074792878552943587e-01,-7.699603225296869878e-03,2.613184291855217417e-02,4.289793204050404429e-01,-1.962804146074282330e-01 +-7.924487481890386542e-01,-8.574485634725578231e-01,4.271486850555861192e-01,-7.642528671438883654e-03,2.622667147867107501e-02,4.224687468810291446e-01,-2.062783623108120912e-01 +-7.911687043478299319e-01,-8.425738475618385070e-01,4.463079239806131415e-01,-7.584008527499387642e-03,2.631886959933626285e-02,4.156126892098043757e-01,-2.160292961387449417e-01 +-7.898851854217111557e-01,-8.270626274063678229e-01,4.649303052593858343e-01,-7.524104025708921312e-03,2.640840909494108005e-02,4.084212167250323433e-01,-2.255192736717469282e-01 +-7.885981970483383341e-01,-8.109366374922751808e-01,4.829899924492839225e-01,-7.462877728020170763e-03,2.649526404601267732e-02,4.009050539513179245e-01,-2.347347837593070641e-01 +-7.873077448806070633e-01,-7.942188264660438035e-01,5.004620811190607643e-01,-7.400393449800464138e-03,2.657941085902535208e-02,3.930755595763983923e-01,-2.436627741297733019e-01 +-7.860138345866267695e-01,-7.769333145544807806e-01,5.173226650920056402e-01,-7.336716146316401774e-03,2.666082832735822833e-02,3.849447070490913947e-01,-2.522906803153059929e-01 +-7.847164718496960623e-01,-7.591053455196633548e-01,5.335488928309204404e-01,-7.271911812255111955e-03,2.673949767066938210e-02,3.765250666930395340e-01,-2.606064685145582649e-01 +-7.834156623682784204e-01,-7.407612417717903686e-01,5.491190306257212050e-01,-7.206047394214433813e-03,2.681540257907138319e-02,3.678297733725227059e-01,-2.685986606230384988e-01 +-7.821114118559759909e-01,-7.219283491834027622e-01,5.640125220333074285e-01,-7.139190664045851412e-03,2.688852926080429959e-02,3.588725051788717590e-01,-2.762563606906400793e-01 +-7.808037260415053860e-01,-7.026349817441026602e-01,5.782100363019198053e-01,-7.071410121613113005e-03,2.695886647099205238e-02,3.496674543982792316e-01,-2.835692959212295361e-01 +-7.794926106686721479e-01,-6.829103627028314527e-01,5.916935274067446349e-01,-7.002774887157044680e-03,2.702640554564814113e-02,3.402292973522117459e-01,-2.905278311650468237e-01 +-7.781780714963458800e-01,-6.627845615807120838e-01,6.044462799538522502e-01,-6.933354573806806836e-03,2.709114042177472592e-02,3.305731646047801453e-01,-2.971230009372392566e-01 +-7.768601142984342678e-01,-6.422884315394463695e-01,6.164529528762044963e-01,-6.863219185351859992e-03,2.715306766317940323e-02,3.207146034974340809e-01,-3.033465380139672707e-01 +-7.755387448638585424e-01,-6.214535375051262189e-01,6.276996270030669178e-01,-6.792439002001836139e-03,2.721218647478502659e-02,3.106695514117127632e-01,-3.091908867559007290e-01 +-7.742139689965273908e-01,-6.003120886504943465e-01,6.381738401779909475e-01,-6.721084447642771374e-03,2.726849870067497156e-02,3.004542946853520635e-01,-3.146492313425279796e-01 +-7.728857925153117536e-01,-5.788968667469464036e-01,6.478646211874246053e-01,-6.649226000512563350e-03,2.732200884221631584e-02,2.900854295580010400e-01,-3.197155173987281107e-01 +-7.715542212540194011e-01,-5.572411477977813332e-01,6.567625295523561446e-01,-6.576934050019884494e-03,2.737272405307820608e-02,2.795798304876417895e-01,-3.243844534143167091e-01 +-7.702192610613689538e-01,-5.353786273944495244e-01,6.648596702057392172e-01,-6.504278787173008690e-03,2.742065412765387969e-02,2.689546067730948509e-01,-3.286515496908071765e-01 +-7.688809178009646805e-01,-5.133433413857972916e-01,6.721497286618957068e-01,-6.431330097806424277e-03,2.746581149046703821e-02,2.582270637899141130e-01,-3.325131089848102350e-01 +-7.675391973512698529e-01,-4.911695863335392986e-01,6.786279834136289546e-01,-6.358157421352697222e-03,2.750821117650602970e-02,2.474146610136816471e-01,-3.359662496819701416e-01 +-7.661941056055823207e-01,-4.688918379865649744e-01,6.842913210356236231e-01,-6.284829653310818227e-03,2.754787081128936763e-02,2.365349707267427048e-01,-3.390089134751358979e-01 +-7.648456484720069781e-01,-4.465446672637797265e-01,6.891382474905013433e-01,-6.211415027834569363e-03,2.758481057387776256e-02,2.256056389661932615e-01,-3.416398695224526216e-01 +-7.634938318734313389e-01,-4.241626613392558975e-01,6.931688957561268349e-01,-6.137980997549735002e-03,2.761905316518279210e-02,2.146443350929592986e-01,-3.438587146804559236e-01 +-7.621386617474984471e-01,-4.017803357835629252e-01,6.963850223129088857e-01,-6.064594118521959148e-03,2.765062377572207603e-02,2.036687169503928474e-01,-3.456658836573325289e-01 +-7.607801440465815634e-01,-3.794320517036286478e-01,6.987900070978687594e-01,-5.991319951271143612e-03,2.767955002273815926e-02,1.926963861149845192e-01,-3.470626415924710217e-01 +-7.594182847377570766e-01,-3.571519346519789750e-01,7.003888447030763853e-01,-5.918222939415006614e-03,2.770586190806827165e-02,1.817448399479514309e-01,-3.480510819133973976e-01 +-7.580530898027795228e-01,-3.349737893525306376e-01,7.011881303946849719e-01,-5.845366307902067834e-03,2.772959176485137109e-02,1.708314322699681087e-01,-3.486341227391104858e-01 +-7.566845652380541631e-01,-3.129310157089180144e-01,7.011960415000257907e-01,-5.772811965856647354e-03,2.775077419183224689e-02,1.599733330759091354e-01,-3.488155008196570650e-01 +-7.553127170546113378e-01,-2.910565292961397321e-01,7.004223198209906798e-01,-5.700620404719067280e-03,2.776944597903576589e-02,1.491874819547505671e-01,-3.485997524604378950e-01 +-7.539375512780798205e-01,-2.693826805836360139e-01,6.988782402163556950e-01,-5.628850588030471017e-03,2.778564604908619756e-02,1.384905466656307371e-01,-3.479922093893880741e-01 +-7.525590739486605063e-01,-2.479411737840396535e-01,6.965765804768047076e-01,-5.557559875883788351e-03,2.779941537087613629e-02,1.278988858551477203e-01,-3.469989813522384448e-01 +-7.511772911210995440e-01,-2.267629920311740599e-01,6.935315860892934392e-01,-5.486803926281188203e-03,2.781079688417757712e-02,1.174285040017026965e-01,-3.456269376272700367e-01 +-7.497922088646621352e-01,-2.058783199156153099e-01,6.897589299072457436e-01,-5.416636618885399372e-03,2.781983541118822692e-02,1.070950160543692048e-01,-3.438836876557266331e-01 +-7.484038332631057777e-01,-1.853164721568988926e-01,6.852756688224277415e-01,-5.347109960022280485e-03,2.782657757342120106e-02,9.691360593882354657e-02,-3.417775571268127366e-01 +-7.470121704146531760e-01,-1.651058196776931108e-01,6.801001917379916328e-01,-5.278274021294224530e-03,2.783107168870911294e-02,8.689899592517534577e-02,-3.393175705151845745e-01 +-7.456172264319661513e-01,-1.452737271291073273e-01,6.742521729417820797e-01,-5.210176858074487723e-03,2.783336768007393935e-02,7.706540111559335993e-02,-3.365134139849599304e-01 +-7.442190074421178858e-01,-1.258464834071363614e-01,6.677525086880787830e-01,-5.142864445773684054e-03,2.783351698221509657e-02,6.742650424296788014e-02,-3.333754206586261315e-01 +-7.428175195865669433e-01,-1.068492416992091465e-01,6.606232635049233970e-01,-5.076380627898165014e-03,2.783157243082131152e-02,5.799542032512963741e-02,-3.299145289576139173e-01 +-7.414127690211299582e-01,-8.830596126520727507e-02,6.528876039462200298e-01,-5.010767044849161883e-03,2.782758815621156831e-02,4.878466632080938337e-02,-3.261422562539655901e-01 +-7.400047619159539902e-01,-7.023935471348484449e-02,6.445697316253072851e-01,-4.946063096890338309e-03,2.782161948337621288e-02,3.980612923707145534e-02,-3.220706661368086254e-01 +-7.385935044554904350e-01,-5.267083306238425772e-02,6.356948166073680406e-01,-4.882305889657919361e-03,2.781372281601822427e-02,3.107104761234672635e-02,-3.177123278038884724e-01 +-7.371790028384670457e-01,-3.562046377515961509e-02,6.262889202896364527e-01,-4.819530198439218004e-03,2.780395551815164484e-02,2.258997833435950897e-02,-3.130802885223619714e-01 +-7.357612632778612882e-01,-1.910692780869006208e-02,6.163789248036775081e-01,-4.757768445864084261e-03,2.779237581405380811e-02,1.437277536742847563e-02,-3.081880278098838022e-01 +-7.343402920008724744e-01,-3.147479710642392490e-03,6.059924557868318074e-01,-4.697050655196744438e-03,2.777904267545845685e-02,6.428573187171862137e-03,-3.030494186137129842e-01 +-7.329160952488951164e-01,1.224208748497369013e-02,5.951577985374429947e-01,-4.637404445242219314e-03,2.776401568645253670e-02,-1.234228035393559802e-03,-2.976786961002072518e-01 +-7.314886792774907276e-01,2.704746324801108118e-02,5.839038247674296356e-01,-4.578855012268512326e-03,2.774735494003562758e-02,-8.607992684808545014e-03,-2.920904024410702626e-01 +-7.300580503563610657e-01,4.125585287109085481e-02,5.722599040738809428e-01,-4.521425108438349369e-03,2.772912092986262733e-02,-1.568584556708883804e-02,-2.862993567975579134e-01 +-7.286242147693199334e-01,5.485599059511848258e-02,5.602558227028922033e-01,-4.465135058342446610e-03,2.770937441445018787e-02,-2.246169417416360101e-02,-2.803206088733053436e-01 +-7.271871788142661996e-01,6.783815546134439045e-02,5.479217009014223372e-01,-4.410002749990632638e-03,2.768817632385999275e-02,-2.893023422077886653e-02,-2.741693930642224442e-01 +-7.257469488031560445e-01,8.019418268895495139e-02,5.352879070694330776e-01,-4.356043643945717476e-03,2.766558763133467047e-02,-3.508695185152480023e-02,-2.678610872750115113e-01 +-7.243035310619742040e-01,9.191746655497841367e-02,5.223849717416452210e-01,-4.303270785885921108e-03,2.764166924078128446e-02,-4.092813000348782121e-02,-2.614111714459028679e-01 +-7.228569319307079910e-01,1.030029592388042314e-01,5.092435035211884120e-01,-4.251694840677505347e-03,2.761648187511055869e-02,-4.645085054686542980e-02,-2.548351825379747693e-01 +-7.214071577633178745e-01,1.134471674338558972e-01,4.958941077766920724e-01,-4.201324098476962032e-03,2.759008597200805074e-02,-5.165298886013654223e-02,-2.481486651823076039e-01 +-7.199542149277103897e-01,1.232481411894914697e-01,4.823672955534962781e-01,-4.152164517016200540e-03,2.756254156420617774e-02,-5.653321130589015336e-02,-2.413671422471771744e-01 +-7.184981098057098281e-01,1.324054581953006127e-01,4.686934081335284441e-01,-4.104219766113880764e-03,2.753390818546122901e-02,-6.109096971560481343e-02,-2.345060583505959029e-01 +-7.170388487930305921e-01,1.409202060967279513e-01,4.549025314377802554e-01,-4.057491254948318408e-03,2.750424475706279154e-02,-6.532648820677826318e-02,-2.275807454127465046e-01 +-7.155764382992484407e-01,1.487949548928474974e-01,4.410244169684522375e-01,-4.011978192739126120e-03,2.747360949606144145e-02,-6.924075746137529908e-02,-2.206063796361217300e-01 +-7.141108847477731780e-01,1.560337323670046572e-01,4.270884049159409113e-01,-3.967677629463899590e-03,2.744205981226447572e-02,-7.283551245215971748e-02,-2.135979387945444452e-01 +-7.126421945758196763e-01,1.626419861259636723e-01,4.131233447953304716e-01,-3.924584521895233685e-03,2.740965221263463869e-02,-7.611322472541401374e-02,-2.065701691822568742e-01 +-7.111703742343798984e-01,1.686265487535627272e-01,3.991575262230190879e-01,-3.882691795130953014e-03,2.737644222019664300e-02,-7.907707864709018208e-02,-1.995375392151380878e-01 +-7.096954301881950311e-01,1.739955952408191175e-01,3.852186048874153279e-01,-3.841990399337128891e-03,2.734248428243290369e-02,-8.173095233062353804e-02,-1.925142083203967847e-01 +-7.082173689157260643e-01,1.787585979075866638e-01,3.713335353350998846e-01,-3.802469388590626418e-03,2.730783169251114334e-02,-8.407939390390442580e-02,-1.855139899086110655e-01 +-7.067361969091264795e-01,1.829262774252065349e-01,3.575285060198299947e-01,-3.764115988336481401e-03,2.727253650483707145e-02,-8.612759619596485616e-02,-1.785503175178511093e-01 +-7.052519206742123847e-01,1.865105481776033747e-01,3.438288767163226178e-01,-3.726915681487570717e-03,2.723664946502215439e-02,-8.788137365623702180e-02,-1.716362144596568773e-01 +-7.037645467304356472e-01,1.895244627627028500e-01,3.302591231905295799e-01,-3.690852281239814965e-03,2.720021994940279367e-02,-8.934713304362938702e-02,-1.647842582794263877e-01 +-7.022740816108536954e-01,1.919821550007946853e-01,3.168427802898884371e-01,-3.655908010301331217e-03,2.716329589819729481e-02,-9.053183964422104046e-02,-1.580065570473310754e-01 +-7.007805318621018742e-01,1.938987766782999111e-01,3.036023909795180220e-01,-3.622063593171327613e-03,2.712592374671244505e-02,-9.144298856238372575e-02,-1.513147237758842578e-01 +-6.992839040443636911e-01,1.952904310365432827e-01,2.905594617360880982e-01,-3.589298350576005606e-03,2.708814838254304525e-02,-9.208857606502596216e-02,-1.447198485584015315e-01 +-6.977842047313435048e-01,1.961741092944723763e-01,2.777344215865885091e-01,-3.557590271320531811e-03,2.705001309998088591e-02,-9.247705983191734092e-02,-1.382324738382690599e-01 +-6.962814405102357718e-01,1.965676209697732324e-01,2.651465781948085798e-01,-3.526916117053039736e-03,2.701155954933568207e-02,-9.261732609167422359e-02,-1.318625854564202948e-01 +-6.947756179816976241e-01,1.964895228869645016e-01,2.528140919240359219e-01,-3.497251518766437625e-03,2.697282770177799466e-02,-9.251865515534336837e-02,-1.256195804701483731e-01 +-6.932667437598194482e-01,1.959590472986015575e-01,2.407539418559273003e-01,-3.468571052061568752e-03,2.693385582265364966e-02,-9.219068523939223658e-02,-1.195122607165396977e-01 +-6.917548244720952422e-01,1.949960317545167410e-01,2.289819013746396403e-01,-3.440848352130282221e-03,2.689468043967604971e-02,-9.164337033633222773e-02,-1.135488192433143084e-01 +-6.902398667593944159e-01,1.936208414566090164e-01,2.175125210648420626e-01,-3.414056196972621755e-03,2.685533631706737978e-02,-9.088695020497709887e-02,-1.077368239200351963e-01 +-6.887218772759319263e-01,1.918542973573130472e-01,2.063591091435490465e-01,-3.388166609169910443e-03,2.681585645676485760e-02,-8.993190832763416076e-02,-1.020832166112086159e-01 +-6.872008626892394112e-01,1.897176042552653208e-01,1.955337259669575056e-01,-3.363150947727518072e-03,2.677627206565127635e-02,-8.878892931445850345e-02,-9.659429689339345260e-02 +-6.856768296801356577e-01,1.872322698688696496e-01,1.850471722691645615e-01,-3.338979993913167646e-03,2.673661256194744679e-02,-8.746887336656589129e-02,-9.127572840796964460e-02 +-6.841497849426976252e-01,1.844200378698228338e-01,1.749089903540149982e-01,-3.315624062403336059e-03,2.669690558114824058e-02,-8.598272539897473810e-02,-8.613253171247574336e-02 +-6.826197351842306915e-01,1.813028108350228373e-01,1.651274667345295488e-01,-3.293053077292097414e-03,2.665717696652062166e-02,-8.434156458304610526e-02,-8.116908505946678065e-02 +-6.810866871252394539e-01,1.779025794655771653e-01,1.557096374299454800e-01,-3.271236676444729506e-03,2.661745078773698064e-02,-8.255652432124410689e-02,-7.638913064532043551e-02 +-6.795506474993976420e-01,1.742413501869868875e-01,1.466613023211462419e-01,-3.250144289103983758e-03,2.657774934751414020e-02,-8.063875808954272528e-02,-7.179577409540581490e-02 +-6.780116230535196964e-01,1.703410753249816334e-01,1.379870369781843464e-01,-3.229745229360961659e-03,2.653809321597068449e-02,-7.859940391167750950e-02,-6.739149863525456530e-02 +-6.764696205475297930e-01,1.662235885405092262e-01,1.296902149800724380e-01,-3.210008779157171380e-03,2.649850124057492873e-02,-7.644954303939840989e-02,-6.317816921993704704e-02 +-6.749246467544329775e-01,1.619105342894126276e-01,1.217730289855175618e-01,-3.190904277387936888e-03,2.645899057439870042e-02,-7.420017448237899160e-02,-5.915704828358387540e-02 +-6.733767084602854114e-01,1.574233070068953855e-01,1.142365219083737032e-01,-3.172401187880392708e-03,2.641957671535904301e-02,-7.186217650041178129e-02,-5.532880143060043415e-02 +-6.718258124641639517e-01,1.527829890549135261e-01,1.070806108930882211e-01,-3.154469182640496066e-03,2.638027353301749048e-02,-6.944627613148955658e-02,-5.169352509106162757e-02 +-6.702719655781371744e-01,1.480102901034561491e-01,1.003041276846739494e-01,-3.137078224244179391e-03,2.634109331738328924e-02,-6.696302223348438165e-02,-4.825075144500409058e-02 +-6.687151746272345099e-01,1.431254951759408089e-01,9.390485227602068941e-02,-3.120198622434741144e-03,2.630204681937981653e-02,-6.442274893380921241e-02,-4.499947347607797382e-02 +-6.671554464494171555e-01,1.381484079991691372e-01,8.787955346059045381e-02,-3.103801113417002135e-03,2.626314328628293754e-02,-6.183555495024538329e-02,-4.193816409081640995e-02 +-6.655927878955469890e-01,1.330983017105574207e-01,8.222403328245440901e-02,-3.087856915264634416e-03,2.622439052790784958e-02,-5.921127655908076798e-02,-3.906479440935520936e-02 +-6.640272058293580359e-01,1.279938744114658689e-01,7.693316897951003874e-02,-3.072337788634773442e-03,2.618579495688872980e-02,-5.655945925403549346e-02,-3.637686268057802735e-02 +-6.624587071274248284e-01,1.228532035482302887e-01,7.200096306936602797e-02,-3.057216105368988895e-03,2.614736164625600101e-02,-5.388933967454870300e-02,-3.387141328192740108e-02 +-6.608872986791325399e-01,1.176937078158692140e-01,6.742059322040604152e-02,-3.042464879558641564e-03,2.610909438138877062e-02,-5.120982175197223707e-02,-3.154506088731093283e-02 +-6.593129873866474533e-01,1.125321081249939514e-01,6.318445802225176877e-02,-3.028057841158979507e-03,2.607099572871179188e-02,-4.852946314439030018e-02,-2.939402649138554008e-02 +-6.577357801648857638e-01,1.073843998409026584e-01,5.928424009318283144e-02,-3.013969463571485909e-03,2.603306709196527569e-02,-4.585644923410021889e-02,-2.741414448934284542e-02 +-6.561556839414840470e-01,1.022658202381259152e-01,5.571094731799999811e-02,-3.000174999962548512e-03,2.599530876505401489e-02,-4.319858491660429067e-02,-2.560091371318088532e-02 +-6.545727056567677282e-01,9.719082422837055368e-02,5.245497703178002985e-02,-2.986650545946731381e-03,2.595772000321387718e-02,-4.056327977696453507e-02,-2.394950778379722686e-02 +-6.529868522637218842e-01,9.217306289686569287e-02,4.950616815033663520e-02,-2.973373042471577112e-03,2.592029908829365098e-02,-3.795753693598230560e-02,-2.245480979129482052e-02 +-6.513981307279594901e-01,8.722536770603617207e-02,4.685385669308213957e-02,-2.960320322446355866e-03,2.588304338910908664e-02,-3.538794020065745560e-02,-2.111144173680542385e-02 +-6.498065480276917771e-01,8.235973484139155754e-02,4.448693288429941134e-02,-2.947471126402195253e-03,2.584594941887769295e-02,-3.286064982977668358e-02,-1.991379153830862350e-02 +-6.482121111536969238e-01,7.758731351047033764e-02,4.239389706232301464e-02,-2.934805129957455353e-03,2.580901291048386115e-02,-3.038139968090144624e-02,-1.885604220672307263e-02 +-6.466148271092899691e-01,7.291840244741586152e-02,4.056291600658139263e-02,-2.922302950757471336e-03,2.577222888692961775e-02,-2.795548763742331660e-02,-1.793219959938922164e-02 +-6.450147029102913931e-01,6.836244685261344545e-02,3.898187701353662826e-02,-2.909946168607296154e-03,2.573559170664590362e-02,-2.558777371935026801e-02,-1.713612319559228159e-02 +-6.434117455849966971e-01,6.392803487398902862e-02,3.763844385652581614e-02,-2.897717330594044502e-03,2.569909514113333238e-02,-2.328268735003788392e-02,-1.646155160184211222e-02 +-6.418059621741456500e-01,5.962290670690204020e-02,3.652010931084499756e-02,-2.885599960205487147e-03,2.566273243425658268e-02,-2.104421902201297420e-02,-1.590213223938376219e-02 +-6.401973597308906472e-01,5.545395626836639030e-02,3.561424951636656122e-02,-2.873578553048075981e-03,2.562649636549465398e-02,-1.887593367538131089e-02,-1.545144483349214851e-02 +-6.385859453207668457e-01,5.142724759885528019e-02,3.490817193588176270e-02,-2.861638577841228107e-03,2.559037931202729627e-02,-1.678096394984303005e-02,-1.510303368693612612e-02 +-6.369717260216599675e-01,4.754802234989394250e-02,3.438916858480099703e-02,-2.849766476439122200e-03,2.555437329445338357e-02,-1.476202730947405103e-02,-1.485042640344283287e-02 +-6.353547089237763235e-01,4.382071815136303478e-02,3.404456223327029935e-02,-2.837949638800151431e-03,2.551847005602222995e-02,-1.282142957104300679e-02,-1.468716170773649854e-02 +-6.337349011296107282e-01,4.024898776509656939e-02,3.386175017538205906e-02,-2.826176410563400673e-03,2.548266110600980877e-02,-1.096107338156827350e-02,-1.460681736368392783e-02 +-6.321123097539158575e-01,3.683572009905188016e-02,3.382825339461298375e-02,-2.814436068907207218e-03,2.544693777942267504e-02,-9.182469230892590165e-03,-1.460302316018684092e-02 +-6.304869419236707184e-01,3.358306363798366917e-02,3.393175294023522554e-02,-2.802718804439102186e-03,2.541129127887441566e-02,-7.486747537257574560e-03,-1.466949274392109412e-02 +-6.288588047780496737e-01,3.049245041243193316e-02,3.416013396352111930e-02,-2.791015706186839897e-03,2.537571273987440298e-02,-5.874674893066487817e-03,-1.480003567349226154e-02 +-6.272279054683905786e-01,2.756462798707633166e-02,3.450151918418981750e-02,-2.779318732658436764e-03,2.534019326891079338e-02,-4.346659995629798337e-03,-1.498858362572705501e-02 +-6.255942511581639165e-01,2.479968459788478921e-02,3.494430591305963674e-02,-2.767620699869058690e-03,2.530472398472308898e-02,-2.902776686910316095e-03,-1.522920405429985247e-02 +-6.239578490229409358e-01,2.219708147965914966e-02,3.547719857179074815e-02,-2.755915241117005542e-03,2.526929607311050388e-02,-1.542777307717351767e-03,-1.551611623250364128e-02 +-6.223187062503623412e-01,1.975568636718682061e-02,3.608923583856238348e-02,-2.744196784953132403e-03,2.523390082556976888e-02,-2.661071770657027222e-04,-1.584371117052880279e-02 +-6.206768300401065419e-01,1.747380741129597390e-02,3.676982001407474304e-02,-2.732460528665675830e-03,2.519852966898432356e-02,9.280788544413811876e-04,-1.620656106000881863e-02 +-6.190322276038583427e-01,1.534922549322720667e-02,3.750873993007908247e-02,-2.720702399633232946e-03,2.516317420326842288e-02,2.040894469287074173e-03,-1.659943444382997249e-02 +-6.173849061652770809e-01,1.337923111978578210e-02,3.829619224195058430e-02,-2.708919035156665510e-03,2.512782624108345422e-02,3.073706628332282739e-03,-1.701730799499778443e-02 +-6.157348729599646520e-01,1.156066050312136093e-02,3.912280317700501941e-02,-2.697107734095277858e-03,2.509247783543560109e-02,4.028117343786092805e-03,-1.745537101578691769e-02 +-6.140821352354338680e-01,9.889933199067904579e-03,3.997963910991855080e-02,-2.685266433058401160e-03,2.505712130004001184e-02,4.905947388081659602e-03,-1.790904448844354152e-02 +-6.124267002510773716e-01,8.363084450072890211e-03,4.085822685626309958e-02,-2.673393671660079154e-03,2.502174923719755958e-02,5.709209534741101037e-03,-1.837397556721623651e-02 +-6.107685752781343291e-01,6.975806187521307704e-03,4.175055873450683047e-02,-2.661488547908472981e-03,2.498635457579412300e-02,6.440097931423397273e-03,-1.884605438204890043e-02 +-6.091077675996596774e-01,5.723483670905219096e-03,4.264910457403827543e-02,-2.649550697513764711e-03,2.495093056373796220e-02,7.100969608218449941e-03,-1.932141169068531811e-02 +-6.074442845104918165e-01,4.601227135298481888e-03,4.354681712344893318e-02,-2.637580240704193169e-03,2.491547080191240313e-02,7.694317347482755240e-03,-1.979642292392066669e-02 +-6.057781333172199689e-01,3.603912785999560057e-03,4.443713258986420334e-02,-2.625577757892326621e-03,2.487996925711704471e-02,8.222761186883933981e-03,-2.026771555152483140e-02 +-6.041093213381530935e-01,2.726214579258466088e-03,4.531397587469875032e-02,-2.613544258143210997e-03,2.484442026905164802e-02,8.689024120513420477e-03,-2.073216203025267032e-02 +-6.024378559032870228e-01,1.962639535768446065e-03,4.617175748604115254e-02,-2.601481123807784443e-03,2.480881856567072538e-02,9.095916244125624994e-03,-2.118688296503280083e-02 +-6.007637443542724887e-01,1.307561720300303180e-03,4.700536748712400709e-02,-2.589390102281666651e-03,2.477315927167274279e-02,9.446318543434743809e-03,-2.162925067314642666e-02 +-5.990869940443830366e-01,7.552554442169575211e-04,4.781017751912940150e-02,-2.577273249037169509e-03,2.473743790167466997e-02,9.743167669407221121e-03,-2.205687271203571678e-02 +-5.974076123384820525e-01,2.999231315027374185e-04,4.858202283604640126e-02,-2.565132907242950819e-03,2.470165037195332092e-02,9.989433428049331043e-03,-2.246760826075557688e-02 +-5.957256066129916761e-01,-6.427155132781462788e-05,4.931720550897786270e-02,-2.552971672693799769e-03,2.466579300677646663e-02,1.018810911163661563e-02,-2.285954047256263943e-02 +-5.940409842558590503e-01,-3.431749976001758251e-04,5.001247086985122908e-02,-2.540792353389330870e-03,2.462986253099290479e-02,1.034219671591553140e-02,-2.323099496673301095e-02 +-5.923537526665245689e-01,-5.426176586107509100e-04,5.066500692980918652e-02,-2.528597959578081540e-03,2.459385605960601531e-02,1.045469049924867451e-02,-2.358051151476174903e-02 +-5.906639192558894580e-01,-6.683893695264464703e-04,5.127242189645571380e-02,-2.516391641272881785e-03,2.455777111236506771e-02,1.052856162177438770e-02,-2.390685338048116962e-02 +-5.889714914462828022e-01,-7.262101414760933470e-04,5.183273362763451964e-02,-2.504176693396206223e-03,2.452160558822697248e-02,1.056675500202554525e-02,-2.420899119003639549e-02 +-5.872764766714295703e-01,-7.217157869476879470e-04,5.234435117362176010e-02,-2.491956504133989794e-03,2.448535776963621513e-02,1.057216219823429830e-02,-2.448609887318986197e-02 +-5.855788823764169759e-01,-6.604273799885975512e-04,5.280605981139950555e-02,-2.479734548261748432e-03,2.444902631541495328e-02,1.054762853175448059e-02,-2.473754062132353659e-02 +-5.838787160176629465e-01,-5.477393905434893616e-04,5.321700018838271956e-02,-2.467514341745764579e-03,2.441261023085142950e-02,1.049592919782391109e-02,-2.496286678105473153e-02 +-5.821759850628824839e-01,-3.888998951934138134e-04,5.357665213004861315e-02,-2.455299442230360601e-03,2.437610888170111192e-02,1.041976427955797722e-02,-2.516179921839498124e-02 +-5.804706969910552461e-01,-1.889928369934419986e-04,5.388481339683730276e-02,-2.443093397788720714e-03,2.433952197049724098e-02,1.032175398592195641e-02,-2.533422468840746539e-02 +-5.787628592923925730e-01,4.707442154708450148e-05,5.414157901162785363e-02,-2.430899761925054119e-03,2.430284951604670327e-02,1.020442802819770435e-02,-2.548018598445855876e-02 +-5.770524794683048464e-01,3.145813647269638837e-04,5.434732509310442883e-02,-2.418722040616493561e-03,2.426609185973861368e-02,1.007021494566030795e-02,-2.559986379528656972e-02 +-5.753395650313679610e-01,6.090133462899773161e-04,5.450268173343846695e-02,-2.406563699736586834e-03,2.422924963048167166e-02,9.921451839513571042e-03,-2.569357855751535843e-02 +-5.736241235052911280e-01,9.260581214787934985e-04,5.460851940009370936e-02,-2.394428134654316438e-03,2.419232373133502201e-02,9.760355860990140367e-03,-2.576176669106269509e-02 +-5.719061624248831244e-01,1.261625307763847986e-03,5.466592214817111300e-02,-2.382318664208531080e-03,2.415531534990304416e-02,9.589042528159189005e-03,-2.580498158092749475e-02 +-5.701856893360195411e-01,1.611846023227287176e-03,5.467617216456008289e-02,-2.370238511178958309e-03,2.411822590127931515e-02,9.409510608992650849e-03,-2.582387358975640063e-02 +-5.684627117956096987e-01,1.973078447836932973e-03,5.464072482123254193e-02,-2.358190798916675029e-03,2.408105704627799479e-02,9.223641723522574623e-03,-2.581918839268257035e-02 +-5.667372373715630074e-01,2.341911817801750704e-03,5.456119389866213865e-02,-2.346178533239211012e-03,2.404381065032335993e-02,9.033200486559153988e-03,-2.579174700306954837e-02 +-5.650092736427564377e-01,2.715165483496650555e-03,5.443932705717952070e-02,-2.334204594388020453e-03,2.400648879560712476e-02,8.839828724527317577e-03,-2.574244506126715029e-02 +-5.632788281990006585e-01,3.089896141730149280e-03,5.427698988728479890e-02,-2.322271732319586366e-03,2.396909373476417693e-02,8.645058188662315540e-03,-2.567223721722430851e-02 +-5.615459086410068412e-01,3.463386485155130635e-03,5.407614565191294631e-02,-2.310382565471137076e-03,2.393162788311797504e-02,8.450291424525708389e-03,-2.558213095483371016e-02 +-5.598105225803533536e-01,3.833150343692720563e-03,5.383884092179484077e-02,-2.298539559356182517e-03,2.389409382727966311e-02,8.256816383796248410e-03,-2.547317089846240848e-02 +-5.580726776394524524e-01,4.196930458061158206e-03,5.356718079476888855e-02,-2.286745040068785713e-03,2.385649427880612294e-02,8.065809852932152163e-03,-2.534644433134180391e-02 +-5.563323814515162002e-01,4.552688143617756623e-03,5.326332349421427498e-02,-2.275001189471043041e-03,2.381883206553081500e-02,7.878328116292047587e-03,-2.520305219269726796e-02 +-5.545896416605240464e-01,4.898602860597077273e-03,5.292945492612097236e-02,-2.263310020689348820e-03,2.378111013203164023e-02,7.695318436281278068e-03,-2.504411958202757554e-02 +-5.528444659211878554e-01,5.233065575622272356e-03,5.256777835574306251e-02,-2.251673413692571561e-03,2.374333150720712993e-02,7.517620934175859096e-03,-2.487077994088283950e-02 +-5.510968618989195988e-01,5.554670845672405634e-03,5.218050127922887332e-02,-2.240093088265568758e-03,2.370549929528068672e-02,7.345969916399078953e-03,-2.468416660198278698e-02 +-5.493468372697968283e-01,5.862208335713316350e-03,5.176982061651235617e-02,-2.228570619669732224e-03,2.366761666617754611e-02,7.180995777702954899e-03,-2.448540965694624358e-02 +-5.475943997205290348e-01,6.154657157659637091e-03,5.133791252060780630e-02,-2.217107423140975229e-03,2.362968685295031845e-02,7.023233632301628844e-03,-2.427562638640784823e-02 +-5.458395569484247867e-01,6.431178150593401532e-03,5.088691702371794107e-02,-2.205704782163818694e-03,2.359171311671602600e-02,6.873129324022036156e-03,-2.405592362017465458e-02 +-5.440823166613565354e-01,6.691100319241367021e-03,5.041893665377097933e-02,-2.194363829985069344e-03,2.355369874482660109e-02,6.731035527745586611e-03,-2.382737616795381139e-02 +-5.423226865777274197e-01,6.933915076953753830e-03,4.993601636902791213e-02,-2.183085555413988212e-03,2.351564704977263109e-02,6.597223338210456318e-03,-2.359104254480984264e-02 +-5.405606744264380703e-01,7.159266665517124983e-03,4.944014448235803383e-02,-2.171870821568732107e-03,2.347756134546651746e-02,6.471887359107594308e-03,-2.334794359405732364e-02 +-5.387962879468511934e-01,7.366939695890535600e-03,4.893323877410574868e-02,-2.160720356301644527e-03,2.343944494380703625e-02,6.355145884192018625e-03,-2.309907110911120151e-02 +-5.370295348887590414e-01,7.556852339262330222e-03,4.841714661904426842e-02,-2.149634767784710902e-03,2.340130114574482528e-02,6.247051991626536953e-03,-2.284537236805748647e-02 +-5.352604230123479967e-01,7.729042586061911010e-03,4.789363217040022713e-02,-2.138614532211897436e-03,2.336313323329506156e-02,6.147591796222972096e-03,-2.258776107576277239e-02 +-5.334889600881657090e-01,7.883662879652619457e-03,4.736437745399477844e-02,-2.127660034364972207e-03,2.332494446597790580e-02,6.056698405075783796e-03,-2.232710430402347693e-02 +-5.317151538970863456e-01,8.020965420210702732e-03,4.683097770538099947e-02,-2.116771529714576387e-03,2.328673806087095885e-02,5.974248234704035432e-03,-2.206422223657999657e-02 +-5.299390122302762851e-01,8.141294979712261279e-03,4.629493391063854713e-02,-2.105949190995695577e-03,2.324851720933230476e-02,5.900071130953299207e-03,-2.179989508912942228e-02 +-5.281605428891603671e-01,8.245080292659665716e-03,4.575765922692614368e-02,-2.095193086299206465e-03,2.321028504566752249e-02,5.833957411349361549e-03,-2.153484585173623747e-02 +-5.263797536853872527e-01,8.332820858966074815e-03,4.522046786310129907e-02,-2.084503193935429485e-03,2.317204465483828357e-02,5.775655782857346855e-03,-2.126975767315099700e-02 +-5.245966524407953413e-01,8.405079931976094584e-03,4.468458178036045086e-02,-2.073879414567398900e-03,2.313379907183026399e-02,5.724882229285094964e-03,-2.100525948958330974e-02 +-5.228112469873781309e-01,8.462476195846738158e-03,4.415112565773287451e-02,-2.063321572970234098e-03,2.309555127864234073e-02,5.681325718412826714e-03,-2.074193521195950851e-02 +-5.210235451672501350e-01,8.505674314664984567e-03,4.362113182922788129e-02,-2.052829426425417051e-03,2.305730419155514743e-02,5.644650987251311272e-03,-2.048031566554308486e-02 +-5.192335548326121319e-01,8.535376785939919816e-03,4.309553704055206491e-02,-2.042402662975766753e-03,2.301906066793034736e-02,5.614502892948469021e-03,-2.022088720837093498e-02 +-5.174412838457173036e-01,8.552316177044541265e-03,4.257518615505223591e-02,-2.032040925643709833e-03,2.298082349567381888e-02,5.590510538478793597e-03,-1.996408855024721374e-02 +-5.156467400788354860e-01,8.557247000154255545e-03,4.206083521747625703e-02,-2.021743803178181406e-03,2.294259540499231018e-02,5.572289724990260769e-03,-1.971030989521727408e-02 +-5.138499314142201957e-01,8.550941344637286862e-03,4.155315168711019858e-02,-2.011510842602856619e-03,2.290437905415309422e-02,5.559451410014989818e-03,-1.945989839900508570e-02 +-5.120508657440723255e-01,8.534178880244646173e-03,4.105272048426647502e-02,-2.001341545387210017e-03,2.286617703160824175e-02,5.551598539364125881e-03,-1.921315362345032626e-02 +-5.102495509705069487e-01,8.507742060136679052e-03,4.056004182058329383e-02,-1.991235378808188430e-03,2.282799186536882061e-02,5.548331533230466822e-03,-1.897033918366389585e-02 +-5.084459950055177924e-01,8.472412666508496309e-03,4.007554038275461944e-02,-1.981191789875449104e-03,2.278982601766682894e-02,5.549255178301843242e-03,-1.873167378758594862e-02 +-5.066402057709425977e-01,8.428963036387029928e-03,3.959956491015132612e-02,-1.971210196219332599e-03,2.275168188034439043e-02,5.553974428847811146e-03,-1.849734086281914353e-02 +-5.048321911984281485e-01,8.378155042239392516e-03,3.913239479711464602e-02,-1.961290005919748280e-03,2.271356178848547847e-02,5.562103632095916073e-03,-1.826748548584943357e-02 +-5.030219592293964093e-01,8.320733405553327328e-03,3.867424284781754984e-02,-1.951430604630767854e-03,2.267546799885884062e-02,5.573264021851833359e-03,-1.804221890173847431e-02 +-5.012095178150079988e-01,8.257421265051821668e-03,3.822525950316007365e-02,-1.941631374263828565e-03,2.263740272385979940e-02,5.587084348999443034e-03,-1.782162056621200766e-02 +-4.993948749161288836e-01,8.188922415194956062e-03,3.778553727927127376e-02,-1.931891686245481264e-03,2.259936809969105564e-02,5.603212950690956290e-03,-1.760573998473937904e-02 +-4.975780385032944064e-01,8.115908589754333713e-03,3.735511534983113058e-02,-1.922210914490988295e-03,2.256136620915870708e-02,5.621300739327221921e-03,-1.739459839151697579e-02 +-4.957590165566744811e-01,8.039027410116392908e-03,3.693398286959707899e-02,-1.912588428465243759e-03,2.252339908846186928e-02,5.641021931349807575e-03,-1.718819281923511869e-02 +-4.939378170660390643e-01,7.958894459769152649e-03,3.652208440948137569e-02,-1.903023612206040417e-03,2.248546870583946478e-02,5.662064043958273292e-03,-1.698649616724137029e-02 +-4.921144480307220181e-01,7.876092006247902705e-03,3.611932449773660320e-02,-1.893515844012348638e-03,2.244757698231416784e-02,5.684129429607079516e-03,-1.678945883502839034e-02 +-4.902889174595871369e-01,7.791169458735564858e-03,3.572556960990212055e-02,-1.884064519893050450e-03,2.240972579246768015e-02,5.706939184044267088e-03,-1.659701496628817122e-02 +-4.884612333709922316e-01,7.704641970278039258e-03,3.534065541357377105e-02,-1.874669037482722686e-03,2.237191695890857965e-02,5.730232743164725105e-03,-1.640907861105145998e-02 +-4.866314037927538805e-01,7.616988940316076623e-03,3.496438645094698122e-02,-1.865328817242975414e-03,2.233415225979571220e-02,5.753766511520303981e-03,-1.622555378715910546e-02 +-4.847994367621128453e-01,7.528654157256611454e-03,3.459654543745539768e-02,-1.856043289317237414e-03,2.229643343036990383e-02,5.777314837171277154e-03,-1.604632618696845425e-02 +-4.829653403256978783e-01,7.440045908706990896e-03,3.423689144087976172e-02,-1.846811903570216547e-03,2.225876217277395278e-02,5.800670250908020013e-03,-1.587127536923104223e-02 +-4.811291225394909166e-01,7.351538548452069693e-03,3.388516876424877561e-02,-1.837634120666124544e-03,2.222114014368856832e-02,5.823645817135341719e-03,-1.570026648410148204e-02 +-4.792907914687918880e-01,7.263469772826242066e-03,3.354110364626989443e-02,-1.828509424638596718e-03,2.218356896792129593e-02,5.846068874763852771e-03,-1.553316441464809572e-02 +-4.774503551881826846e-01,7.176144879173566166e-03,3.320441518358149130e-02,-1.819437320423635245e-03,2.214605023543290974e-02,5.867787359843888370e-03,-1.536982080418510498e-02 +-4.756078217814919684e-01,7.089834040785752099e-03,3.287481139666187380e-02,-1.810417326739646336e-03,2.210858551089195032e-02,5.888662579153870931e-03,-1.521008847308366260e-02 +-4.737631993417600329e-01,7.004778494863576031e-03,3.255199682475292539e-02,-1.801448986265218781e-03,2.207117632593243867e-02,5.908578293787354087e-03,-1.505381373966759723e-02 +-4.719164959712024987e-01,6.921185016256010264e-03,3.223567144254425865e-02,-1.792531858978685909e-03,2.203382418271722723e-02,5.927427489930719995e-03,-1.490084452524420566e-02 +-4.700677197811755081e-01,6.839233045741967623e-03,3.192553625955306396e-02,-1.783665528202965558e-03,2.199653056852958477e-02,5.945122578958180609e-03,-1.475102543760129815e-02 +-4.682168788921395874e-01,6.759073230858424382e-03,3.162129260598748465e-02,-1.774849592884090999e-03,2.195929694110998823e-02,5.961589247010045875e-03,-1.460420420081512405e-02 +-4.663639814336241196e-01,6.680829528874578788e-03,3.132264627716714600e-02,-1.766083675191705820e-03,2.192212474131974981e-02,5.976766782080427005e-03,-1.446022848729111056e-02 +-4.645090355441917063e-01,6.604600778774773369e-03,3.102930774338939074e-02,-1.757367411526364331e-03,2.188501539143656791e-02,5.990607248607587865e-03,-1.431894964633405147e-02 +-4.626520493714022519e-01,6.530463203966128949e-03,3.074099402784187876e-02,-1.748700461429566165e-03,2.184797029473357216e-02,6.003076292667677838e-03,-1.418022283449024354e-02 +-4.607930310717773814e-01,6.458469777143255666e-03,3.045743059417226062e-02,-1.740082502746092370e-03,2.181099083862137952e-02,6.014147989688571616e-03,-1.404390667017868348e-02 +-4.589319888107645240e-01,6.388654672181611274e-03,3.017835228940261155e-02,-1.731513229509802285e-03,2.177407840052940011e-02,6.023809192400767822e-03,-1.390986421743869300e-02 +-4.570689307627009978e-01,6.321032838607055192e-03,2.990350353143142112e-02,-1.722992353723112229e-03,2.173723434152212419e-02,6.032054730944686090e-03,-1.377796496142963227e-02 +-4.552038651107780942e-01,6.255603256738279279e-03,2.963264074934597592e-02,-1.714519603386286317e-03,2.170046001353252013e-02,6.038889542897547640e-03,-1.364808215496433538e-02 +-4.533368000470053838e-01,6.192348965837752087e-03,2.936553108217770588e-02,-1.706094718881710250e-03,2.166375675385212585e-02,6.044324809667239728e-03,-1.352009679628529519e-02 +-4.514677437721743569e-01,6.131239957099677423e-03,2.910195404625583529e-02,-1.697717459860478536e-03,2.162712589584220302e-02,6.048379547794534121e-03,-1.339389567886851116e-02 +-4.495967044958226744e-01,6.072234466179324448e-03,2.884170216775705162e-02,-1.689387596342314815e-03,2.159056875761548519e-02,6.051079305344848371e-03,-1.326937104308552329e-02 +-4.477236904361980296e-01,6.015279336879801944e-03,2.858457985068673518e-02,-1.681104911771366629e-03,2.155408665462563156e-02,6.052453242961259791e-03,-1.314642322189494769e-02 +-4.458487098202219556e-01,5.960313629548907866e-03,2.833040489538829476e-02,-1.672869203533229121e-03,2.151768089313787005e-02,6.052537442209582515e-03,-1.302495802652452357e-02 +-4.439717708834537979e-01,5.907267962551059591e-03,2.807900762474541470e-02,-1.664680278250831426e-03,2.148135277180455646e-02,6.051370371988783019e-03,-1.290488835257836038e-02 +-4.420928818700546881e-01,5.856066882299226783e-03,2.783023106193814253e-02,-1.656537955458946797e-03,2.144510358339755257e-02,6.048994203981317326e-03,-1.278613357348238992e-02 +-4.402120510327509062e-01,5.806629629947453360e-03,2.758393110937556666e-02,-1.648442060912564669e-03,2.140893461681044052e-02,6.045453302150747242e-03,-1.266861871569007272e-02 +-4.383292866327981319e-01,5.758872022927685741e-03,2.733997502456747206e-02,-1.640392430665325516e-03,2.137284715393298479e-02,6.040794990241978198e-03,-1.255227663491768203e-02 +-4.364445969399446956e-01,5.712706684254858190e-03,2.709824253393211188e-02,-1.632388910150808260e-03,2.133684247088685204e-02,6.035067414253236610e-03,-1.243704507754790634e-02 +-4.345579902323956079e-01,5.668044312024540440e-03,2.685862482247400665e-02,-1.624431349724910672e-03,2.130092183797750033e-02,6.028319551303623278e-03,-1.232286757925794146e-02 +-4.326694747967760879e-01,5.624794492514429214e-03,2.662102336499975325e-02,-1.616519606415693557e-03,2.126508652472522756e-02,6.020600560017008544e-03,-1.220969453695092827e-02 +-4.307790589280949822e-01,5.582867805436900373e-03,2.638535067333741893e-02,-1.608653546422158425e-03,2.122933778992871015e-02,6.011961750494680852e-03,-1.209748058561651435e-02 +-4.288867509297087377e-01,5.542173136551628576e-03,2.615152911819506459e-02,-1.600833035999456311e-03,2.119367689136326255e-02,6.002449762532622961e-03,-1.198618550255931350e-02 +-4.269925591132845422e-01,5.502623087407194219e-03,2.591948981834328591e-02,-1.593057950392711298e-03,2.115810508600379086e-02,5.992115117108452391e-03,-1.187577492180489670e-02 +-4.250964917987639091e-01,5.464131096351744835e-03,2.568917352917523406e-02,-1.585328165224609166e-03,2.112262361742559685e-02,5.981005412540714439e-03,-1.176621725370345997e-02 +-4.231985573143263735e-01,5.426612873928989868e-03,2.546052812155854758e-02,-1.577643561455181493e-03,2.108723372805288918e-02,5.969166790111250440e-03,-1.165748696829521994e-02 +-4.212987639963524100e-01,5.389986934693695095e-03,2.523350989104409040e-02,-1.570004021113185697e-03,2.105193665493449232e-02,5.956643878923444622e-03,-1.154956068238244260e-02 +-4.193971201893874623e-01,5.354175480493433316e-03,2.500808101641209535e-02,-1.562409430858401104e-03,2.101673363016456561e-02,5.943480564694476193e-03,-1.144242046626948658e-02 +-4.174936342461046945e-01,5.319103476493775365e-03,2.478421116246951744e-02,-1.554859677617422041e-03,2.098162587886793704e-02,5.929717526919872465e-03,-1.133604939237594679e-02 +-4.155883145272687984e-01,5.284700189800269933e-03,2.456187434311292439e-02,-1.547354650756612576e-03,2.094661462392438320e-02,5.915394540662161776e-03,-1.123043599451047266e-02 +-4.136811694016988561e-01,5.250898624368576309e-03,2.434105071949514698e-02,-1.539894242931961858e-03,2.091170107922361854e-02,5.900548972295363397e-03,-1.112556950724229166e-02 +-4.117722072462318139e-01,5.217635738722203181e-03,2.412172470395490889e-02,-1.532478346189282848e-03,2.087688645498156359e-02,5.885215870950202648e-03,-1.102144207473277834e-02 +-4.098614364456857340e-01,5.184852579361850372e-03,2.390388502881930430e-02,-1.525106849835698155e-03,2.084217195588303900e-02,5.869428020279134593e-03,-1.091804732521894396e-02 +-4.079488653928228237e-01,5.152495297258193746e-03,2.368752256470733897e-02,-1.517779648704224152e-03,2.080755877781804292e-02,5.853217753805124192e-03,-1.081538322187045736e-02 +-4.060345024883124099e-01,5.120512393470191999e-03,2.347263303907816401e-02,-1.510496638042366615e-03,2.077304810945181102e-02,5.836611798100639625e-03,-1.071344578716974284e-02 +-4.041183561406945790e-01,5.088858576522611692e-03,2.325921331356070598e-02,-1.503257711144839878e-03,2.073864113786741403e-02,5.819638600766120252e-03,-1.061223497080597197e-02 +-4.022004347663427071e-01,5.057491484172920403e-03,2.304726371380937458e-02,-1.496062760598068512e-03,2.070433903110255325e-02,5.802322357444383502e-03,-1.051174923820374051e-02 +-4.002807467894267113e-01,5.026371930577063288e-03,2.283678447574479659e-02,-1.488911677768194964e-03,2.067014296288710931e-02,5.784683717253710084e-03,-1.041199126232847383e-02 +-3.983593006418760796e-01,4.995466423292369527e-03,2.262777869922966145e-02,-1.481804358383814789e-03,2.063605409377784750e-02,5.766744803728390996e-03,-1.031296148677939180e-02 +-3.964361047633429003e-01,4.964743674572019531e-03,2.242024938068387305e-02,-1.474740693506153104e-03,2.060207357758444754e-02,5.748523025322768362e-03,-1.021466286466016747e-02 +-3.945111676011643920e-01,4.934176565011568101e-03,2.221420104831889342e-02,-1.467720578269379695e-03,2.056820256046065776e-02,5.730035150897445223e-03,-1.011709704062226274e-02 +-3.925844976103263773e-01,4.903741354346490174e-03,2.200963802847626105e-02,-1.460743901493775920e-03,2.053444218173411037e-02,5.711296257858388542e-03,-1.002026692246319427e-02 +-3.906561032534255906e-01,4.873417250368549357e-03,2.180656475048979581e-02,-1.453810558184050021e-03,2.050079356838336100e-02,5.692319377938275665e-03,-9.924175501385119544e-03 +-3.887259930006330966e-01,4.843186276474868365e-03,2.160498609791566479e-02,-1.446920437341663946e-03,2.046725784543524870e-02,5.673115722518447257e-03,-9.828824654803154301e-03 +-3.867941753296565421e-01,4.813033896623165886e-03,2.140490569277535976e-02,-1.440073432302075261e-03,2.043383612232259339e-02,5.653696341028410961e-03,-9.734217891644886267e-03 +-3.848606587257031308e-01,4.782946765326753086e-03,2.120632810449920094e-02,-1.433269431926804905e-03,2.040052950771024942e-02,5.634068395728634646e-03,-9.640355803033115456e-03 +-3.829254516814425413e-01,4.752916107262450049e-03,2.100925559574587029e-02,-1.426508325596199033e-03,2.036733909415731902e-02,5.614241993809643803e-03,-9.547241817230643982e-03 +-3.809885626969692352e-01,4.722932194955329656e-03,2.081369084400523997e-02,-1.419790005221499866e-03,2.033426597009796791e-02,5.594220325008321913e-03,-9.454876808078551201e-03 +-3.790500002797654311e-01,4.692990260466270262e-03,2.061963512066292137e-02,-1.413114361849087623e-03,2.030131121748535161e-02,5.574011230429182923e-03,-9.363262274741435132e-03 +-3.771097729446636349e-01,4.663085295868108632e-03,2.042708982462604697e-02,-1.406481283478606265e-03,2.026847590414267847e-02,5.553617926434906496e-03,-9.272397282273740771e-03 +-3.751678892138092802e-01,4.633214996155272068e-03,2.023605370011598639e-02,-1.399890657355142530e-03,2.023576109235021223e-02,5.533044981179525632e-03,-9.182283541123395509e-03 +-3.732243576166232035e-01,4.603377622813159195e-03,2.004652589607399185e-02,-1.393342374786121947e-03,2.020316783232954660e-02,5.512294749503675317e-03,-9.092919576339250284e-03 +-3.712791866897643955e-01,4.573572930579619146e-03,1.985850331543489652e-02,-1.386836324330455086e-03,2.017069716836983675e-02,5.491369531833142047e-03,-9.004305642608519122e-03 +-3.693323849770918654e-01,4.543801677714114952e-03,1.967198308508413818e-02,-1.380372399509313154e-03,2.013835013536595039e-02,5.470271055273658554e-03,-8.916439073154517128e-03 +-3.673839610296283364e-01,4.514065784785294307e-03,1.948696063981042795e-02,-1.373950485710353315e-03,2.010612775700361399e-02,5.449001147290180244e-03,-8.829317883803984579e-03 +-3.654339234055213326e-01,4.484367195507386709e-03,1.930343036030149687e-02,-1.367570474715858880e-03,2.007403104965025872e-02,5.427559951724975074e-03,-8.742939616039897832e-03 +-3.634822806700064302e-01,4.454709382034544897e-03,1.912138559007880295e-02,-1.361232254312270441e-03,2.004206101833244658e-02,5.405949077716310988e-03,-8.657301368771266187e-03 +-3.615290413953694548e-01,4.425094464299221542e-03,1.894081877646556014e-02,-1.354935719361252677e-03,2.001021866340795680e-02,5.384166499830138342e-03,-8.572399618360938486e-03 +-3.595742141609086784e-01,4.395527599929367236e-03,1.876172189416523001e-02,-1.348680757141322399e-03,1.997850497658117466e-02,5.362215064394726567e-03,-8.488229526360312888e-03 +-3.576178075528972378e-01,4.366011706405491764e-03,1.858408513113861057e-02,-1.342467262350785593e-03,1.994692093338671854e-02,5.340092843719179000e-03,-8.404787520030465239e-03 +-3.556598301645456095e-01,4.336550724874201242e-03,1.840789904728310292e-02,-1.336295123542686862e-03,1.991546751330997012e-02,5.317799480466087712e-03,-8.322067372787948780e-03 +-3.537002905959633070e-01,4.307149855233072429e-03,1.823315183388633451e-02,-1.330164236862903639e-03,1.988414567581026429e-02,5.295336828986228067e-03,-8.240065479704503093e-03 +-3.517391974541219102e-01,4.277812117259600228e-03,1.805983302714050595e-02,-1.324074493677239667e-03,1.985295637434387681e-02,5.272702683951061356e-03,-8.158774038207007584e-03 +-3.497765593528163741e-01,4.248541991948065998e-03,1.788792952649684645e-02,-1.318025788721637276e-03,1.982190055699867282e-02,5.249897761740042468e-03,-8.078188661186716524e-03 +-3.478123849126281697e-01,4.219343422591797024e-03,1.771742936445448377e-02,-1.312018014372967091e-03,1.979097915886130279e-02,5.226922084696075839e-03,-7.998301458546422382e-03 +-3.458466827608864258e-01,4.190219970916378781e-03,1.754831808024193834e-02,-1.306051069597660014e-03,1.976019310744735191e-02,5.203775385409560063e-03,-7.919107986422894638e-03 +-3.438794615316307368e-01,4.161174782392951764e-03,1.738058373826601236e-02,-1.300124847444383272e-03,1.972954332353553039e-02,5.180457133940858695e-03,-7.840597986314203419e-03 +-3.419107298655730265e-01,4.132211382849788547e-03,1.721421000872136106e-02,-1.294239244991084040e-03,1.969903072021638585e-02,5.156968105780289531e-03,-7.762768504749787725e-03 +-3.399404964100594673e-01,4.103332649656439221e-03,1.704918443628626254e-02,-1.288394163632652998e-03,1.966865619876689147e-02,5.133308510434400965e-03,-7.685608531219171972e-03 +-3.379687698190326217e-01,4.074541036476575333e-03,1.688549097751973135e-02,-1.282589496391878768e-03,1.963842065510996993e-02,5.109478455111951360e-03,-7.609113127117193322e-03 +-3.359955587529934729e-01,4.045838763985910511e-03,1.672311510738459794e-02,-1.276825148707899605e-03,1.960832497648600653e-02,5.085478330827571981e-03,-7.533273981542943330e-03 +-3.340208718789632880e-01,4.017228384378589146e-03,1.656204173701174634e-02,-1.271101018695897002e-03,1.957837004194269703e-02,5.061309886705395687e-03,-7.458083439168661406e-03 +-3.320447178704457047e-01,3.988711295603309011e-03,1.640225601166424643e-02,-1.265417007425917537e-03,1.954855671598845421e-02,5.036973448801766728e-03,-7.383533120620808764e-03 +-3.300671054073883171e-01,3.960288072123457556e-03,1.624374202967125355e-02,-1.259773014223318976e-03,1.951888586733255271e-02,5.012468535263305859e-03,-7.309616432850205882e-03 +-3.280880431761448723e-01,3.931961433405484554e-03,1.608648452744780577e-02,-1.254168946193080867e-03,1.948935834587007740e-02,4.987799408308869580e-03,-7.236325510342926821e-03 +-3.261075398694370242e-01,3.903730152340280802e-03,1.593046847107319752e-02,-1.248604705982483574e-03,1.945997499805374176e-02,4.962963631299920249e-03,-7.163652073166128925e-03 +-3.241256041863160853e-01,3.875596405028677079e-03,1.577567866528395424e-02,-1.243080196953126907e-03,1.943073666464607829e-02,4.937965822057611108e-03,-7.091588244242027009e-03 +-3.221422448321247800e-01,3.847558937132963732e-03,1.562209942062777086e-02,-1.237595324338742900e-03,1.940164417282610326e-02,4.912804806982243831e-03,-7.020127248845322958e-03 +-3.201574705184592196e-01,3.819618649682791391e-03,1.546971616093622930e-02,-1.232149995383092665e-03,1.937269835124684828e-02,4.887484057721014699e-03,-6.949260468302276804e-03 +-3.181712899631304881e-01,3.791775127499631345e-03,1.531851313323893786e-02,-1.226744116985583862e-03,1.934390000779243057e-02,4.862005141615539580e-03,-6.878981793290410950e-03 +-3.161837118901262844e-01,3.764027133331417606e-03,1.516847622978820254e-02,-1.221377601306237928e-03,1.931524995468423059e-02,4.836368605641347580e-03,-6.809282384604093200e-03 +-3.141947450295727307e-01,3.736374747344313376e-03,1.501959071094592472e-02,-1.216050352310439056e-03,1.928674898993321118e-02,4.810577943972655160e-03,-6.740154987165388432e-03 +-3.122043981176959027e-01,3.708816098747454420e-03,1.487184164109076230e-02,-1.210762283345513928e-03,1.925839790783685085e-02,4.784633424703068662e-03,-6.671593150160959156e-03 +-3.102126798967836940e-01,3.681350999345517263e-03,1.472521549647258510e-02,-1.205513302750955886e-03,1.923019749423963975e-02,4.758538857335898112e-03,-6.603588243127345159e-03 +-3.082195991151471803e-01,3.653977744731780244e-03,1.457969733732052131e-02,-1.200303321901020816e-03,1.920214852410273565e-02,4.732295550092018022e-03,-6.536134782906254451e-03 +-3.062251645270823164e-01,3.626695091127821093e-03,1.443527359379734830e-02,-1.195132256826228912e-03,1.917425176814580370e-02,4.705905972384196410e-03,-6.469225228749376534e-03 +-3.042293848928312450e-01,3.599501126330457122e-03,1.429193120955490719e-02,-1.190000018029336790e-03,1.914650798926494785e-02,4.679371594854747803e-03,-6.402851601834994369e-03 +-3.022322689785442162e-01,3.572394656349027215e-03,1.414965596102981706e-02,-1.184906518174455060e-03,1.911891794584378440e-02,4.652695447937960256e-03,-6.337008645209164619e-03 +-3.002338255562408409e-01,3.545374161109814733e-03,1.400843521739189058e-02,-1.179851673998324373e-03,1.909148238183115340e-02,4.625880124181306480e-03,-6.271688653285549010e-03 +-2.982340634037716764e-01,3.518436963787428123e-03,1.386825560786576587e-02,-1.174835398236919511e-03,1.906420204214578570e-02,4.598926183548924032e-03,-6.206885845918779995e-03 +-2.962329913047793695e-01,3.491582571968160353e-03,1.372910427775477123e-02,-1.169857612057029900e-03,1.903707766070969476e-02,4.571838382911763872e-03,-6.142594010336215743e-03 +-2.942306180486604639e-01,3.464808238209806258e-03,1.359096942177647101e-02,-1.164918229343588638e-03,1.901010996083185231e-02,4.544617319388043718e-03,-6.078805495062116794e-03 +-2.922269524305266541e-01,3.438112136097111558e-03,1.345383778244451344e-02,-1.160017168996076197e-03,1.898329966700545990e-02,4.517265355460906712e-03,-6.015515891564733424e-03 +-2.902220032511660386e-01,3.411492971329204912e-03,1.331769804990400562e-02,-1.155154350858214910e-03,1.895664749040080685e-02,4.489785862075029894e-03,-5.952717648619627357e-03 +-2.882157793170046500e-01,3.384948349616051424e-03,1.318253798282688002e-02,-1.150329692139045144e-03,1.893015413595839844e-02,4.462180133571407893e-03,-5.890405457739212453e-03 +-2.862082894400676536e-01,3.358476687583377817e-03,1.304834594826071587e-02,-1.145543113900912308e-03,1.890382030415835302e-02,4.434450948220244881e-03,-5.828573351472606315e-03 +-2.841995424379405444e-01,3.332075987803843377e-03,1.291511036414676010e-02,-1.140794536907093100e-03,1.887764668521401887e-02,4.406600251557652413e-03,-5.767215733134573898e-03 +-2.821895471337307892e-01,3.305744093579030592e-03,1.278282018370431421e-02,-1.136083882899966866e-03,1.885163396897428234e-02,4.378629616636184156e-03,-5.706326452013224006e-03 +-2.801783123560285804e-01,3.279480312097095979e-03,1.265146405620193142e-02,-1.131411072385705204e-03,1.882578283063874799e-02,4.350543272411373091e-03,-5.645900359793178461e-03 +-2.781658469388685329e-01,3.253281188460446262e-03,1.252103103557638576e-02,-1.126776030454560004e-03,1.880009394347549254e-02,4.322340167206138728e-03,-5.585931964352300848e-03 +-2.761521597216903823e-01,3.227146606903652515e-03,1.239151076188049587e-02,-1.122178677625211085e-03,1.877456797699554245e-02,4.294025397938900507e-03,-5.526415071846275257e-03 +-2.741372595493007380e-01,3.201074212350100111e-03,1.226289191942720076e-02,-1.117618938786223148e-03,1.874920558314282018e-02,4.265599741770207017e-03,-5.467345660048771906e-03 +-2.721211552718337257e-01,3.175062271238027894e-03,1.213516453481444830e-02,-1.113096739422919862e-03,1.872400741897756182e-02,4.237065013406707964e-03,-5.408717551978823278e-03 +-2.701038557447121846e-01,3.149109791330812706e-03,1.200831780872852611e-02,-1.108612006021204507e-03,1.869897412574644813e-02,4.208424284773418060e-03,-5.350526464287924493e-03 +-2.680853698286093101e-01,3.123214407857874235e-03,1.188234243540009852e-02,-1.104164663757311194e-03,1.867410634435268113e-02,4.179677917794720997e-03,-5.292765641624028704e-03 +-2.660657063894090180e-01,3.097375532667113410e-03,1.175722753712299395e-02,-1.099754636313706032e-03,1.864940470645332776e-02,4.150829467257994426e-03,-5.235431588315719517e-03 +-2.640448742981674202e-01,3.071590953178908543e-03,1.163296336453037254e-02,-1.095381854630548711e-03,1.862486983690542219e-02,4.121879302841741435e-03,-5.178518989947029742e-03 +-2.620228824310735782e-01,3.045859631101465511e-03,1.150954013491356509e-02,-1.091046245846929216e-03,1.860050235837197585e-02,4.092829857254479294e-03,-5.122022876852212747e-03 +-2.599997396694109231e-01,3.020180612544901434e-03,1.138694821280252824e-02,-1.086747739843357688e-03,1.857630288125254950e-02,4.063683583779234380e-03,-5.065938271644059541e-03 +-2.579754548995178420e-01,2.994552106412304033e-03,1.126517807880134808e-02,-1.082486264212276748e-03,1.855227201121270944e-02,4.034441230769367746e-03,-5.010260234694823359e-03 +-2.559500370127490432e-01,2.968972961605040664e-03,1.114422017053801235e-02,-1.078261748804050513e-03,1.852841035123485233e-02,4.005104614117287187e-03,-4.954984149861632603e-03 +-2.539234949054359203e-01,2.943442673136002340e-03,1.102406491411713797e-02,-1.074074123983779355e-03,1.850471849271999661e-02,3.975676632316247142e-03,-4.900105654382364354e-03 +-2.518958374788481391e-01,2.917959262381221702e-03,1.090470318644901418e-02,-1.069923322545524762e-03,1.848119702104790776e-02,3.946157296071953896e-03,-4.845619761271155283e-03 +-2.498670736391540859e-01,2.892521698822133301e-03,1.078612593113181806e-02,-1.065809273816562984e-03,1.845784652343794227e-02,3.916548273498124053e-03,-4.791521569881642718e-03 +-2.478372122973819258e-01,2.867130130265849609e-03,1.066832376624956419e-02,-1.061731910225526248e-03,1.843466757021799962e-02,3.886853326525524510e-03,-4.737806992291641872e-03 +-2.458062623693803850e-01,2.841782162639277208e-03,1.055128784366325505e-02,-1.057691163774294833e-03,1.841166072792905367e-02,3.857071340560614976e-03,-4.684471133996954105e-03 +-2.437742327757797534e-01,2.816477338200595523e-03,1.043500906205573238e-02,-1.053686967506118699e-03,1.838882656409969452e-02,3.827204726177019841e-03,-4.631509761988119103e-03 +-2.417411324419524721e-01,2.791214847187445391e-03,1.031947850522096574e-02,-1.049719255040649580e-03,1.836616563382147127e-02,3.797255133828912610e-03,-4.578918476314522944e-03 +-2.397069702979740813e-01,2.765994011989424128e-03,1.020468712893039982e-02,-1.045787962957076987e-03,1.834367848651292932e-02,3.767224366569072846e-03,-4.526693291045580901e-03 +-2.376717552785839183e-01,2.740813191922712028e-03,1.009062669822568224e-02,-1.041893026254218316e-03,1.832136566681016529e-02,3.737112331784957914e-03,-4.474828871568720980e-03 +-2.356354963231459820e-01,2.715672637653394634e-03,9.977288179831188958e-03,-1.038034378987836084e-03,1.829922771608863408e-02,3.706922398443385138e-03,-4.423321543861097674e-03 +-2.335982023756095483e-01,2.690571236869919620e-03,9.864662997651050050e-03,-1.034211959181111648e-03,1.827726515801745416e-02,3.676655299826116653e-03,-4.372166936192711274e-03 +-2.315598823844699505e-01,2.665507047263053399e-03,9.752742784521772207e-03,-1.030425702365303032e-03,1.825547852760274828e-02,3.646310137935463661e-03,-4.321360440668995870e-03 +-2.295205453027292508e-01,2.640481370213765538e-03,9.641518787913997898e-03,-1.026675546643253346e-03,1.823386834271529047e-02,3.615892025952715562e-03,-4.270898324210684743e-03 +-2.274802000878569097e-01,2.615491833286828164e-03,9.530983002972481452e-03,-1.022961428955513220e-03,1.821243511488757766e-02,3.585399122340150561e-03,-4.220775600977270080e-03 +-2.254388557017504569e-01,2.590538445033331669e-03,9.421126469165245002e-03,-1.019283287577305670e-03,1.819117935710597503e-02,3.554833987707380159e-03,-4.170989223631640062e-03 +-2.233965211106962168e-01,2.565620454901969185e-03,9.311941061533320768e-03,-1.015641065172704759e-03,1.817010157029841713e-02,3.524197700922293477e-03,-4.121534722264378128e-03 +-2.213532052853296739e-01,2.540737298847218183e-03,9.203419039822744938e-03,-1.012034699906438993e-03,1.814920225119635161e-02,3.493491635785619075e-03,-4.072407047811662725e-03 +-2.193089172005963650e-01,2.515888110532190150e-03,9.095551997439400502e-03,-1.008464128735437606e-03,1.812848189114983286e-02,3.462716547782952042e-03,-4.023602540779264505e-03 +-2.172636658357122441e-01,2.491072736953172089e-03,8.988331404103013650e-03,-1.004929294106396680e-03,1.810794097426157018e-02,3.431874481609744395e-03,-3.975117912167049593e-03 +-2.152174601741243254e-01,2.466289755819076144e-03,8.881749676968687252e-03,-1.001430137452903645e-03,1.808757998055639596e-02,3.400965056515725411e-03,-3.926948233602687958e-03 +-2.131703092034713254e-01,2.441539335233408690e-03,8.775798287738048251e-03,-9.979666011130303160e-04,1.806739938626704400e-02,3.369990830928626478e-03,-3.879090485090834657e-03 +-2.111222219155439450e-01,2.416820465682872118e-03,8.670469811217102266e-03,-9.945386286559611613e-04,1.804739965725318113e-02,3.338952114625218671e-03,-3.831539712734938034e-03 +-2.090732073062456231e-01,2.392132830561471100e-03,8.565755806366160893e-03,-9.911461626918697827e-04,1.802758125820867405e-02,3.307850478290982540e-03,-3.784293000582924539e-03 +-2.070232743755528182e-01,2.367475771649461304e-03,8.461648944733789895e-03,-9.877891492742769518e-04,1.800794464390684940e-02,3.276686815679324440e-03,-3.737345478724237759e-03 +-2.049724321274755956e-01,2.342848610460558211e-03,8.358141418061851985e-03,-9.844675289023335427e-04,1.798849026791381894e-02,3.245461947841299637e-03,-3.690693307904960307e-03 +-2.029206895700181590e-01,2.318251020371502567e-03,8.255225252414810991e-03,-9.811812471091020844e-04,1.796921857566221442e-02,3.214177322472104061e-03,-3.644333089434345380e-03 +-2.008680557151390489e-01,2.293682454372327970e-03,8.152892713947286832e-03,-9.779302495169475869e-04,1.795013000707300702e-02,3.182833942267569889e-03,-3.598261103334987002e-03 +-1.988145395787117298e-01,2.269142132335924371e-03,8.051136407351276378e-03,-9.747144831643725577e-04,1.793122499767564951e-02,3.151432342346807604e-03,-3.552473122737382326e-03 +-1.967601501804849828e-01,2.244629925644530798e-03,7.949948445878438422e-03,-9.715338935307113455e-04,1.791250397732349620e-02,3.119974250001477170e-03,-3.506965968932712618e-03 +-1.947048965440432433e-01,2.220144672968822879e-03,7.849321402813072210e-03,-9.683884316278775764e-04,1.789396737100634391e-02,3.088459427717145551e-03,-3.461735726278567741e-03 +-1.926487876967668822e-01,2.195687001013282760e-03,7.749248468490752516e-03,-9.652780409973161883e-04,1.787561559829284677e-02,3.056890962848818512e-03,-3.416777446732046503e-03 +-1.905918326697928489e-01,2.171255493412132739e-03,7.649721412167526623e-03,-9.622026688887388860e-04,1.785744906704367016e-02,3.025268085125235647e-03,-3.372088970090240424e-03 +-1.885340404979746198e-01,2.146849715031372806e-03,7.550732998538658980e-03,-9.591622664740311575e-04,1.783946819218107036e-02,2.993591837862720484e-03,-3.327666391760537162e-03 +-1.864754202198428135e-01,2.122469762929542550e-03,7.452276061022977335e-03,-9.561567841078201209e-04,1.782167337229891707e-02,2.961864236567715320e-03,-3.283505799211778719e-03 +-1.844159808775653053e-01,2.098114602702982456e-03,7.354343583437355017e-03,-9.531861698947970878e-04,1.780406500664276359e-02,2.930085149997931418e-03,-3.239603117525628306e-03 +-1.823557315169075932e-01,2.073783980389314920e-03,7.256928142414290593e-03,-9.502503726160305099e-04,1.778664348665301084e-02,2.898255885574483725e-03,-3.195955153485413505e-03 +-1.802946811871931343e-01,2.049477573485481395e-03,7.160022094119999382e-03,-9.473493451130971703e-04,1.776940920137916324e-02,2.866377597764987930e-03,-3.152559244122674856e-03 +-1.782328389412633773e-01,2.025194978641575022e-03,7.063618763130377273e-03,-9.444830426595153278e-04,1.775236252750230642e-02,2.834451265427246573e-03,-3.109411026056537863e-03 +-1.761702138354382385e-01,2.000935077113158433e-03,6.967711282173525561e-03,-9.416514143479885919e-04,1.773550384747586348e-02,2.802476502383582143e-03,-3.066506609740306949e-03 +-1.741068149294762724e-01,1.976698522905293107e-03,6.872292236685097200e-03,-9.388544142208900441e-04,1.771883352889324151e-02,2.770456223006960688e-03,-3.023843244318385852e-03 +-1.720426512865346202e-01,1.952483534220121131e-03,6.777355019923925888e-03,-9.360919942836314544e-04,1.770235194202291959e-02,2.738388751315513295e-03,-2.981416775260469290e-03 +-1.699777319731296532e-01,1.928291353321979909e-03,6.682892234048459605e-03,-9.333641100042095210e-04,1.768605944561121163e-02,2.706278058594661137e-03,-2.939224640830200366e-03 +-1.679120660590968095e-01,1.904119708691886766e-03,6.588897853913370378e-03,-9.306707134986608480e-04,1.766995639690302142e-02,2.674121505171432253e-03,-2.897261813299447254e-03 +-1.658456626175507931e-01,1.879970160768154337e-03,6.495363828073936335e-03,-9.280117591864010980e-04,1.765404314780393003e-02,2.641923613824358977e-03,-2.855527170528296410e-03 +-1.637785307248458277e-01,1.855840582425618678e-03,6.402284098640778978e-03,-9.253872077058273183e-04,1.763832004015839991e-02,2.609681971117018811e-03,-2.814015957063198663e-03 +-1.617106794605358555e-01,1.831731380483074155e-03,6.309652172897499932e-03,-9.227970100150752978e-04,1.762278742197596199e-02,2.577398892767864685e-03,-2.772724335804843416e-03 +-1.596421179073343188e-01,1.807642088829609396e-03,6.217460483663866076e-03,-9.202411245530572404e-04,1.760744562601978050e-02,2.545075036133717054e-03,-2.731650585657016336e-03 +-1.575728551510746644e-01,1.783572452468612686e-03,6.125703084798708557e-03,-9.177195102734909959e-04,1.759229498428316676e-02,2.512711433023843716e-03,-2.690790045723217942e-03 +-1.555029002806702365e-01,1.759521590455284494e-03,6.034373244879424024e-03,-9.152321217870711860e-04,1.757733582291906571e-02,2.480307920410746922e-03,-2.650139625523476039e-03 +-1.534322623880743364e-01,1.735490132639755639e-03,5.943464070704352828e-03,-9.127789182608156329e-04,1.756256846356666484e-02,2.447867147194977833e-03,-2.609696634333776803e-03 +-1.513609505682403933e-01,1.711476523291847757e-03,5.852969050071906223e-03,-9.103598598914823623e-04,1.754799321843951432e-02,2.415387643785982599e-03,-2.569457761669747490e-03 +-1.492889739190818854e-01,1.687480809779777838e-03,5.762881792931138364e-03,-9.079749074805954206e-04,1.753361040777616162e-02,2.382870926104111841e-03,-2.529419561118888965e-03 +-1.472163415414325383e-01,1.663503310423132065e-03,5.673196168310312010e-03,-9.056240187134788474e-04,1.751942033092796641e-02,2.350318998108840048e-03,-2.489578191801815853e-03 +-1.451430625390061624e-01,1.639542193642914113e-03,5.583905220590911107e-03,-9.033071532087166584e-04,1.750542329533699704e-02,2.317729813074566585e-03,-2.449931448808472401e-03 +-1.430691460183568520e-01,1.615598890744627571e-03,5.495002607486542540e-03,-9.010242745908644216e-04,1.749161959738309430e-02,2.285107422752607857e-03,-2.410476065309120029e-03 +-1.409946010888388224e-01,1.591671747726795642e-03,5.406482319769613347e-03,-8.987753438871421644e-04,1.747800952635720356e-02,2.252450074978078459e-03,-2.371208236407595839e-03 +-1.389194368625665810e-01,1.567760913388158584e-03,5.318337862109622644e-03,-8.965603214977944458e-04,1.746459337372592915e-02,2.219759379122942643e-03,-2.332125153477068724e-03 +-1.368436624543747093e-01,1.543865775458662078e-03,5.230562752985949866e-03,-8.943791711615039662e-04,1.745137142222770443e-02,2.187035499208401981e-03,-2.293224065169219804e-03 +-1.347672869817779229e-01,1.519986501056825017e-03,5.143150812825274948e-03,-8.922318573556155957e-04,1.743834395156100558e-02,2.154280042609694589e-03,-2.254501735614575231e-03 +-1.326903195649310474e-01,1.496122575723042927e-03,5.056096042993745224e-03,-8.901183437764906368e-04,1.742551123154667536e-02,2.121493321043213184e-03,-2.215954669699088733e-03 +-1.306127693265889400e-01,1.472273231935662370e-03,4.969392078764802399e-03,-8.880385933400054485e-04,1.741287353449808353e-02,2.088675154497262113e-03,-2.177580137228406296e-03 +-1.285346453920664100e-01,1.448438750487692850e-03,4.883032648183693175e-03,-8.859925730187593381e-04,1.740043112506564280e-02,2.055827311603812286e-03,-2.139375310942336333e-03 +-1.264559568891981123e-01,1.424618626422329047e-03,4.797012053538541460e-03,-8.839802467502746487e-04,1.738818426145849472e-02,2.022950071109232140e-03,-2.101336362299972664e-03 +-1.243767129482984818e-01,1.400812398093709017e-03,4.711323688999752117e-03,-8.820015803209506778e-04,1.737613319945957233e-02,1.990043776785228955e-03,-2.063461239305051410e-03 +-1.222969227021216548e-01,1.377019932953390483e-03,4.625961711249966524e-03,-8.800565419459420533e-04,1.736427819043598331e-02,1.957109373614434808e-03,-2.025746532655919516e-03 +-1.202165952858213066e-01,1.353240716210252258e-03,4.540920092997270847e-03,-8.781450985326792920e-04,1.735261948035918961e-02,1.924147074380858861e-03,-1.988189248905026379e-03 +-1.181357398369105166e-01,1.329474696958250638e-03,4.456192878880135648e-03,-8.762672174376453810e-04,1.734115731267679011e-02,1.891157946540354858e-03,-1.950786329179449684e-03 +-1.160543654952217590e-01,1.305721711776799965e-03,4.371773882828234800e-03,-8.744228674070286074e-04,1.732989192099300760e-02,1.858142831744875358e-03,-1.913535215179898268e-03 +-1.139724814028665600e-01,1.281980600282257333e-03,4.287657360803794233e-03,-8.726120182463187758e-04,1.731882354165668880e-02,1.825100688098939012e-03,-1.876432586379741347e-03 +-1.118900967041955297e-01,1.258252205019277882e-03,4.203837611686953891e-03,-8.708346372340717342e-04,1.730795240436391186e-02,1.792034214995619906e-03,-1.839475106921977920e-03 +-1.098072205457580330e-01,1.234535877848311550e-03,4.120308318264438718e-03,-8.690906930751748222e-04,1.729727872793243929e-02,1.758943303178505529e-03,-1.802660659857445143e-03 +-1.077238620762621668e-01,1.210830739458303813e-03,4.037063458559621287e-03,-8.673801596866106750e-04,1.728680273516873186e-02,1.725827396554500265e-03,-1.765986637049311511e-03 +-1.056400304465344858e-01,1.187137426757124649e-03,3.954097724213768116e-03,-8.657030081219478862e-04,1.727652464191318377e-02,1.692688768989202095e-03,-1.729449153358015179e-03 +-1.035557348094797714e-01,1.163455438146462993e-03,3.871405124669719065e-03,-8.640592079955708828e-04,1.726644465369871900e-02,1.659527546893238429e-03,-1.693045662412087432e-03 +-1.014709843200409523e-01,1.139783961626340087e-03,3.788979526611429855e-03,-8.624487324817323121e-04,1.725656297895765412e-02,1.626343262412444134e-03,-1.656773943843635900e-03 +-9.938578813515881738e-02,1.116123142481827884e-03,3.706815316157862934e-03,-8.608715573955982425e-04,1.724687982087567267e-02,1.593137230583874372e-03,-1.620630860304716429e-03 +-9.730015541373175647e-02,1.092472870619401156e-03,3.624907172153143866e-03,-8.593276539525786196e-04,1.723739537564235313e-02,1.559910275991907786e-03,-1.584612781352331794e-03 +-9.521409531657563929e-02,1.068832530265337744e-03,3.543248903377468027e-03,-8.578169954972417839e-04,1.722810983513470978e-02,1.526662263689472195e-03,-1.548717762280597604e-03 +-9.312761700638348683e-02,1.045202034406808632e-03,3.461835093487037902e-03,-8.563395565936787138e-04,1.721902339076483018e-02,1.493394039128147872e-03,-1.512942458578667203e-03 +-9.104072964768519793e-02,1.021581443667551620e-03,3.380659872230261809e-03,-8.548953117321542432e-04,1.721013622296206289e-02,1.460106713862886632e-03,-1.477284428656704899e-03 +-8.895344240680749803e-02,9.979698064895964858e-04,3.299717704235022916e-03,-8.534842363130575038e-04,1.720144851381548434e-02,1.426799491248548647e-03,-1.441740653542592605e-03 +-8.686576445183338824e-02,9.743674473211884086e-04,3.219002756020999164e-03,-8.521063068649225138e-04,1.719296043893966258e-02,1.393473956959728364e-03,-1.406308722496408615e-03 +-8.477770495256206629e-02,9.507735207743913015e-04,3.138509600639707506e-03,-8.507615002304762990e-04,1.718467217215071535e-02,1.360129493501193755e-03,-1.370985510693494567e-03 +-8.268927308046866709e-02,9.271888434909345564e-04,3.058232602969740248e-03,-8.494497934485877077e-04,1.717658387882778359e-02,1.326768590088474049e-03,-1.335768330353340801e-03 +-8.060047800866380896e-02,9.036120912679459268e-04,2.978166328007803183e-03,-8.481711623162867079e-04,1.716869572026364504e-02,1.293389712613886777e-03,-1.300654164092291017e-03 +-7.851132891185348683e-02,8.800437421476151219e-04,2.898304574422336938e-03,-8.469255874083210945e-04,1.716100785741144288e-02,1.259994697289661082e-03,-1.265641475771841053e-03 +-7.642183496629867401e-02,8.564831393918027934e-04,2.818642718437124137e-03,-8.457130479240649971e-04,1.715352044251154673e-02,1.226583243859689660e-03,-1.230725814475089590e-03 +-7.433200534977503493e-02,8.329303447287193752e-04,2.739174695063843756e-03,-8.445335200557759631e-04,1.714623362735671550e-02,1.193156390799047541e-03,-1.195905473521093791e-03 +-7.224184924153258247e-02,8.093850825629696211e-04,2.659894943874817168e-03,-8.433869858887181083e-04,1.713914755525183797e-02,1.159714535991003374e-03,-1.161177840923752072e-03 +-7.015137582225546009e-02,7.858469014019249825e-04,2.580798023358811446e-03,-8.422734271582097820e-04,1.713226236885497661e-02,1.126257739610055791e-03,-1.126540121274039949e-03 +-6.806059427402150197e-02,7.623154765337913311e-04,2.501879010270089722e-03,-8.411928225415545544e-04,1.712557820736578668e-02,1.092786291939109276e-03,-1.091988585832289914e-03 +-6.596951378026195967e-02,7.387909034297768631e-04,2.423131434217826435e-03,-8.401451533577061325e-04,1.711909520545089325e-02,1.059301263814798255e-03,-1.057522443961810216e-03 +-6.387814352572118715e-02,7.152729995718514833e-04,2.344550521598336070e-03,-8.391304057668348432e-04,1.711281349024488391e-02,1.025803197609720949e-03,-1.023137759547934015e-03 +-6.178649269641621478e-02,6.917610870603216723e-04,2.266130594934168650e-03,-8.381485604314783911e-04,1.710673318789596858e-02,9.922917001231239351e-04,-9.888323261305720195e-04 +-5.969457047959648988e-02,6.682554188239046801e-04,2.187866593102728071e-03,-8.371996019905839602e-04,1.710085442157094002e-02,9.587681167763251883e-04,-9.546028153157591666e-04 +-5.760238606370348541e-02,6.447561606709381610e-04,2.109752891662017554e-03,-8.362835104694760994e-04,1.709517730224113449e-02,9.252336232362876880e-04,-9.204469874466785721e-04 +-5.550994863833031562e-02,6.212617231033640925e-04,2.031783772432073278e-03,-8.354002739806668529e-04,1.708970194493900779e-02,8.916860979001255474e-04,-8.863628100761631809e-04 +-5.341726739418143494e-02,5.977730397473489778e-04,1.953954273807970154e-03,-8.345498773117973959e-04,1.708442846226356296e-02,8.581281440685459326e-04,-8.523468624413305106e-04 +-5.132435152303221199e-02,5.742895078098032846e-04,1.876259135744128307e-03,-8.337323052505965070e-04,1.707935695514629232e-02,8.245594786636935248e-04,-8.183963124566629626e-04 +-4.923121021768859379e-02,5.508107994088781711e-04,1.798692776104984922e-03,-8.329475410350632193e-04,1.707448752704090328e-02,7.909803285310887735e-04,-7.845089599679536032e-04 +-4.713785267194667977e-02,5.273368757768139101e-04,1.721249492332535633e-03,-8.321955754177095578e-04,1.706982027432824012e-02,7.573914574776349296e-04,-7.506828574705039095e-04 +-4.504428808055239986e-02,5.038676886565878832e-04,1.643924630286941512e-03,-8.314763946375710408e-04,1.706535528910581995e-02,7.237936065104558182e-04,-7.169141174245305219e-04 +-4.295052563916110233e-02,4.804028525976894969e-04,1.566712385921319260e-03,-8.307899864883782530e-04,1.706109265775446887e-02,6.901868796518256322e-04,-6.832010329038953542e-04 +-4.085657454429714869e-02,4.569421269108194222e-04,1.489608128353814894e-03,-8.301363372922674483e-04,1.705703246602900333e-02,6.565716478209767662e-04,-6.495397208755530200e-04 +-3.876244399331353618e-02,4.334856886539072292e-04,1.412605535852473441e-03,-8.295154348499450764e-04,1.705317479130001937e-02,6.229490608900855372e-04,-6.159294909429745106e-04 +-3.666814318435148573e-02,4.100323283640498794e-04,1.335699627157894932e-03,-8.289272749401096606e-04,1.704951971114532575e-02,5.893176649309007139e-04,-5.823671606036762757e-04 +-3.457368131630006447e-02,3.865827778584176580e-04,1.258885921814037164e-03,-8.283718435077276968e-04,1.704606730204275694e-02,5.556796420778943287e-04,-5.488486285256266193e-04 +-3.247906758875574590e-02,3.631370761587899166e-04,1.182158302978010946e-03,-8.278491294431017129e-04,1.704281762397581376e-02,5.220358717614807234e-04,-5.153728806404752000e-04 +-3.038431120198204285e-02,3.396934074587769662e-04,1.105511819463074214e-03,-8.273591250645684608e-04,1.703977074619731844e-02,4.883837522165733209e-04,-4.819367351930565818e-04 +-2.828942135686907111e-02,3.162534085593001159e-04,1.028940852442183787e-03,-8.269018228654092780e-04,1.703692673111863870e-02,4.547271520520635791e-04,-4.485382791258278731e-04 +-2.619440725489314420e-02,2.928160782285128025e-04,9.524408146222592244e-04,-8.264772149941163092e-04,1.703428562816807629e-02,4.210649906126762176e-04,-4.151736809425995251e-04 +-2.409927809807633700e-02,2.693810873331392705e-04,8.760056317439043807e-04,-8.260852929403171346e-04,1.703184749277885171e-02,3.873974446078788246e-04,-3.818419119023266049e-04 +-2.200404308894610833e-02,2.459485684577182455e-04,7.996307378660531557e-04,-8.257260536954047188e-04,1.702961237160366056e-02,3.537255538748333055e-04,-3.485391286815286808e-04 +-1.990871143049483677e-02,2.225180873493680812e-04,7.233107281241198203e-04,-8.253994861108061116e-04,1.702758030817208210e-02,3.200492930511959788e-04,-3.152630734135611259e-04 +-1.781329232613943978e-02,1.990893544539459349e-04,6.470401162824475231e-04,-8.251055884215543427e-04,1.702575134383297467e-02,2.863689059321191505e-04,-2.820116521970632631e-04 +-1.571779497968089911e-02,1.756626018903604278e-04,5.708143362595631629e-04,-8.248443511851146588e-04,1.702412551362548002e-02,2.526856124515963254e-04,-2.487810560687441592e-04 +-1.362222859526389211e-02,1.522370537682972685e-04,4.946271056695378775e-04,-8.246157718821746103e-04,1.702270284903146386e-02,2.189987408080950498e-04,-2.155707033983047692e-04 +-1.152660237733631014e-02,1.288129728076797473e-04,4.184739816569792723e-04,-8.244198482732582314e-04,1.702148337966774561e-02,1.853095651138080002e-04,-1.823765715498982469e-04 +-9.430925530608870802e-03,1.053898412841675774e-04,3.423494801987202901e-04,-8.242565761277010922e-04,1.702046712795083835e-02,1.516178946609241158e-04,-1.491965970498282097e-04 +-7.335207260014662417e-03,8.196767543128259637e-05,2.662485955240164012e-04,-8.241259518033477137e-04,1.701965411579003939e-02,1.179245389147050212e-04,-1.160278256364293204e-04 +-5.239456770668723257e-03,5.854629093649077833e-05,1.901657480497178515e-04,-8.240279736039088278e-04,1.701904435676685270e-02,8.422993036686636135e-05,-8.286838378535121635e-05 +-3.143683267827608162e-03,3.512533829827304828e-05,1.140959831929323401e-04,-8.239626405224594226e-04,1.701863786395985470e-02,5.053419103675434876e-05,-4.971522998133120076e-05 +-1.047895956848953446e-03,1.170470949379773770e-05,3.803388277719581864e-05,-8.239299522006214571e-04,1.701843464457217930e-02,1.683789519112693995e-05,-1.656619460831709506e-05 +1.047895956848953446e-03,-1.171595095671988791e-05,-3.802537020471871687e-05,-8.239299075268406150e-04,1.701843470401125102e-02,-1.685884715985106898e-05,1.658201758587787465e-05 +3.143683267827608162e-03,-3.513658255351335334e-05,-1.140873235011214602e-04,-8.239625053500401579e-04,1.701863804170533276e-02,-5.055514611064378615e-05,4.973133111852614109e-05 +5.239456770668723257e-03,-5.855762636124268013e-05,-1.901571565769669028e-04,-8.240277474799360914e-04,1.701904465381168241e-02,-8.425105221213526725e-05,8.288436159081172380e-05 +7.335207260014662417e-03,-8.197902964175913555e-05,-2.662399789816827038e-04,-8.241256351513179213e-04,1.701965453235694536e-02,-1.179456897509463238e-04,1.160438572940391709e-04 +9.430925530608870802e-03,-1.054011917830527254e-04,-3.423410076424499536e-04,-8.242561691780202161e-04,1.702046766440498682e-02,-1.516390302623061594e-04,1.492123679424300442e-04 +1.152660237733631014e-02,-1.288243291409319152e-04,-4.184656476094095927e-04,-8.244193529433808563e-04,1.702148403557207992e-02,-1.853307013342082269e-04,1.823920941955240458e-04 +1.362222859526389211e-02,-1.522484844065454847e-04,-4.946187164092515293e-04,-8.246151885578022981e-04,1.702270362507849952e-02,-2.190200040021143412e-04,2.155863432594759977e-04 +1.571779497968089911e-02,-1.756741189059638094e-04,-5.708057707013262927e-04,-8.248436781190797863e-04,1.702412641042112471e-02,-2.527070231285156684e-04,2.487970424108442422e-04 +1.781329232613943978e-02,-1.991007883581658185e-04,-6.470320413207341281e-04,-8.251048270542803197e-04,1.702575236166248632e-02,-2.863901442216377526e-04,2.820267368719913759e-04 +1.990871143049483677e-02,-2.225295716322156394e-04,-7.233026193554201351e-04,-8.253986400449866024e-04,1.702758144613734231e-02,-3.200706072353001188e-04,3.152782419765611834e-04 +2.200404308894610833e-02,-2.459600997316095097e-04,-7.996227145266170686e-04,-8.257251214432110914e-04,1.702961363082190760e-02,-3.537469354788049317e-04,3.485541598447593876e-04 +2.409927809807633700e-02,-2.693927472683030344e-04,-8.759976171708792848e-04,-8.260842766775744472e-04,1.703184887338748954e-02,-3.874190447262404642e-04,3.818569517359500032e-04 +2.619440725489314420e-02,-2.928276999162520789e-04,-9.524325134240104644e-04,-8.264761114576140051e-04,1.703428713153117002e-02,-4.210864942379833166e-04,4.151892855639960594e-04 +2.828942135686907111e-02,-3.162651192671200873e-04,-1.028932759834279460e-03,-8.269006325171599468e-04,1.703692835584345172e-02,-4.547487956945492479e-04,4.485535222564926362e-04 +3.038431120198204285e-02,-3.397053976543186948e-04,-1.105503618260040904e-03,-8.273578479075439007e-04,1.703977249607827385e-02,-4.884058909130973344e-04,4.819522138639368777e-04 +3.247906758875574590e-02,-3.631483611859548833e-04,-1.182150113134888261e-03,-8.278477651292161059e-04,1.704281949539038568e-02,-5.220566572144001065e-04,5.153883721961254613e-04 +3.457368131630006447e-02,-3.865944971893161175e-04,-1.258877782350703795e-03,-8.283703914880708417e-04,1.704606929266741164e-02,-5.557012086747188054e-04,5.488640620839243004e-04 +3.666814318435148573e-02,-4.100440925282829749e-04,-1.335691870431537139e-03,-8.289257383179742455e-04,1.704952182547377071e-02,-5.893392803272706499e-04,5.823819152602740168e-04 +3.876244399331353618e-02,-4.334970336069394700e-04,-1.412597312319214355e-03,-8.295138130361821458e-04,1.705317702539390495e-02,-6.229698528442382827e-04,6.159451618994692124e-04 +4.085657454429714869e-02,-4.569538290620872253e-04,-1.489599832115865666e-03,-8.301346260045229086e-04,1.705703481971561700e-02,-6.565930700000000659e-04,6.495555715270974690e-04 +4.295052563916110233e-02,-4.804145412806691291e-04,-1.566704387353783524e-03,-8.307881876977906343e-04,1.706109513342030787e-02,-6.902082349550532321e-04,6.832163711073505237e-04 +4.504428808055239986e-02,-5.038793797000867391e-04,-1.643916656124997473e-03,-8.314745097931960623e-04,1.706535788565971198e-02,-7.238149224570855763e-04,7.169294578321427957e-04 +4.713785267194667977e-02,-5.273485869773575276e-04,-1.721241557424899949e-03,-8.321936045630943081e-04,1.706982299225209831e-02,-7.574127652915086089e-04,7.506981745769398737e-04 +4.923121021768859379e-02,-5.508224676715465281e-04,-1.798684588096303039e-03,-8.329454830666352784e-04,1.707449036572368864e-02,-7.910015077561848061e-04,7.845248042714705883e-04 +5.132435152303221199e-02,-5.743009427758151257e-04,-1.876250935175215575e-03,-8.337301576729267728e-04,1.707935991309329077e-02,-8.245801701994903482e-04,8.184122352777997624e-04 +5.341726739418143494e-02,-5.977846781078626891e-04,-1.953946282719737620e-03,-8.345476417744364817e-04,1.708443153828441766e-02,-8.581491645830780684e-04,8.523624498052317531e-04 +5.550994863833031562e-02,-6.212732467479815165e-04,-2.031775678257647516e-03,-8.353979507346151511e-04,1.708970514094903040e-02,-8.917068487425261611e-04,8.863786503939220449e-04 +5.760238606370348541e-02,-6.447673956787128595e-04,-2.109744520400413936e-03,-8.362810970263273190e-04,1.709518061376152573e-02,-9.252537761793988699e-04,9.204634093306978630e-04 +5.969457047959648988e-02,-6.682672646578859425e-04,-2.187858507747315103e-03,-8.371970974508566570e-04,1.710085785111325682e-02,-9.587893561214813013e-04,9.546187655251101674e-04 +6.178649269641621478e-02,-6.917729629069591405e-04,-2.266122661039696694e-03,-8.381459683839253411e-04,1.710673673898317759e-02,-9.923129347471663572e-04,9.888480590850312092e-04 +6.387814352572118715e-02,-7.152847906988312300e-04,-2.344542439041876012e-03,-8.391277245903189025e-04,1.711281716175834522e-02,-1.025824210221334748e-03,1.023153840340472291e-03 +6.596951378026195967e-02,-7.388027708625470721e-04,-2.423123395962070541e-03,-8.401423831655782529e-04,1.711909899692732120e-02,-1.059322354141940727e-03,1.057538513125338454e-03 +6.806059427402150197e-02,-7.623273762864687074e-04,-2.501870810378077365e-03,-8.411899614425580535e-04,1.712558211958213616e-02,-1.092807375452391726e-03,1.092005031892667385e-03 +7.015137582225546009e-02,-7.858586638969757617e-04,-2.580790214510725062e-03,-8.422704765671279892e-04,1.713226640051933661e-02,-1.126278496198232330e-03,1.126555910329965276e-03 +7.224184924153258247e-02,-8.093968705941835366e-04,-2.659887119477254581e-03,-8.433839477767242484e-04,1.713915170540062510e-02,-1.159735268814404176e-03,1.161193737689271898e-03 +7.433200534977503493e-02,-8.329425304231877214e-04,-2.739166696723221127e-03,-8.445303927963160014e-04,1.714623789770090487e-02,-1.193177795445034620e-03,1.195921777364046144e-03 +7.642183496629867401e-02,-8.564953754864834635e-04,-2.818634904452179801e-03,-8.457098305043527246e-04,1.715352483622802071e-02,-1.226604667054521924e-03,1.230741855924761012e-03 +7.851132891185348683e-02,-8.800559340996076977e-04,-2.898297036854388230e-03,-8.469222833571377279e-04,1.716101237262300516e-02,-1.260015959599169186e-03,1.265657084568531758e-03 +8.060047800866380896e-02,-9.036245157497519408e-04,-2.978158486423565770e-03,-8.481677700239463385e-04,1.716870035891358837e-02,-1.293411330642683542e-03,1.300670430901173548e-03 +8.268927308046866709e-02,-9.272011769548336004e-04,-3.058225166814921171e-03,-8.494463131593951288e-04,1.717658864025111559e-02,-1.326789954980086422e-03,1.335783927309642160e-03 +8.477770495256206629e-02,-9.507860916928169816e-04,-3.138502210003116504e-03,-8.507579345421229953e-04,1.718467705762457415e-02,-1.360151218967513374e-03,1.371001115022226933e-03 +8.686576445183338824e-02,-9.743797267002827838e-04,-3.218995444800401475e-03,-8.521026555786789976e-04,1.719296544721874917e-02,-1.393495048923109941e-03,1.406324273012184720e-03 +8.895344240680749803e-02,-9.979822003319570919e-04,-3.299710301711926679e-03,-8.534804992351643785e-04,1.720145364372212965e-02,-1.426820708678483820e-03,1.441756472747350365e-03 +9.104072964768519793e-02,-1.021593686496570638e-03,-3.380652465281302685e-03,-8.548914875902361831e-04,1.721014147364721344e-02,-1.460127556730537839e-03,1.477300356110754784e-03 +9.312761700638348683e-02,-1.045214744061234106e-03,-3.461827792249374831e-03,-8.563356455782954180e-04,1.721902876350414960e-02,-1.493415663612263989e-03,1.512958290261068013e-03 +9.521409531657563929e-02,-1.068845025675288843e-03,-3.543241706310411993e-03,-8.578129985841011565e-04,1.722811533165174991e-02,-1.526683390435436619e-03,1.548733503553458967e-03 +9.730015541373175647e-02,-1.092485436102346536e-03,-3.624899880814387576e-03,-8.593235704638403898e-04,1.723740099335566156e-02,-1.559931435791468961e-03,1.584628806791105529e-03 +9.938578813515881738e-02,-1.116135728919289598e-03,-3.706808064222923658e-03,-8.608673867650154911e-04,1.724688556109108678e-02,-1.593158328996051440e-03,1.620646921788091929e-03 +1.014709843200409523e-01,-1.139796509621428819e-03,-3.788971982674042317e-03,-8.624444729667930885e-04,1.725656884009884456e-02,-1.626364185712595844e-03,1.656790665300192106e-03 +1.035557348094797714e-01,-1.163467988199776842e-03,-3.871397519899393447e-03,-8.640548567125559904e-04,1.726645063588254717e-02,-1.659548368678932491e-03,1.693062612945871962e-03 +1.056400304465344858e-01,-1.187150158548661927e-03,-3.954090295305102802e-03,-8.656985654835196338e-04,1.727653074449738563e-02,-1.692709823815538063e-03,1.729465891682814950e-03 +1.077238620762621668e-01,-1.210843783349012468e-03,-4.037056133862325542e-03,-8.673756270188885068e-04,1.728680896192438116e-02,-1.725848926443527892e-03,1.766003300013821659e-03 +1.098072205457580330e-01,-1.234548365140338130e-03,-4.120300842312509156e-03,-8.690860693325328044e-04,1.729728507593587816e-02,-1.758963676882858247e-03,1.802677728963747363e-03 +1.118900967041955297e-01,-1.258265259207792764e-03,-4.203830268260311080e-03,-8.708299215218726378e-04,1.730795887253334941e-02,-1.792055536681769418e-03,1.839492052576392972e-03 +1.139724814028665600e-01,-1.281993549372596394e-03,-4.287650325313009658e-03,-8.726072125846839759e-04,1.731883013411464681e-02,-1.825121695366286548e-03,1.876449082208582041e-03 +1.160543654952217590e-01,-1.305734262991884798e-03,-4.371766987659804655e-03,-8.744179740981739219e-04,1.732989863220092339e-02,-1.858162973266947729e-03,1.913551577983139962e-03 +1.181357398369105166e-01,-1.329487576841234574e-03,-4.456185824672474934e-03,-8.762622352680194153e-04,1.734116414267574194e-02,-1.891178582121363569e-03,1.950803122764939793e-03 +1.202165952858213066e-01,-1.353253719310698903e-03,-4.540913218635443671e-03,-8.781400270743392085e-04,1.735262643095076759e-02,-1.924167816454224605e-03,1.988205840391111569e-03 +1.222969227021216548e-01,-1.377033049320546942e-03,-4.625954816282097301e-03,-8.800513817672907073e-04,1.736428526241118320e-02,-1.957130201109192735e-03,2.025763300442992836e-03 +1.243767129482984818e-01,-1.400825501776910460e-03,-4.711316842148415354e-03,-8.819963306769395151e-04,1.737614039254336412e-02,-1.990064451149279115e-03,2.063478057114846807e-03 +1.264559568891981123e-01,-1.424631832443916954e-03,-4.797005212231259495e-03,-8.839749074837293741e-04,1.738819157554492845e-02,-2.022970805591873828e-03,2.101353312555555471e-03 +1.285346453920664100e-01,-1.448452074634343006e-03,-4.883025962658399215e-03,-8.859871441889504654e-04,1.740043856070249903e-02,-2.055848133355939883e-03,2.139392114541358112e-03 +1.306127693265889400e-01,-1.472286666890819645e-03,-4.969385473930291487e-03,-8.880330758711095116e-04,1.741288109236072135e-02,-2.088696047438080503e-03,2.177596937551978720e-03 +1.326903195649310474e-01,-1.496136047622796455e-03,-5.056089566354234066e-03,-8.901127378288263342e-04,1.742551891179574056e-02,-2.121514144251921731e-03,2.215971380313272801e-03 +1.347672869817779229e-01,-1.519999962695783510e-03,-5.143144421331029628e-03,-8.922261638115267131e-04,1.743835175350059996e-02,-2.154300705129798786e-03,2.254518439856185389e-03 +1.368436624543747093e-01,-1.543879512218878161e-03,-5.230556216334356599e-03,-8.943733888767927027e-04,1.745137934653739550e-02,-2.187056533739192756e-03,2.293241197522400462e-03 +1.389194368625665810e-01,-1.567774642547745123e-03,-5.318331368167119981e-03,-8.965544488917578936e-04,1.746460142176408867e-02,-2.219780253073588318e-03,2.332142364354276506e-03 +1.409946010888388224e-01,-1.591685369213220779e-03,-5.406476035287094764e-03,-8.987693820186436070e-04,1.747801769614572634e-02,-2.252470598271863814e-03,2.371225215868485222e-03 +1.430691460183568520e-01,-1.615612734605962209e-03,-5.494996462331903052e-03,-9.010182244699065958e-04,1.749162788867856394e-02,-2.285128211712244943e-03,2.410492947539102926e-03 +1.451430625390061624e-01,-1.639556455962637568e-03,-5.583899139892793337e-03,-9.033010155549880873e-04,1.750543171164027287e-02,-2.317751232877247707e-03,2.449948376911329374e-03 +1.472163415414325383e-01,-1.663517026883573700e-03,-5.673189998382753563e-03,-9.056177921493551710e-04,1.751942887080451886e-02,-2.350339238134346344e-03,2.489595456678494762e-03 +1.492889739190818854e-01,-1.687495063769858762e-03,-5.762875941218550783e-03,-9.079685926328134741e-04,1.753361906868500242e-02,-2.382892015126848295e-03,2.529436401616875964e-03 +1.513609505682403933e-01,-1.711490764425884110e-03,-5.852963167555715919e-03,-9.103534580717785639e-04,1.754800200557907197e-02,-2.415408547286646088e-03,2.569474835030804462e-03 +1.534322623880743364e-01,-1.735504242572523175e-03,-5.943458121888278538e-03,-9.127724275359395479e-04,1.756257737196484370e-02,-2.447887640721754885e-03,2.609714009925121446e-03 +1.555029002806702365e-01,-1.759536234522675715e-03,-6.034367384398601174e-03,-9.152255417897550748e-04,1.757734485659526516e-02,-2.480329248830538073e-03,2.650157016257833394e-03 +1.575728551510746644e-01,-1.783587030644808412e-03,-6.125697323312467336e-03,-9.177128411747368514e-04,1.759230414333539522e-02,-2.512732468691502435e-03,2.690807434447071555e-03 +1.596421179073343188e-01,-1.807656822017959493e-03,-6.217454750946396942e-03,-9.202343664784073257e-04,1.760745491139632107e-02,-2.545096190620858238e-03,2.731668106942177366e-03 +1.617106794605358555e-01,-1.831746149675642955e-03,-6.309646348128822128e-03,-9.227901615132020566e-04,1.762279683229555152e-02,-2.577419940171322369e-03,2.772742219060494919e-03 +1.637785307248458277e-01,-1.855855366076121895e-03,-6.402278604146685766e-03,-9.253802690576131643e-04,1.763832957722726025e-02,-2.609702868282315173e-03,2.814033413297590668e-03 +1.658456626175507931e-01,-1.879984547720707673e-03,-6.495358271931490901e-03,-9.280047320824084348e-04,1.765405280566396604e-02,-2.641943587050246418e-03,2.855544937502698961e-03 +1.679120660590968095e-01,-1.904135006279105433e-03,-6.588892209586248369e-03,-9.306635946969717653e-04,1.766996618001399552e-02,-2.674143002888640635e-03,2.897279943730332596e-03 +1.699777319731296532e-01,-1.928306039337004214e-03,-6.682886984360888669e-03,-9.333569015872530831e-04,1.768606935372002420e-02,-2.706298223905227469e-03,2.939242232362043999e-03 +1.720426512865346202e-01,-1.952498782101463231e-03,-6.777349583124012838e-03,-9.360846959477034422e-04,1.770236197376641799e-02,-2.738409781576889822e-03,2.981434921629587138e-03 +1.741068149294762724e-01,-1.976713410110461817e-03,-6.872287066230713115e-03,-9.388470255063776412e-04,1.771884368518670747e-02,-2.770476385594578851e-03,3.023861098255925615e-03 +1.761702138354382385e-01,-2.000950329563719668e-03,-6.967705988441605727e-03,-9.416439354444724628e-04,1.773551412662887142e-02,-2.802497155515635240e-03,3.066524904740322317e-03 +1.782328389412633773e-01,-2.025209875563389189e-03,-7.063613673370591761e-03,-9.444754725564932438e-04,1.775237292947255338e-02,-2.834471054804139391e-03,3.109429151519425404e-03 +1.802946811871931343e-01,-2.049493108528408804e-03,-7.160016925243382728e-03,-9.473416844022148958e-04,1.776941972541854625e-02,-2.866398383670192875e-03,3.152577733824909849e-03 +1.823557315169075932e-01,-2.073799422109403288e-03,-7.256922863855353421e-03,-9.502426185583311971e-04,1.778665413675911688e-02,-2.898276293746507130e-03,3.195974067749538915e-03 +1.844159808775653053e-01,-2.098130258460116724e-03,-7.354338512316174921e-03,-9.531783221932019689e-04,1.780407578037290772e-02,-2.930105753869023319e-03,3.239621864688496840e-03 +1.864754202198428135e-01,-2.122485419202056224e-03,-7.452271373926367196e-03,-9.561488452333196221e-04,1.782168427161016575e-02,-2.961884632922408191e-03,3.283524051067029850e-03 +1.885340404979746198e-01,-2.146865627270279527e-03,-7.550728357205754165e-03,-9.591542378786774075e-04,1.783947921588714680e-02,-2.993612502815207535e-03,3.327684785493958530e-03 +1.905918326697928489e-01,-2.171271178406507674e-03,-7.649716810992390412e-03,-9.621945497985123644e-04,1.785746021620316509e-02,-3.025288109737138322e-03,3.372107519182198157e-03 +1.926487876967668822e-01,-2.195702819109690996e-03,-7.749243896022186412e-03,-9.652698306146706214e-04,1.787562686880703536e-02,-3.056911019873601292e-03,3.416796175639565548e-03 +1.947048965440432433e-01,-2.220160886578347297e-03,-7.849317286717024875e-03,-9.683801316109904656e-04,1.789397876735422666e-02,-3.088480006257450939e-03,3.461753836065103573e-03 +1.967601501804849828e-01,-2.244645877208133362e-03,-7.949944219819397515e-03,-9.715255054084953211e-04,1.791251549739053886e-02,-3.119994114247859069e-03,3.506984524468447720e-03 +1.988145395787117298e-01,-2.269158345450616315e-03,-8.051132186023785692e-03,-9.747060042318636541e-04,1.793123664123097080e-02,-3.151452471079480011e-03,3.552491912032775759e-03 +2.008680557151390489e-01,-2.293698626359588592e-03,-8.152888549440276758e-03,-9.779216799409864123e-04,1.795014177387960208e-02,-3.182853765029721674e-03,3.598280031775772693e-03 +2.029206895700181590e-01,-2.318267497894966299e-03,-8.255221225562077783e-03,-9.811725861428473566e-04,1.796923046602920751e-02,-3.214197486331182158e-03,3.644352008491380784e-03 +2.049724321274755956e-01,-2.342865293761132138e-03,-8.358137602676510985e-03,-9.844587779686429698e-04,1.798850228382483513e-02,-3.245482262847743625e-03,3.690712082302081021e-03 +2.070232743755528182e-01,-2.367492320278018637e-03,-8.461645391049420553e-03,-9.877803092320646024e-04,1.800795678462117216e-02,-3.276706640510883804e-03,3.737364017376975690e-03 +2.090732073062456231e-01,-2.392149622759318171e-03,-8.565752353349141007e-03,-9.911372352381570862e-04,1.802759352232348802e-02,-3.307870519192554915e-03,3.784311608524270308e-03 +2.111222219155439450e-01,-2.416837539702887221e-03,-8.670466233355687141e-03,-9.945296116375629356e-04,1.804741204774613916e-02,-3.338972440332967952e-03,3.831558815968578394e-03 +2.131703092034713254e-01,-2.441556316165357458e-03,-8.775794899297899992e-03,-9.979574942276137495e-04,1.806741190203771733e-02,-3.370010735472601764e-03,3.879109497571863428e-03 +2.152174601741243254e-01,-2.466306909430133337e-03,-8.881746309494162328e-03,-1.001420940546989295e-03,1.808759262135821444e-02,-3.400985035150152352e-03,3.926967474387536156e-03 +2.172636658357122441e-01,-2.491089906953977782e-03,-8.988328254060143005e-03,-1.004920007259298489e-03,1.810795373953872495e-02,-3.431894238210427949e-03,3.975137016030083625e-03 +2.193089172005963650e-01,-2.515905628721141259e-03,-9.095549011529738617e-03,-1.008454752931710677e-03,1.812849478186953633e-02,-3.462736701372624588e-03,4.023621610861814052e-03 +2.213532052853296739e-01,-2.540754913384660543e-03,-9.203416308464020673e-03,-1.012025235981022214e-03,1.814921526861049822e-02,-3.493511710973227699e-03,4.072425917885810938e-03 +2.233965211106962168e-01,-2.565638252760717598e-03,-9.311938572934325489e-03,-1.015631514890056935e-03,1.817011471449143239e-02,-3.524217857592884355e-03,4.121553417737310911e-03 +2.254388557017504569e-01,-2.590556390084795130e-03,-9.421123832513488700e-03,-1.019273650368322792e-03,1.819119262863707828e-02,-3.554854154818778110e-03,4.171008480675500625e-03 +2.274802000878569097e-01,-2.615509795441482450e-03,-9.530980314091377567e-03,-1.022951701801467226e-03,1.821244851382856911e-02,-3.585419052609035030e-03,4.220795243381728241e-03 +2.295205453027292508e-01,-2.640499121738886477e-03,-9.641516454797743885e-03,-1.026665730385236622e-03,1.823388186596795052e-02,-3.615911289009383130e-03,4.270917590499793103e-03 +2.315598823844699505e-01,-2.665525432569231922e-03,-9.752740532640476190e-03,-1.030415798313200071e-03,1.825549217634163043e-02,-3.646330313642716543e-03,4.321379849098842645e-03 +2.335982023756095483e-01,-2.690589347858551601e-03,-9.864660822076577740e-03,-1.034201966547778229e-03,1.827727893408179899e-02,-3.676674682081612112e-03,4.372186499543390288e-03 +2.356354963231459820e-01,-2.715691026984427391e-03,-9.977285948741790658e-03,-1.038024297045116173e-03,1.829924161572108604e-02,-3.706942019998295083e-03,4.423341512875253456e-03 +2.376717552785839183e-01,-2.740832035335226402e-03,-1.009062449432246954e-02,-1.041882852843997080e-03,1.832137969536464656e-02,-3.737132518715644437e-03,4.474849094776863802e-03 +2.397069702979740813e-01,-2.766012567482339316e-03,-1.020468521913722726e-02,-1.045777699222614392e-03,1.834369264148217812e-02,-3.767243724023163019e-03,4.526713271591910337e-03 +2.417411324419524721e-01,-2.791233926658583803e-03,-1.031947646767742213e-02,-1.049708900331064801e-03,1.836617991598521113e-02,-3.797275180545357440e-03,4.578939008802514893e-03 +2.437742327757797534e-01,-2.816496308639028327e-03,-1.043500727161184201e-02,-1.053676520755089359e-03,1.838884097427211789e-02,-3.827224271543272407e-03,4.631530147242841206e-03 +2.458062623693803850e-01,-2.841801285289887770e-03,-1.055128625347605785e-02,-1.057680626501862186e-03,1.841167526479513608e-02,-3.857090870634388019e-03,4.684491463485421746e-03 +2.478372122973819258e-01,-2.867149295806184495e-03,-1.066832239192594227e-02,-1.061721282973660842e-03,1.843468223307234352e-02,-3.886872632610334759e-03,4.737827240461268095e-03 +2.498670736391540859e-01,-2.892541635735385117e-03,-1.078612468349699795e-02,-1.065798557157285331e-03,1.845786131543035100e-02,-3.916568717686886095e-03,4.791541907614442659e-03 +2.518958374788481391e-01,-2.917978812486095655e-03,-1.090470214769686218e-02,-1.069912516774316385e-03,1.848121194389639940e-02,-3.946176702943531532e-03,4.845640038191835128e-03 +2.539234949054359203e-01,-2.943462313617888158e-03,-1.102406374249506604e-02,-1.074063228423305328e-03,1.850473354100248824e-02,-3.975695892639123652e-03,4.900126515073866679e-03 +2.559500370127490432e-01,-2.968993008873421595e-03,-1.114421912014835515e-02,-1.078250761190308266e-03,1.852842552832857995e-02,-4.005124317182461383e-03,4.955005121735659920e-03 +2.579754548995178420e-01,-2.994572099313825859e-03,-1.126517724444831697e-02,-1.082475185676186292e-03,1.855228731627928465e-02,-4.034460507876648852e-03,5.010281143841417185e-03 +2.599997396694109231e-01,-3.020200716451069706e-03,-1.138694752179711389e-02,-1.086736570414962048e-03,1.857631831378343812e-02,-4.063702740883987606e-03,5.065959258291653777e-03 +2.620228824310735782e-01,-3.045879795290064307e-03,-1.150953938541771082e-02,-1.091034985019772403e-03,1.860051791699145890e-02,-4.092848795104304070e-03,5.122044323319654979e-03 +2.640448742981674202e-01,-3.071611315619829884e-03,-1.163296261691963707e-02,-1.095370499871206734e-03,1.862488552245290807e-02,-4.121898276096005141e-03,5.178540786991425726e-03 +2.660657063894090180e-01,-3.097395781345367062e-03,-1.175722690014804894e-02,-1.099743187427946892e-03,1.864942051674068485e-02,-4.150847886720203384e-03,5.235453535989888912e-03 +2.680853698286093101e-01,-3.123235285375726335e-03,-1.188234204798340257e-02,-1.104153120295759110e-03,1.867412228114667672e-02,-4.179697171720024064e-03,5.292787483387090090e-03 +2.701038557447121846e-01,-3.149130436014470173e-03,-1.200831778113166382e-02,-1.108600370743461272e-03,1.869899018988146686e-02,-4.208442753404938001e-03,5.350547997297696451e-03 +2.721211552718337257e-01,-3.175083404029088816e-03,-1.213516438771245105e-02,-1.113085011323392513e-03,1.872402360886679315e-02,-4.237084044396746164e-03,5.408739678929811757e-03 +2.741372595493007380e-01,-3.201095279323006287e-03,-1.226289197127414417e-02,-1.117607116886786987e-03,1.874922190152645413e-02,-4.265618292288584495e-03,5.467367787839788086e-03 +2.761521597216903823e-01,-3.227167731596447455e-03,-1.239151095934471793e-02,-1.122166761913039349e-03,1.877458441951456586e-02,-4.294043695516606106e-03,5.526437304459008715e-03 +2.781658469388685329e-01,-3.253302789931375102e-03,-1.252103172847454475e-02,-1.126764022970456851e-03,1.880011051384279305e-02,-4.322358993440493166e-03,5.585953649868857784e-03 +2.801783123560285804e-01,-3.279501549964902816e-03,-1.265146467726543066e-02,-1.131398974034228096e-03,1.882579952598387868e-02,-4.350561047437315546e-03,5.645922565914341697e-03 +2.821895471337307892e-01,-3.305766133547788626e-03,-1.278282102450566636e-02,-1.136071691905698750e-03,1.885165079028925175e-02,-4.378648522033447461e-03,5.706348636151805688e-03 +2.841995424379405444e-01,-3.332097903868379892e-03,-1.291511139696470412e-02,-1.140782254786974195e-03,1.887766363540739276e-02,-4.406618546958808273e-03,5.767237951372602804e-03 +2.862082894400676536e-01,-3.358498777152338702e-03,-1.304834704288261420e-02,-1.145530739558592824e-03,1.890383737997966551e-02,-4.434469187159195171e-03,5.828595852089274790e-03 +2.882157793170046500e-01,-3.384970890225041853e-03,-1.318253925498061481e-02,-1.150317225658048547e-03,1.893017134023356265e-02,-4.462198832218935811e-03,5.890428027938325768e-03 +2.902220032511660386e-01,-3.411515774733745419e-03,-1.331769948770192974e-02,-1.155141791824070008e-03,1.895666482438981143e-02,-4.489804663335879298e-03,5.952740314918003768e-03 +2.922269524305266541e-01,-3.438135246415233420e-03,-1.345383946563989686e-02,-1.160004518524812784e-03,1.898331713244343424e-02,-4.517284337864835818e-03,6.015538508686067189e-03 +2.942306180486604639e-01,-3.464831225085071489e-03,-1.359097103979253068e-02,-1.164905486047389119e-03,1.901012755658492218e-02,-4.544635671804657905e-03,6.078828649661955838e-03 +2.962329913047793695e-01,-3.491605931472911012e-03,-1.372910626033580633e-02,-1.169844775997911724e-03,1.903709538565484527e-02,-4.571857031038709281e-03,6.142616900994553319e-03 +2.982340634037716764e-01,-3.518460637263128715e-03,-1.386825744853929614e-02,-1.174822469128462702e-03,1.906421989917348944e-02,-4.598945012943618516e-03,6.206909426614936991e-03 +3.002338255562408409e-01,-3.545397750651004942e-03,-1.400843741638303080e-02,-1.179838650169047183e-03,1.909150036944676221e-02,-4.625898384205780721e-03,6.271711990453702687e-03 +3.022322689785442162e-01,-3.572418635563769096e-03,-1.414965848887500544e-02,-1.184893402715382106e-03,1.911893606376952395e-02,-4.652714021962909424e-03,6.337031798382530140e-03 +3.042293848928312450e-01,-3.599525214700919924e-03,-1.429193387496174861e-02,-1.189986810475438363e-03,1.914652623887069458e-02,-4.679389952185961099e-03,6.402874934011033685e-03 +3.062251645270823164e-01,-3.626719179174510768e-03,-1.443527671766181575e-02,-1.195118958997350720e-03,1.917427014729932572e-02,-4.705923902850792269e-03,6.469248142625340717e-03 +3.082195991151471803e-01,-3.654002171358969927e-03,-1.457970039984499838e-02,-1.200289934271880346e-03,1.920216703298226885e-02,-4.732313684253857712e-03,6.536158257539495184e-03 +3.102126798967836940e-01,-3.681375534508722625e-03,-1.472521863331364630e-02,-1.205499822480470083e-03,1.923021613324926715e-02,-4.758556758929216070e-03,6.603612028668063606e-03 +3.122043981176959027e-01,-3.708840972833925970e-03,-1.487184534534790985e-02,-1.210748712710686030e-03,1.925841667713653130e-02,-4.784651520765452470e-03,6.671616326642588850e-03 +3.141947450295727307e-01,-3.736399474374297279e-03,-1.501959430575058006e-02,-1.216036691896171633e-03,1.928676788917902535e-02,-4.810595318158297426e-03,6.740178828370468889e-03 +3.161837118901262844e-01,-3.764052015895661893e-03,-1.516848017255219630e-02,-1.221363849495005855e-03,1.931526898067109296e-02,-4.836385820127860058e-03,6.809306037592514384e-03 +3.181712899631304881e-01,-3.791800204536007607e-03,-1.531851714406738500e-02,-1.226730275383613360e-03,1.934391916159252170e-02,-4.862022264307234651e-03,6.879005787707916791e-03 +3.201574705184592196e-01,-3.819644155207674915e-03,-1.546972012839145476e-02,-1.232136060839244662e-03,1.937271763258678614e-02,-4.887501522273633887e-03,6.949285018025487710e-03 +3.221422448321247800e-01,-3.847584635132070014e-03,-1.562210362223348699e-02,-1.237581296683225047e-03,1.940166358499087126e-02,-4.912822165651816765e-03,7.020151838465325464e-03 +3.241256041863160853e-01,-3.875622004338817508e-03,-1.577568313350490373e-02,-1.243066076190174965e-03,1.943075620335231804e-02,-4.937982523671924882e-03,7.091612817826401828e-03 +3.261075398694370242e-01,-3.903756586170370902e-03,-1.593047333467744603e-02,-1.248590494042535991e-03,1.945999466706565054e-02,-4.962981420494836857e-03,7.163676394317365984e-03 +3.280880431761448723e-01,-3.931987715422747588e-03,-1.608648966275473546e-02,-1.254154644201743544e-03,1.948937814539839636e-02,-4.987816430417719646e-03,7.236349816016004327e-03 +3.300671054073883171e-01,-3.960315118174478725e-03,-1.624374714720637491e-02,-1.259758621882451062e-03,1.951890579879247620e-02,-5.012486502133909998e-03,7.309641271030466701e-03 +3.320447178704457047e-01,-3.988737932947247246e-03,-1.640226125569029317e-02,-1.265402522272325754e-03,1.954857677941507568e-02,-5.036990156094160904e-03,7.383558226080721119e-03 +3.340208718789632880e-01,-4.017255650743878012e-03,-1.656204731466935989e-02,-1.271086441841994680e-03,1.957839023464142195e-02,-5.061327274657201510e-03,7.458108428795543095e-03 +3.359955587529934729e-01,-4.045866304278686841e-03,-1.672312097994621202e-02,-1.276810480809485509e-03,1.960834530297229233e-02,-5.085495728109004285e-03,7.533298933276208822e-03 +3.379687698190326217e-01,-4.074568696544362753e-03,-1.688549667227992601e-02,-1.282574736879086696e-03,1.963844111360040293e-02,-5.109495567007085737e-03,7.609138932779381179e-03 +3.399404964100594673e-01,-4.103360350375113812e-03,-1.704919033393751532e-02,-1.288379308412811869e-03,1.966867678872531722e-02,-5.133325182868378640e-03,7.685634482499853655e-03 +3.419107298655730265e-01,-4.132239445145761622e-03,-1.721421643202647456e-02,-1.294224298136411244e-03,1.969905144030507926e-02,-5.156984934738406343e-03,7.762794002036199278e-03 +3.438794615316307368e-01,-4.161203118349170345e-03,-1.738058999084963335e-02,-1.300109806529761410e-03,1.972956417614721292e-02,-5.180473948776838887e-03,7.840624340887128452e-03 +3.458466827608864258e-01,-4.190248275709954583e-03,-1.754832510781439633e-02,-1.306035936643093693e-03,1.976021409019192848e-02,-5.203791608752486933e-03,7.919133430685142139e-03 +3.478123849126281697e-01,-4.219372295144757472e-03,-1.771743621727813953e-02,-1.312002790075921833e-03,1.979100027247216245e-02,-5.226938833622729423e-03,7.998327778967426541e-03 +3.497765593528163741e-01,-4.248571157034212488e-03,-1.788793673707415877e-02,-1.318010471286545048e-03,1.982192180393331157e-02,-5.249914514288383204e-03,8.078214865056717459e-03 +3.517391974541219102e-01,-4.277841290162474885e-03,-1.805984042013521543e-02,-1.324059083799621582e-03,1.985297775336350087e-02,-5.272718900265168859e-03,8.158800460053221987e-03 +3.537002905959633070e-01,-4.307179325425763115e-03,-1.823315971548822995e-02,-1.330148735832762307e-03,1.988416718559167518e-02,-5.295353045804890302e-03,8.240091551283445712e-03 +3.556598301645456095e-01,-4.336580652612918169e-03,-1.840790686467094761e-02,-1.336279531034468030e-03,1.991548915595168362e-02,-5.317815992167124542e-03,8.322094136532537378e-03 +3.576178075528972378e-01,-4.366041401954122089e-03,-1.858409355007026309e-02,-1.342451578467851458e-03,1.994694270763379754e-02,-5.340108350929254066e-03,8.404813733644118240e-03 +3.595742141609086784e-01,-4.395557631205019265e-03,-1.876173023525806380e-02,-1.348664982998729143e-03,1.997852687901416693e-02,-5.362230625923267158e-03,8.488256469762669315e-03 +3.615290413953694548e-01,-4.425125034595592255e-03,-1.894082753716589893e-02,-1.354919853026546970e-03,2.001024069787052731e-02,-5.384182490755384624e-03,8.572426365037367699e-03 +3.634822806700064302e-01,-4.454739877286584618e-03,-1.912139442817336124e-02,-1.361216297371470926e-03,2.004208318316403475e-02,-5.405964340647305584e-03,8.657328566658677171e-03 +3.654339234055213326e-01,-4.484398517520062945e-03,-1.930343950441492512e-02,-1.367554425048632134e-03,2.007405334638867958e-02,-5.427576171455564952e-03,8.742966842815017015e-03 +3.673839610296283364e-01,-4.514097384088833556e-03,-1.948697015189535323e-02,-1.373934345730439118e-03,2.010615018913394780e-02,-5.449017287520364732e-03,8.829345029662300989e-03 +3.693323849770918654e-01,-4.543833671916912460e-03,-1.967199289172718094e-02,-1.380356168900975355e-03,2.013837270301251092e-02,-5.470287330356697619e-03,8.916466282209559169e-03 +3.712791866897643955e-01,-4.573605128563596085e-03,-1.985851329519715269e-02,-1.386820004212532863e-03,2.017071987230598454e-02,-5.491385577218271981e-03,9.004333149100518172e-03 +3.732243576166232035e-01,-4.603409933465224535e-03,-2.004653585218356404e-02,-1.393325961969171800e-03,2.020319067133567736e-02,-5.512310388014069092e-03,9.092947755833687834e-03 +3.751678892138092802e-01,-4.633247492264936339e-03,-2.023606421503663239e-02,-1.399874152778653180e-03,2.023578406472531857e-02,-5.533060342900301271e-03,9.182311308176845446e-03 +3.771097729446636349e-01,-4.663118278066971309e-03,-2.042710061673507285e-02,-1.406464688575088506e-03,2.026849901132071752e-02,-5.553633568697126377e-03,9.272425171620793036e-03 +3.790500002797654311e-01,-4.693023162667986879e-03,-2.061964618587249637e-02,-1.413097677075070367e-03,2.030133445827233402e-02,-5.574026085385906833e-03,9.363290300777560651e-03 +3.809885626969692352e-01,-4.722965585388918738e-03,-2.081370186527512628e-02,-1.419773228766377194e-03,2.033428934381279612e-02,-5.594235451734704684e-03,9.454905572073086378e-03 +3.829254516814425413e-01,-4.752949485975274148e-03,-2.100926681408528049e-02,-1.426491455620410107e-03,2.036736260054123823e-02,-5.614256448450864745e-03,9.547270874091967272e-03 +3.848606587257031308e-01,-4.782980841344887900e-03,-2.120633957791907484e-02,-1.433252467782582728e-03,2.040055314687393151e-02,-5.634083501271188420e-03,9.640385050788981974e-03 +3.867941753296565421e-01,-4.813067998530910600e-03,-2.140491764137301620e-02,-1.440056375519420569e-03,2.043385989605457642e-02,-5.653710832142023850e-03,9.734246924335646436e-03 +3.887259930006330966e-01,-4.843220916094862806e-03,-2.160499787274338182e-02,-1.446903286550228025e-03,2.046728175221788557e-02,-5.673130552508873041e-03,9.828854695843458958e-03 +3.906561032534255906e-01,-4.873452058477463662e-03,-2.180657696777300175e-02,-1.453793311307852642e-03,2.050081761046589415e-02,-5.692333848099905350e-03,9.924205401943969607e-03 +3.925844976103263773e-01,-4.903776622846215891e-03,-2.200965034432225340e-02,-1.460726559603689858e-03,2.053446635748726670e-02,-5.711310909099772716e-03,1.002029733381388718e-02 +3.945111676011643920e-01,-4.934212233350155491e-03,-2.221421381722009716e-02,-1.467703139844651590e-03,2.056822687311317746e-02,-5.730049863213631731e-03,1.011712730436526916e-02 +3.964361047633429003e-01,-4.964779343952270205e-03,-2.242026243629438934e-02,-1.474723161255414019e-03,2.060209802481694982e-02,-5.748537044475264361e-03,1.021469329878035674e-02 +3.983593006418760796e-01,-4.995502319417923290e-03,-2.262779189527363080e-02,-1.481786729621698039e-03,2.063607867419705449e-02,-5.766758546364767965e-03,1.031299237152089737e-02 +4.002807467894267113e-01,-5.026408383559635012e-03,-2.283679824096510741e-02,-1.488893954815502085e-03,2.067016767689829060e-02,-5.784697795757551969e-03,1.041202179951062658e-02 +4.022004347663427071e-01,-5.057528220010591713e-03,-2.304727755053439381e-02,-1.496044942480974967e-03,2.070436388068603792e-02,-5.802336251492031145e-03,1.051178036659047681e-02 +4.041183561406945790e-01,-5.088896058250748713e-03,-2.325922781155729049e-02,-1.503239799452581790e-03,2.073866612355868000e-02,-5.819653171857695761e-03,1.061226558996369862e-02 +4.060345024883124099e-01,-5.120550241151038996e-03,-2.347264746303964755e-02,-1.510478632478595967e-03,2.077307323597530828e-02,-5.836626326923568218e-03,1.071347728106932309e-02 +4.079488653928228237e-01,-5.152533045745131580e-03,-2.368753737458891701e-02,-1.517761547754844370e-03,2.080758404088690894e-02,-5.853231361140912720e-03,1.081541473312151377e-02 +4.098614364456857340e-01,-5.184891041386194450e-03,-2.390389983310186456e-02,-1.525088652550366354e-03,2.084219735686541203e-02,-5.869442222598809066e-03,1.091807959276704966e-02 +4.117722072462318139e-01,-5.217674061140877353e-03,-2.412174024985065321e-02,-1.532460052625222568e-03,2.087691199332612768e-02,-5.885229060987146391e-03,1.102147370205019787e-02 +4.136811694016988561e-01,-5.250937173458166893e-03,-2.434106663462694276e-02,-1.539875856794813060e-03,2.091172675150293134e-02,-5.900561828910963653e-03,1.112560119684509116e-02 +4.155883145272687984e-01,-5.284739298218006816e-03,-2.456189044601890178e-02,-1.547336170863165914e-03,2.094664043111839000e-02,-5.915407679577401427e-03,1.123046809071205979e-02 +4.174936342461046945e-01,-5.319142808050428446e-03,-2.478422751859379186e-02,-1.554841103325234385e-03,2.098165182211317314e-02,-5.929730309428947119e-03,1.133608177636367606e-02 +4.193971201893874623e-01,-5.354215148315637392e-03,-2.500809788617461282e-02,-1.562390762803617019e-03,2.101675970772012847e-02,-5.943493194200972357e-03,1.144245265342985936e-02 +4.212987639963524100e-01,-5.390027268887524585e-03,-2.523352704703000354e-02,-1.569985260457716679e-03,2.105196286940767478e-02,-5.956656965253691489e-03,1.154959310264423722e-02 +4.231985573143263735e-01,-5.426653576945308401e-03,-2.546054581195857092e-02,-1.577624708771401939e-03,2.108726008060220203e-02,-5.969179765903306727e-03,1.165751915961044625e-02 +4.250964917987639091e-01,-5.464172047514387169e-03,-2.568919140465522263e-02,-1.585309221617238907e-03,2.112265010825969611e-02,-5.981018041508984889e-03,1.176624987426397491e-02 +4.269925591132845422e-01,-5.502664361830934131e-03,-2.591950804700784289e-02,-1.593038914567930899e-03,2.115813171320848549e-02,-5.992127529541242323e-03,1.187580765950705178e-02 +4.288867509297087377e-01,-5.542214985064179700e-03,-2.615154762502591712e-02,-1.600813908749112611e-03,2.119370365748197982e-02,-6.002462417554685348e-03,1.198621850108808930e-02 +4.307790589280949822e-01,-5.582909419594278208e-03,-2.638536965093086356e-02,-1.608634327494432676e-03,2.122936469180241800e-02,-6.011973120580636071e-03,1.209751348678393505e-02 +4.326694747967760879e-01,-5.624836943339354793e-03,-2.662104254773439704e-02,-1.616500297232027284e-03,2.126511356186557011e-02,-6.020612640999317700e-03,1.220972784174256635e-02 +4.345579902323956079e-01,-5.668086888308678482e-03,-2.685864418269194934e-02,-1.624411947509670851e-03,2.130094901223520201e-02,-6.028330996688483628e-03,1.232290134248186994e-02 +4.364445969399446956e-01,-5.712749865396877028e-03,-2.709826247034092736e-02,-1.632369416765538797e-03,2.133686978090784669e-02,-6.035079109234661732e-03,1.243707855465810315e-02 +4.383292866327981319e-01,-5.758915598417672053e-03,-2.733999499872560510e-02,-1.640372845343969054e-03,2.137287460216981405e-02,-6.040806525662572625e-03,1.255231083860073107e-02 +4.402120510327509062e-01,-5.806673513138604567e-03,-2.758395149233434929e-02,-1.648422382802023668e-03,2.140896220178114748e-02,-6.045464500283435731e-03,1.266865295347319696e-02 +4.420928818700546881e-01,-5.856111188039061534e-03,-2.783025168384414133e-02,-1.656518184591339662e-03,2.144513130547208088e-02,-6.049005264033020801e-03,1.278616816724268898e-02 +4.439717708834537979e-01,-5.907312558797056003e-03,-2.807902858522715386e-02,-1.664660414443093006e-03,2.148138063017760932e-02,-6.051381029419427605e-03,1.290492311909763150e-02 +4.458487098202219556e-01,-5.960358683388444594e-03,-2.833042594608248255e-02,-1.672849245481478802e-03,2.151770888736635398e-02,-6.052547992406184275e-03,1.302499343531959321e-02 +4.477236904361980296e-01,-6.015324909314569667e-03,-2.858460144709046649e-02,-1.681084859276921542e-03,2.155411478580117335e-02,-6.052463781524565663e-03,1.314645842302762484e-02 +4.495967044958226744e-01,-6.072280093664014257e-03,-2.884172407942784538e-02,-1.689367450776781783e-03,2.159059702419018414e-02,-6.051088943469162644e-03,1.326940647322343361e-02 +4.514677437721743569e-01,-6.131286216320160250e-03,-2.910197651764229870e-02,-1.697697221782993628e-03,2.162715429642985202e-02,-6.048389346116053722e-03,1.339393088466632117e-02 +4.533368000470053838e-01,-6.192395476053540182e-03,-2.936555379507025179e-02,-1.706074389606007744e-03,2.166378528950165319e-02,-6.044334032813460447e-03,1.352013238003166654e-02 +4.552038651107780942e-01,-6.255650296386309345e-03,-2.963266396410517423e-02,-1.714499182193093916e-03,2.170048868220618432e-02,-6.038898690570141010e-03,1.364811763485383571e-02 +4.570689307627009978e-01,-6.321080458658900544e-03,-2.990352725982366525e-02,-1.722971843406477261e-03,2.173726314557982223e-02,-6.032063875395069635e-03,1.377800032255495181e-02 +4.589319888107645240e-01,-6.388702621438057706e-03,-3.017837623911077960e-02,-1.731492629524881499e-03,2.177410733872935794e-02,-6.023817838455475075e-03,1.390990001608701128e-02 +4.607930310717773814e-01,-6.458518054089089008e-03,-3.045745483372435183e-02,-1.740061812620949909e-03,2.181101991031266646e-02,-6.014156109544320332e-03,1.404394278714514444e-02 +4.626520493714022519e-01,-6.530511995257185821e-03,-3.074101869017712915e-02,-1.748679680710222509e-03,2.184799949799701504e-02,-6.003084210499970430e-03,1.418025903087754246e-02 +4.645090355441917063e-01,-6.604650304558434999e-03,-3.102933280212347850e-02,-1.757346541439927523e-03,2.188504472885508673e-02,-5.990615350608158307e-03,1.431898598322700437e-02 +4.663639814336241196e-01,-6.680879403990288949e-03,-3.132267186821040966e-02,-1.766062715856822488e-03,2.192215421204878403e-02,-5.976774318846795990e-03,1.446026472291815981e-02 +4.682168788921395874e-01,-6.759123691730986420e-03,-3.162131854876392562e-02,-1.774828546364182625e-03,2.195932654537544743e-02,-5.961596633843657678e-03,1.460424068828313632e-02 +4.700677197811755081e-01,-6.839284006040245723e-03,-3.192556240167153392e-02,-1.783644391986261403e-03,2.199656030541717947e-02,-5.945129625428148025e-03,1.475106247887521260e-02 +4.719164959712024987e-01,-6.921236653020058915e-03,-3.223569809379505519e-02,-1.792510634341951776e-03,2.203385405374771663e-02,-5.927434498959946799e-03,1.490088155741225127e-02 +4.737631993417600329e-01,-7.004830707474545178e-03,-3.255202359965397862e-02,-1.801427671194487119e-03,2.207120633008117019e-02,-5.908585047288287921e-03,1.505385150534348411e-02 +4.756078217814919684e-01,-7.089887399943956003e-03,-3.287483881954184756e-02,-1.810395922310852491e-03,2.210861565270205373e-02,-5.888670117441235918e-03,1.521012601091809371e-02 +4.774503551881826846e-01,-7.176198161378245788e-03,-3.320444277767425206e-02,-1.819415825683168546e-03,2.214608051332387154e-02,-5.867793358839409196e-03,1.536985903186826805e-02 +4.792907914687918880e-01,-7.263523996324780518e-03,-3.354113200819298229e-02,-1.828487841031582435e-03,2.218359938018107241e-02,-5.846075213624860913e-03,1.553320223938686913e-02 +4.811291225394909166e-01,-7.351593185708179753e-03,-3.388519727822560113e-02,-1.837612447641295962e-03,2.222117069130850886e-02,-5.823651476787685723e-03,1.570030508855999388e-02 +4.829653403256978783e-01,-7.440101590088470454e-03,-3.423692111234542812e-02,-1.846790143632214476e-03,2.225879285638718980e-02,-5.800676383519312064e-03,1.587131289858685623e-02 +4.847994367621128453e-01,-7.528710139598149970e-03,-3.459657526743380868e-02,-1.856021444705333121e-03,2.229646425094027051e-02,-5.777320019979813662e-03,1.604636454711796251e-02 +4.866314037927538805e-01,-7.617045758319038878e-03,-3.496441707336248733e-02,-1.865306887454019945e-03,2.233418321553738359e-02,-5.753771718368806687e-03,1.622559182488410995e-02 +4.884612333709922316e-01,-7.704699324493669531e-03,-3.534068605180852585e-02,-1.874647022447709392e-03,2.237194805096912736e-02,-5.730237384624308102e-03,1.640911782044719713e-02 +4.902889174595871369e-01,-7.791227726540060280e-03,-3.572560123203804422e-02,-1.884042418437585631e-03,2.240975702048588966e-02,-5.706943940843874075e-03,1.659705357282786203e-02 +4.921144480307220181e-01,-7.876150737006005001e-03,-3.611935637427192425e-02,-1.893493658997974461e-03,2.244760834737983754e-02,-5.684133431282939117e-03,1.678949824975733973e-02 +4.939378170660390643e-01,-7.958953823686033824e-03,-3.652211684529899083e-02,-1.903001340534749139e-03,2.248550020531381657e-02,-5.662067585671452102e-03,1.698653586440346946e-02 +4.957590165566744811e-01,-8.039087674043005755e-03,-3.693401540274449113e-02,-1.912566070061653837e-03,2.252343072367533547e-02,-5.641025491263996529e-03,1.718823371350069670e-02 +4.975780385032944064e-01,-8.115969335281005004e-03,-3.735514827057275639e-02,-1.922188463891480589e-03,2.256139798034636351e-02,-5.621303512922369745e-03,1.739463998973126563e-02 +4.993948749161288836e-01,-8.188983473234772922e-03,-3.778557110213057874e-02,-1.931869145880659886e-03,2.259940000234394314e-02,-5.603214603447381065e-03,1.760578137556912184e-02 +5.012095178150079988e-01,-8.257483989277723466e-03,-3.822529398373108317e-02,-1.941608744594590678e-03,2.263743476126854962e-02,-5.587087404326728478e-03,1.782166226355719890e-02 +5.030219592293964093e-01,-8.320796264870273379e-03,-3.867427806104966476e-02,-1.951407886979981705e-03,2.267550017352020983e-02,-5.573265597326365443e-03,1.804226082300748601e-02 +5.048321911984281485e-01,-8.378219005290508600e-03,-3.913243069433854776e-02,-1.961267199956767335e-03,2.271359409702352131e-02,-5.562105535515687697e-03,1.826752778231116939e-02 +5.066402057709425977e-01,-8.429027447105679263e-03,-3.959960140188734101e-02,-1.971187302270768424e-03,2.275171432591331297e-02,-5.553975425329500844e-03,1.849738376422161573e-02 +5.084459950055177924e-01,-8.472477722810206613e-03,-4.007557769101675882e-02,-1.981168806620262068e-03,2.278985859535497330e-02,-5.549255638216870311e-03,1.873171694133810017e-02 +5.102495509705069487e-01,-8.507808253315127939e-03,-4.056008020055430763e-02,-1.991212309470677400e-03,2.282802457904871754e-02,-5.548332381226748523e-03,1.897038217645119831e-02 +5.120508657440723255e-01,-8.534245734409414558e-03,-4.105275959699721672e-02,-2.001318390149890887e-03,2.286620988118102590e-02,-5.551598891296783377e-03,1.921319715730549285e-02 +5.138499314142201957e-01,-8.551009496902892137e-03,-4.155319186720575486e-02,-2.011487603281723793e-03,2.290441204233718830e-02,-5.559452473392495660e-03,1.945994191405672410e-02 +5.156467400788354860e-01,-8.557316223297129626e-03,-4.206087548787569574e-02,-2.021720477501651788e-03,2.294262853644034664e-02,-5.572291091862373660e-03,1.971035529166049668e-02 +5.174412838457173036e-01,-8.552385578971114996e-03,-4.257522701868181325e-02,-2.032017507785019439e-03,2.298085676944017566e-02,-5.590510559449345281e-03,1.996413495361441803e-02 +5.192335548326121319e-01,-8.535447156746053463e-03,-4.309557878265419389e-02,-2.042379152586382748e-03,2.301909408136680601e-02,-5.614503078189769618e-03,2.022093415213215353e-02 +5.210235451672501350e-01,-8.505745348521527929e-03,-4.362117480364286620e-02,-2.052805823855410969e-03,2.305733774780553344e-02,-5.644650797961844359e-03,2.048036255375485643e-02 +5.228112469873781309e-01,-8.462547804161628731e-03,-4.415117005308154263e-02,-2.063297883258384362e-03,2.309558497518103201e-02,-5.681325028984720964e-03,2.074198175732702709e-02 +5.245966524407953413e-01,-8.405152151789229559e-03,-4.468462694031544863e-02,-2.073855637766952413e-03,2.313383290968825182e-02,-5.724881155989104686e-03,2.100530698755850509e-02 +5.263797536853872527e-01,-8.332893540265624843e-03,-4.522051439945604101e-02,-2.084479331257903245e-03,2.317207863142384944e-02,-5.775654097641230028e-03,2.126980503950222021e-02 +5.281605428891603671e-01,-8.245153950127780129e-03,-4.575770666602817921e-02,-2.095169138448770701e-03,2.321031916287885979e-02,-5.833956139074831897e-03,2.153489403477737082e-02 +5.299390122302762851e-01,-8.141369294101889406e-03,-4.629498289997578675e-02,-2.105925160186611182e-03,2.324855146917602020e-02,-5.900069739483487438e-03,2.179994293188593918e-02 +5.317151538970863456e-01,-8.021039372316574045e-03,-4.683102686977198836e-02,-2.116747414246006407e-03,2.328677246016547936e-02,-5.974244886155550931e-03,2.206427237086706905e-02 +5.334889600881657090e-01,-7.883737398767191179e-03,-4.736442833135730279e-02,-2.127635831227694865e-03,2.332497899814150316e-02,-6.056694921818323550e-03,2.232715389515103899e-02 +5.352604230123479967e-01,-7.729117922246329410e-03,-4.789368374494799852e-02,-2.138590245346406061e-03,2.336316790361677234e-02,-6.147588734034384328e-03,2.258781207203454378e-02 +5.370295348887590414e-01,-7.556927561396148928e-03,-4.841719956775283884e-02,-2.149610392599756673e-03,2.340133595041684125e-02,-6.247047698303609745e-03,2.284542353798750536e-02 +5.387962879468511934e-01,-7.367016037720833938e-03,-4.893329313987086759e-02,-2.160695899210036963e-03,2.343947988501373855e-02,-6.355142773449088886e-03,2.309912240341342737e-02 +5.405606744264380703e-01,-7.159343170136812061e-03,-4.944019964253687505e-02,-2.171846278670488441e-03,2.347759642527044721e-02,-6.471883743663565947e-03,2.334799620406366807e-02 +5.423226865777274197e-01,-6.933992250777284289e-03,-4.993607310440625197e-02,-2.183060929671061610e-03,2.351568226883066909e-02,-6.597220280697330824e-03,2.359109502230406427e-02 +5.440823166613565354e-01,-6.691177452073140616e-03,-5.041899422753531174e-02,-2.194339119598223697e-03,2.355373410378035562e-02,-6.731031815556150558e-03,2.382742990132969557e-02 +5.458395569484247867e-01,-6.431255698261235595e-03,-5.088697627098484610e-02,-2.205679990074687052e-03,2.359174861409204030e-02,-6.873125936389397339e-03,2.405597704072734433e-02 +5.475943997205290348e-01,-6.154735155864461682e-03,-5.133797176666511641e-02,-2.217082545306781061e-03,2.362972249225550256e-02,-7.023230765808836581e-03,2.427568262089667070e-02 +5.493468372697968283e-01,-5.862285733978941926e-03,-5.176988166442862693e-02,-2.228545653051090380e-03,2.366765244550439809e-02,-7.180991596890741978e-03,2.448546529997237126e-02 +5.510968618989195988e-01,-5.554748277989330317e-03,-5.218056318092108292e-02,-2.240068036763580035e-03,2.370553520969506320e-02,-7.345965748236548548e-03,2.468422339439502525e-02 +5.528444659211878554e-01,-5.233143738284640883e-03,-5.256784130853529857e-02,-2.251648273694236736e-03,2.374336756179383154e-02,-7.517618225898701004e-03,2.487083746291443923e-02 +5.545896416605240464e-01,-4.898680754711357520e-03,-5.292951879001729643e-02,-2.263284792800318405e-03,2.378114632910645690e-02,-7.695315459241471555e-03,2.504417803261203140e-02 +5.563323814515162002e-01,-4.552765501506348018e-03,-5.326338894556995052e-02,-2.274975873190434282e-03,2.381886840219986715e-02,-7.878324514641751874e-03,2.520311022559760389e-02 +5.580726776394524524e-01,-4.197007517668454972e-03,-5.356724760321823764e-02,-2.286719642653747483e-03,2.385653075206659729e-02,-8.065806219859252349e-03,2.534650228520387086e-02 +5.598105225803533536e-01,-3.833227398022070846e-03,-5.383890786309698806e-02,-2.298514075651529077e-03,2.389413043905134343e-02,-8.256813415854937291e-03,2.547323095972323093e-02 +5.615459086410068412e-01,-3.463462421118819988e-03,-5.407621442295477976e-02,-2.310356996833883188e-03,2.393166463069095959e-02,-8.450287180827022529e-03,2.558218981629637942e-02 +5.632788281990006585e-01,-3.089972112715807899e-03,-5.427705925165200729e-02,-2.322246082430124824e-03,2.396913061462675743e-02,-8.645054975902576738e-03,2.567229705457151415e-02 +5.650092736427564377e-01,-2.715241927480746467e-03,-5.443939738720886962e-02,-2.334178861645709330e-03,2.400652581629260027e-02,-8.839827504919071585e-03,2.574250501622542611e-02 +5.667372373715630074e-01,-2.341986827127301007e-03,-5.456126508117736806e-02,-2.346152719762767836e-03,2.404384781100373306e-02,-9.033197819509689031e-03,2.579180710977529642e-02 +5.684627117956096987e-01,-1.973153207730181206e-03,-5.464079679312977555e-02,-2.358164906103779948e-03,2.408109434231964219e-02,-9.223639961704757986e-03,2.581924857300049725e-02 +5.701856893360195411e-01,-1.611920242422606271e-03,-5.467624417693709576e-02,-2.370212537279375083e-03,2.411826333676219541e-02,-9.409509329403655323e-03,2.582393503274327262e-02 +5.719061624248831244e-01,-1.261698808045422140e-03,-5.466599489115611665e-02,-2.382292607897670833e-03,2.415535292272042583e-02,-9.589041510925261705e-03,2.580504275845994441e-02 +5.736241235052911280e-01,-9.261304319102735489e-04,-5.460859229190894587e-02,-2.394401996968391951e-03,2.419236144123497120e-02,-9.760354327043948078e-03,2.576182844070237440e-02 +5.753395650313679610e-01,-6.090846435926782539e-04,-5.450275529619649090e-02,-2.406537481181657279e-03,2.422928747061102797e-02,-9.921450212287718715e-03,2.569363962782829358e-02 +5.770524794683048464e-01,-3.146526948866030203e-04,-5.434739836775441535e-02,-2.418695742378199932e-03,2.426612983582441749e-02,-1.007021526515019909e-02,2.559992569287383377e-02 +5.787628592923925730e-01,-4.714379648145546850e-05,-5.414165253601706945e-02,-2.430873382221655429e-03,2.430288762475949632e-02,-1.020442663032882467e-02,2.548024739025515559e-02 +5.804706969910552461e-01,1.889237184981392066e-04,-5.388488603642094948e-02,-2.443066936343243640e-03,2.433956020854500732e-02,-1.032175410265394680e-02,2.533428740036814536e-02 +5.821759850628824839e-01,3.888316357764968426e-04,-5.357672497652296584e-02,-2.455272895199278951e-03,2.437614725285134482e-02,-1.041976480919807983e-02,2.516186084179744195e-02 +5.838787160176629465e-01,5.476719133657354834e-04,-5.321707264114552971e-02,-2.467487715543401853e-03,2.441264873385428844e-02,-1.049593029227778411e-02,2.496292807626504864e-02 +5.855788823764169759e-01,6.603603485409064496e-04,-5.280613137302532512e-02,-2.479707838361745308e-03,2.444906495242986019e-02,-1.054763080760012794e-02,2.473760213799781893e-02 +5.872764766714295703e-01,7.216497454247711597e-04,-5.234442210400475082e-02,-2.491929712823222256e-03,2.448539654248415090e-02,-1.057216459663198709e-02,2.448615972446594943e-02 +5.889714914462828022e-01,7.261455481014917994e-04,-5.183280343239073107e-02,-2.504149818192163545e-03,2.452164449130256330e-02,-1.056675659934020600e-02,2.420905188770005809e-02 +5.906639192558894580e-01,6.683248746010447784e-04,-5.127249076395702654e-02,-2.516364684440150924e-03,2.455781014641653104e-02,-1.052856485659460842e-02,2.390691314159108447e-02 +5.923537526665245689e-01,5.425543765633647963e-04,-5.066507457121285224e-02,-2.528570920106984910e-03,2.459389522554116317e-02,-1.045469316045933607e-02,2.358057043773841358e-02 +5.940409842558590503e-01,3.431120193603384369e-04,-5.001253718769282930e-02,-2.540765235334604619e-03,2.462990182656391025e-02,-1.034220035514689558e-02,2.323105277910470293e-02 +5.957256066129916761e-01,6.420916297703325787e-05,-4.931726941269752584e-02,-2.552944470835794965e-03,2.466583243472990003e-02,-1.018811300656788203e-02,2.285959875345012701e-02 +5.974076123384820525e-01,-2.999850022790464805e-04,-4.858208539585035651e-02,-2.565105622902899649e-03,2.470168993052671361e-02,-9.989437493647902386e-03,2.246766452888225921e-02 +5.990869940443830366e-01,-7.553170291874799396e-04,-4.781023758723422501e-02,-2.577245881538098304e-03,2.473747759139326846e-02,-9.743172083249217880e-03,2.205692863776035836e-02 +6.007637443542724887e-01,-1.307623537035624032e-03,-4.700542602054139096e-02,-2.589362654209268356e-03,2.477319909430468828e-02,-9.446323981150615140e-03,2.162930397511467900e-02 +6.024378559032870228e-01,-1.962700942102987584e-03,-4.617181295464990037e-02,-2.601453595727806593e-03,2.480885852313084033e-02,-9.095921167331030843e-03,2.118693602222371936e-02 +6.041093213381530935e-01,-2.726275492482173324e-03,-4.531402950353675269e-02,-2.613516650528236070e-03,2.484446035563248750e-02,-8.689027999012947229e-03,2.073221205494587757e-02 +6.057781333172199689e-01,-3.603974281557623376e-03,-4.443718302168647716e-02,-2.625550075333210522e-03,2.488000947255432707e-02,-8.222765619996854139e-03,2.026776459945230438e-02 +6.074442845104918165e-01,-4.601289133137083549e-03,-4.354686417610582200e-02,-2.637552478298357990e-03,2.491551114823704199e-02,-7.694321724793521701e-03,1.979647085172149198e-02 +6.091077675996596774e-01,-5.723546295150138200e-03,-4.264914887548826306e-02,-2.649522857904028708e-03,2.495097104072629968e-02,-7.100973657974318266e-03,1.932145684280381012e-02 +6.107685752781343291e-01,-6.975869841777673601e-03,-4.175059915061378468e-02,-2.661460632200288688e-03,2.498639518428082129e-02,-6.440101860814127603e-03,1.884609841650281697e-02 +6.124267002510773716e-01,-8.363148657397264166e-03,-4.085826342918875975e-02,-2.673365676129242901e-03,2.502178997578895225e-02,-5.709211854045681891e-03,1.837401794895763413e-02 +6.140821352354338680e-01,-9.889998828787270255e-03,-3.997967205605180291e-02,-2.685238359575136975e-03,2.505716216482885284e-02,-4.905949085417988381e-03,1.790908437128641006e-02 +6.157348729599646520e-01,-1.156072837392766763e-02,-3.912283179423937246e-02,-2.697079581526661690e-03,2.509251883074142125e-02,-4.028119265645805844e-03,1.745540929425213814e-02 +6.173849061652770809e-01,-1.337930063421793360e-02,-3.829621712077783957e-02,-2.708890804882138625e-03,2.512786736717983679e-02,-3.073706918906662835e-03,1.701734316181776480e-02 +6.190322276038583427e-01,-1.534929758037369314e-02,-3.750875991549935257e-02,-2.720674091906283548e-03,2.516321545949439312e-02,-2.040894088058007195e-03,1.659946828964073764e-02 +6.206768300401065419e-01,-1.747388226285521984e-02,-3.676983554475805882e-02,-2.732432140192114344e-03,2.519857105750356041e-02,-9.280773314672289068e-04,1.620659241587034394e-02 +6.223187062503623412e-01,-1.975576427351993097e-02,-3.608924700475986402e-02,-2.744168318538670655e-03,2.523394234679885925e-02,2.661101732345848379e-04,1.584373955259643293e-02 +6.239578490229409358e-01,-2.219716238003700479e-02,-3.547720480760925554e-02,-2.755886696666568110e-03,2.526933772583676505e-02,1.542782812663559197e-03,1.551614242260830438e-02 +6.255942511581639165e-01,-2.479976908144861797e-02,-3.494430747190726411e-02,-2.767592077439142914e-03,2.530476576545731895e-02,2.902784562649525762e-03,1.522922733699686761e-02 +6.272279054683905786e-01,-2.756471715253102500e-02,-3.450151552026646529e-02,-2.779290032506842069e-03,2.534023517898514877e-02,4.346669192613888434e-03,1.498860482657846278e-02 +6.288588047780496737e-01,-3.049254433229192304e-02,-3.416012524624118668e-02,-2.790986925716468833e-03,2.537575478259691472e-02,5.874686328357048640e-03,1.480005432300485375e-02 +6.304869419236707184e-01,-3.358316197469096709e-02,-3.393173944857825336e-02,-2.802689945941406503e-03,2.541133345084909886e-02,7.486762937802413954e-03,1.466950821658355789e-02 +6.321123097539158575e-01,-3.683582440689327914e-02,-3.382823456333931139e-02,-2.814407131655888121e-03,2.544698007894420633e-02,9.182486815502747049e-03,1.460303646631724503e-02 +6.337349011296107282e-01,-4.024909780343995103e-02,-3.386172638837021270e-02,-2.826147394280910212e-03,2.548270353494160381e-02,1.096109477152863547e-02,1.460682779050210753e-02 +6.353547089237763235e-01,-4.382083454972976466e-02,-3.404453325661354474e-02,-2.837920543019131683e-03,2.551851261005610647e-02,1.282145479078914893e-02,1.468716976060114622e-02 +6.369717260216599675e-01,-4.754814596227195655e-02,-3.438913503952886752e-02,-2.849737302398633234e-03,2.555441597492817338e-02,1.476205599469151801e-02,1.485043104774827830e-02 +6.385859453207668457e-01,-5.142737897440792516e-02,-3.490813328861434861e-02,-2.861609327074971908e-03,2.559042211866505157e-02,1.678099633691516532e-02,1.510303611905997392e-02 +6.401973597308906472e-01,-5.545409589792069799e-02,-3.561420584243248783e-02,-2.873549222947639065e-03,2.562653930144264194e-02,1.887597013631898274e-02,1.545144517802832761e-02 +6.418059621741456500e-01,-5.962305425088464811e-02,-3.652006113705127954e-02,-2.885570550710630806e-03,2.566277549473815894e-02,2.104426150876158308e-02,1.590212984682605468e-02 +6.434117455849966971e-01,-6.392819230187350710e-02,-3.763839128392340583e-02,-2.897687842994271529e-03,2.569913832629884196e-02,2.328273350439414788e-02,1.646154669635844364e-02 +6.450147029102913931e-01,-6.836261402435273027e-02,-3.898182006406720329e-02,-2.909916602683461535e-03,2.573563501910090812e-02,2.558782515390236115e-02,1.713611622768985987e-02 +6.466148271092899691e-01,-7.291858035103639113e-02,-4.056285504375364143e-02,-2.922273306081833461e-03,2.577227232774071536e-02,2.795554386121653842e-02,1.793219045965002006e-02 +6.482121111536969238e-01,-7.758750181879725538e-02,-4.239383246977530828e-02,-2.934775407543647411e-03,2.580905647934581340e-02,3.038146267640845152e-02,1.885603083078868369e-02 +6.498065480276917771e-01,-8.235993496110502465e-02,-4.448686440911468754e-02,-2.947441325227855360e-03,2.584599311341970185e-02,3.286071833137788056e-02,1.991377914019339807e-02 +6.513981307279594901e-01,-8.722558040480082875e-02,-4.685378529879280496e-02,-2.960290440664983206e-03,2.588308721413366820e-02,3.538801413918501310e-02,2.111142733648085565e-02 +6.529868522637218842e-01,-9.217328808452519839e-02,-4.950609405853014650e-02,-2.973343082546300344e-03,2.592034304480566806e-02,3.795761784416495227e-02,2.245479388808599777e-02 +6.545727056567677282e-01,-9.719106240717521883e-02,-5.245490068644492743e-02,-2.986620506876932514e-03,2.595776408929963142e-02,4.056336806543879570e-02,2.394949054972953309e-02 +6.561556839414840470e-01,-1.022660723895481988e-01,-5.571086900884465037e-02,-3.000144883347812159e-03,2.599535298071329117e-02,4.319868008085662320e-02,2.560089570019581304e-02 +6.577357801648857638e-01,-1.073846664679499741e-01,-5.928416067179945181e-02,-3.013939267403014863e-03,2.603311143891909665e-02,4.585655164482158042e-02,2.741412528732436321e-02 +6.593129873866474533e-01,-1.125323895910403244e-01,-6.318437880995432521e-02,-3.028027574021293581e-03,2.607104020792661361e-02,4.852957340697641037e-02,2.939400490152264608e-02 +6.608872986791325399e-01,-1.176940042945176373e-01,-6.742051271649823763e-02,-3.042434539643781161e-03,2.610913899075900657e-02,5.120994079482572847e-02,3.154504109430325554e-02 +6.624587071274248284e-01,-1.228535165236212534e-01,-7.200088395639206440e-02,-3.057185692505725964e-03,2.614740638684929022e-02,5.388946593045026295e-02,3.387139171908003232e-02 +6.640272058293580359e-01,-1.279942037319446169e-01,-7.693309056304671600e-02,-3.072307307910984286e-03,2.618583983212866007e-02,5.655959417890139146e-02,3.637684220462571866e-02 +6.655927878955469890e-01,-1.330986480956236340e-01,-8.222395646391167379e-02,-3.087826363097438079e-03,2.622443553748722220e-02,5.921141992571390261e-02,3.906477497575311375e-02 +6.671554464494171555e-01,-1.381487717665791370e-01,-8.787947912269913897e-02,-3.103770491491329026e-03,2.626318843278601864e-02,6.183570722933440927e-02,4.193814577979520369e-02 +6.687151746272345099e-01,-1.431258768054148034e-01,-9.390478077055465500e-02,-3.120167929239756836e-03,2.630209210280084683e-02,6.442291023123673799e-02,4.499945745149643017e-02 +6.702719655781371744e-01,-1.480106900343777510e-01,-1.003040601326070647e-01,-3.137047458525947692e-03,2.634113874057313412e-02,6.696319266399394787e-02,4.825073751474656669e-02 +6.718258124641639517e-01,-1.527834071611781641e-01,-1.070805481510108403e-01,-3.154438345247920390e-03,2.638031909484446491e-02,6.944645679755100109e-02,5.169351364268310461e-02 +6.733767084602854114e-01,-1.574237443624530997e-01,-1.142364648821332235e-01,-3.172370278462692452e-03,2.641962241687963522e-02,7.186236617625370304e-02,5.532879284053045243e-02 +6.749246467544329775e-01,-1.619109906993773107e-01,-1.217729791647540888e-01,-3.190873300139900667e-03,2.645903641830076383e-02,7.420037423768664764e-02,5.915704191198484230e-02 +6.764696205475297930e-01,-1.662240640010620440e-01,-1.296901720006503611e-01,-3.209977734332061013e-03,2.649854722493517542e-02,7.644975349645005691e-02,6.317816797794566230e-02 +6.780116230535196964e-01,-1.703415705682612691e-01,-1.379870026097376745e-01,-3.229714114413499357e-03,2.653813934107926431e-02,7.859962421837321522e-02,6.739150150467185241e-02 +6.795506474993976420e-01,-1.742418649586419677e-01,-1.466612774544785447e-01,-3.250113106427972558e-03,2.657779561389653905e-02,8.063898914378601901e-02,7.179578177317390286e-02 +6.810866871252394539e-01,-1.779031146187610157e-01,-1.557096231530630537e-01,-3.271205426831820876e-03,2.661749719590360275e-02,8.255676483971248480e-02,7.638914352161418897e-02 +6.826197351842306915e-01,-1.813033664169590442e-01,-1.651274636051876521e-01,-3.293021760892233477e-03,2.665722352208102497e-02,8.434181468410693883e-02,8.116910457707847415e-02 +6.841497849426976252e-01,-1.844206133697644245e-01,-1.749090000306214310e-01,-3.315592678505868696e-03,2.669695228499403827e-02,8.598298613330969398e-02,8.613255730368635232e-02 +6.856768296801356577e-01,-1.872328652133459226e-01,-1.850471955462376084e-01,-3.338948545318284021e-03,2.673665941408543203e-02,8.746914484682105639e-02,9.127576117281753498e-02 +6.872008626892394112e-01,-1.897182194498953089e-01,-1.955337640721674097e-01,-3.363119433907154916e-03,2.677631906441212475e-02,8.878921138267414914e-02,9.659433716205866050e-02 +6.887218772759319263e-01,-1.918549329103002299e-01,-2.063591631941707194e-01,-3.388135033160661735e-03,2.681590360536516629e-02,8.993219975745252170e-02,1.020832649572136486e-01 +6.902398667593944159e-01,-1.936214962194405809e-01,-2.175125916977307672e-01,-3.414024556930381898e-03,2.685538361599640669e-02,9.088725274684049393e-02,1.077368818478899176e-01 +6.917548244720952422e-01,-1.949967062879729984e-01,-2.289819907848657798e-01,-3.440816650441525914e-03,2.689472788802634018e-02,9.164368240303530366e-02,1.135488853595878339e-01 +6.932667437598194482e-01,-1.959597413236340013e-01,-2.407540505798519970e-01,-3.468539292729137455e-03,2.693390342447450972e-02,9.219100668286683842e-02,1.195123367549100202e-01 +6.947756179816976241e-01,-1.964902357299166402e-01,-2.528142217178618867e-01,-3.497219704591135198e-03,2.697287545821702021e-02,9.251898642940542539e-02,1.256196658873245386e-01 +6.962814405102357718e-01,-1.965683522919435822e-01,-2.651467295948212732e-01,-3.526884253439843860e-03,2.701160746092651915e-02,9.261766688210379406e-02,1.318626819996472077e-01 +6.977842047313435048e-01,-1.961748588226544798e-01,-2.777345954535794803e-01,-3.557558356348758822e-03,2.705006116808693697e-02,9.247740955996842926e-02,1.382325826422553805e-01 +6.992839040443636911e-01,-1.952911983035212629e-01,-2.905596604402163607e-01,-3.589266389479725625e-03,2.708819660956256897e-02,9.208893437266563176e-02,1.447199679163332087e-01 +7.007805318621018742e-01,-1.938995612620801057e-01,-3.036026143475309969e-01,-3.622031589847676727e-03,2.712597213555254308e-02,9.144335486295204762e-02,1.513148567307797954e-01 +7.022740816108536954e-01,-1.919829561115501682e-01,-3.168430300677910205e-01,-3.655875964542858614e-03,2.716334445224866806e-02,9.053221389641541439e-02,1.580067030170407560e-01 +7.037645467304356472e-01,-1.895252791675064719e-01,-3.302594005191138438e-01,-3.690820197897301883e-03,2.720026866894137765e-02,8.934751589265543803e-02,1.647844177821697620e-01 +7.052519206742123847e-01,-1.865113793029324063e-01,-3.438291822319352598e-01,-3.726883562056362668e-03,2.723669834821310781e-02,8.788176436701472527e-02,1.716363889173506962e-01 +7.067361969091264795e-01,-1.829271232114020040e-01,-3.575288410159535579e-01,-3.764083833654254235e-03,2.727258555449208963e-02,8.612799292673906093e-02,1.785505070702771424e-01 +7.082173689157260643e-01,-1.787594571194537907e-01,-3.713339014348144285e-01,-3.802437203055265835e-03,2.730788091306861690e-02,8.407979687520285295e-02,1.855141940197172490e-01 +7.096954301881950311e-01,-1.739964665052156356e-01,-3.852190027006928696e-01,-3.841958188324095386e-03,2.734253367452679426e-02,8.173136188155485926e-02,1.925144282881262048e-01 +7.111703742343798984e-01,-1.686274309840504471e-01,-3.991579566986546146e-01,-3.882659561302815969e-03,2.737649178222066604e-02,7.907749444128026051e-02,1.995377756288114479e-01 +7.126421945758196763e-01,-1.626428788332861253e-01,-4.131238085612063959e-01,-3.924552267741201160e-03,2.740970194498439788e-02,7.611364517139845853e-02,2.065704231518987743e-01 +7.141108847477731780e-01,-1.560346345798507284e-01,-4.270889028535198584e-01,-3.967645354531402178e-03,2.744210971818659278e-02,7.283593673004790103e-02,2.135982108660799406e-01 +7.155764382992484407e-01,-1.487958652811644245e-01,-4.410249507215481812e-01,-4.011945900723998260e-03,2.747365957878268594e-02,6.924118529519238119e-02,2.206066688299615497e-01 +7.170388487930305921e-01,-1.409211227450218040e-01,-4.549031012544411667e-01,-4.057458950457584930e-03,2.750429501561537510e-02,6.532692029341159723e-02,2.275810532629677085e-01 +7.184981098057098281e-01,-1.324063805801055094e-01,-4.686940148658860772e-01,-4.104187450074489422e-03,2.753395861897216940e-02,6.109140402136453013e-02,2.345063851481253847e-01 +7.199542149277103897e-01,-1.232490675413011050e-01,-4.823679402931709270e-01,-4.152132193978716988e-03,2.756259217395897951e-02,5.653364801755921681e-02,2.413674877164432575e-01 +7.214071577633178745e-01,-1.134480968538220114e-01,-4.958947910213242216e-01,-4.201291771718547799e-03,2.759013675732365986e-02,5.165342642134634205e-02,2.481490300372869162e-01 +7.228569319307079910e-01,-1.030038898936565667e-01,-5.092442267389474253e-01,-4.251662517623250488e-03,2.761653283785601987e-02,4.645128904814942866e-02,2.548355655424100474e-01 +7.243035310619742040e-01,-9.191839693346641471e-02,-5.223857338881044976e-01,-4.303238470776202772e-03,2.764172037778348315e-02,4.092856883231715120e-02,2.614115759381091530e-01 +7.257469488031560445e-01,-8.019511174273917520e-02,-5.352887095203682222e-01,-4.356011335626478777e-03,2.766563894344057847e-02,3.508738943434068502e-02,2.678615119128830435e-01 +7.271871788142661996e-01,-6.783908150489421918e-02,-5.479225445073671041e-01,-4.409970455976650570e-03,2.768822781101833164e-02,2.893067009233759798e-02,2.741698373424529267e-01 +7.286242147693199334e-01,-5.485691191944318951e-02,-5.602567072279777127e-01,-4.465102782427047938e-03,2.770942607668628460e-02,2.246212783365863705e-02,2.803210741703939801e-01 +7.300580503563610657e-01,-4.125676827333332741e-02,-5.722608300493529088e-01,-4.521392853881620738e-03,2.772917276589709906e-02,1.568627549241219041e-02,2.862998428957011510e-01 +7.314886792774907276e-01,-2.704837086070711535e-02,-5.839047929575642559e-01,-4.578822784315339858e-03,2.774740695242769450e-02,8.608418535231653762e-03,2.920909085230357083e-01 +7.329160952488951164e-01,-1.224298492385302375e-02,-5.951588087742235578e-01,-4.637372251584418945e-03,2.776406786935170859e-02,1.234650382171824249e-03,2.976792229303369952e-01 +7.343402920008724744e-01,3.146592514479088377e-03,-6.059935076168793122e-01,-4.697018497090830157e-03,2.777909502978521267e-02,-6.428158163210730310e-03,3.030499673213165046e-01 +7.357612632778612882e-01,1.910605428002015022e-02,-6.163800195710220642e-01,-4.757736329318987267e-03,2.779242833904160825e-02,-1.437236523404859280e-02,3.081885959808093878e-01 +7.371790028384670457e-01,3.561960494799773502e-02,-6.262900570374296816e-01,-4.819498131125545502e-03,2.780400820697085176e-02,-2.258957517452872890e-02,3.130808778805191372e-01 +7.385935044554904350e-01,5.266998995582020809e-02,-6.356959954188966533e-01,-4.882273874352496129e-03,2.781377566883234953e-02,-3.107065356102963263e-02,3.177129379446340107e-01 +7.400047619159539902e-01,7.023852924339824000e-02,-6.445709525950515939e-01,-4.946031140459415838e-03,2.782167250179699500e-02,-3.980574476308030435e-02,3.220712964542320478e-01 +7.414127690211299582e-01,8.830515587463683147e-02,-6.528888662084167960e-01,-5.010735151278093887e-03,2.782764133699551501e-02,-4.878429091101843529e-02,3.261429077609437344e-01 +7.428175195865669433e-01,1.068484572941059974e-01,-6.606245674450763605e-01,-5.076348801714179911e-03,2.783162577142955091e-02,-5.799505634609512794e-02,3.299152001383475752e-01 +7.442190074421178858e-01,1.258457221751451971e-01,-6.677538534724845043e-01,-5.142832693887981754e-03,2.783357048172168183e-02,-6.742615164674353734e-02,3.333761121002933869e-01 +7.456172264319661513e-01,1.452729904733237043e-01,-6.742535581198620953e-01,-5.210145183406600031e-03,2.783342133559879583e-02,-7.706506130487957018e-02,3.365141253733681670e-01 +7.470121704146531760e-01,1.651051092550770150e-01,-6.801016174619439747e-01,-5.278242432096110458e-03,2.783112549927831483e-02,-8.689866979735268171e-02,3.393183002233632894e-01 +7.484038332631057777e-01,1.853157895241867581e-01,-6.852771334257988789e-01,-5.347078461758018007e-03,2.782663153826232805e-02,-9.691329454095853890e-02,3.417783067562932819e-01 +7.497922088646621352e-01,2.058776675317930127e-01,-6.897604334575986318e-01,-5.416605216043217721e-03,2.781988952683274452e-02,-1.070947186945647722e-01,3.438844553766033374e-01 +7.511772911210995440e-01,2.267623708592703957e-01,-6.935331269737508020e-01,-5.486772624500842535e-03,2.781085114557031723e-02,-1.174282227369681242e-01,3.456277245822504951e-01 +7.525590739486605063e-01,2.479405855587577256e-01,-6.965781584433328177e-01,-5.557528677268242304e-03,2.779946977654421808e-02,-1.278986212309241033e-01,3.469997859565936982e-01 +7.539375512780798205e-01,2.693821268896287036e-01,-6.988798547777975712e-01,-5.628819501607351325e-03,2.778570059516374119e-02,-1.384902994353127137e-01,3.479930303262124824e-01 +7.553127170546113378e-01,2.910560118674273555e-01,-7.004239695626024442e-01,-5.700589437248178466e-03,2.776950066198757838e-02,-1.491872525099322833e-01,3.486005899823371168e-01 +7.566845652380541631e-01,3.129305356704149554e-01,-7.011977250504888293e-01,-5.772781123052851161e-03,2.775082900728581201e-02,-1.599731228602978561e-01,3.488163549318202272e-01 +7.580530898027795228e-01,3.349733484515136639e-01,-7.011898464017448651e-01,-5.845335591651271492e-03,2.772964671058822383e-02,-1.708312414622561448e-01,3.486349932491152326e-01 +7.594182847377570766e-01,3.571515346346019193e-01,-7.003905928673253012e-01,-5.918192355596876668e-03,2.770591697712301502e-02,-1.817446686230491382e-01,3.480519664973741323e-01 +7.607801440465815634e-01,3.794316932673672538e-01,-6.987917859039340929e-01,-5.991289507819665995e-03,2.767960521082256029e-02,-1.926962362007074714e-01,3.470635400540983406e-01 +7.621386617474984471e-01,4.017800202925613240e-01,-6.963868298661858303e-01,-6.064563820302531875e-03,2.765067908020727230e-02,-2.036685890038230640e-01,3.456667963643343944e-01 +7.634938318734313389e-01,4.241623905903087466e-01,-6.931707309925291272e-01,-6.137950847314359824e-03,2.761910858168548089e-02,-2.146442287322393727e-01,3.438596403020371395e-01 +7.648456484720069781e-01,4.465444422992255191e-01,-6.891401092121284266e-01,-6.211385032630302797e-03,2.758486609484049265e-02,-2.256055550896446660e-01,3.416408068434254597e-01 +7.661941056055823207e-01,4.688916592364656966e-01,-6.842932070087707208e-01,-6.284799816218169209e-03,2.754792643454662127e-02,-2.365349112399459108e-01,3.390098630935487467e-01 +7.675391973512698529e-01,4.911694560509112240e-01,-6.786298928036910150e-01,-6.358127746224749403e-03,2.750826689695783436e-02,-2.474146242573231624e-01,3.359672094502713646e-01 +7.688809178009646805e-01,5.133432600705993831e-01,-6.721516599432637573e-01,-6.431300591185603113e-03,2.746586729995142279e-02,-2.582270512391051098e-01,3.325140779358277121e-01 +7.702192610613689538e-01,5.353785960041873571e-01,-6.648616209822654222e-01,-6.504249454066061802e-03,2.742071002136185071e-02,-2.689546188873283006e-01,3.286525283816582044e-01 +7.715542212540194011e-01,5.572411674147623728e-01,-6.567644979783675563e-01,-6.576904889087143921e-03,2.737278002407590727e-02,-2.795798673297605341e-01,3.243854412693722433e-01 +7.728857925153117536e-01,5.788969381334445607e-01,-6.478666070796127441e-01,-6.649197018862942378e-03,2.732206488379270923e-02,-2.900854916237732284e-01,3.197165106325503547e-01 +7.742139689965273908e-01,6.003122124957698258e-01,-6.381758398661425691e-01,-6.721055649251062719e-03,2.726855480564180020e-02,-3.004543824362367888e-01,3.146502326226516266e-01 +7.755387448638585424e-01,6.214537137731178662e-01,-6.277016404267161676e-01,-6.792410388638605266e-03,2.721224263910194605e-02,-3.106696664943078834e-01,3.091918919046592884e-01 +7.768601142984342678e-01,6.422886616135077276e-01,-6.164549776670062986e-01,-6.863190766804795791e-03,2.715312388325948414e-02,-3.207147447187003730e-01,3.033475471124135736e-01 +7.781780714963458800e-01,6.627848464043606391e-01,-6.044483136014263680e-01,-6.933326351262289293e-03,2.709119668758271787e-02,-3.305733314154092328e-01,2.971240142725593003e-01 +7.794926106686721479e-01,6.829107017496691689e-01,-5.916955686941056669e-01,-7.002746864361615933e-03,2.702646185072270224e-02,-3.402294917756508719e-01,2.905288465475783233e-01 +7.808037260415053860e-01,7.026353760733401455e-01,-5.782120824809164894e-01,-7.071382301039361162e-03,2.695892280977461516e-02,-3.496676752995990234e-01,2.835703139890816682e-01 +7.821114118559759909e-01,7.219287987740369283e-01,-5.640145715463171783e-01,-7.139163044625981315e-03,2.688858562509165509e-02,-3.588727532617032590e-01,2.762573798151170545e-01 +7.834156623682784204e-01,7.407617469487944861e-01,-5.491210827089636481e-01,-7.206019982327041030e-03,2.681545896297358664e-02,-3.678300485039894818e-01,2.685996776610638515e-01 +7.847164718496960623e-01,7.591059062550206127e-01,-5.335509440972745487e-01,-7.271884612230110519e-03,2.673955406651178437e-02,-3.765253692135269725e-01,2.606074852244993356e-01 +7.860138345866267695e-01,7.769339308266665078e-01,-5.173247145851060935e-01,-7.336689158868166423e-03,2.666088473049170460e-02,-3.849450370980679326e-01,2.522916938892030836e-01 +7.873077448806070633e-01,7.942194990410431643e-01,-5.004641267854561537e-01,-7.400366681881730135e-03,2.657946726012429262e-02,-3.930759156282569999e-01,2.436637838167442782e-01 +7.885981970483383341e-01,8.109373651814404393e-01,-4.829920313406964594e-01,-7.462851179411697684e-03,2.649532043761961345e-02,-4.009054379575764382e-01,2.347357904948531582e-01 +7.898851854217111557e-01,8.270634103067260323e-01,-4.649323365144044651e-01,-7.524077697055743530e-03,2.640846547294365548e-02,-4.084216280398086374e-01,2.255202744955397687e-01 +7.911687043478299319e-01,8.425746858262657124e-01,-4.463099454863786453e-01,-7.583982423811093387e-03,2.631892595653619049e-02,-4.156131268972171888e-01,2.160302904623360998e-01 +7.924487481890386542e-01,8.574494570257178427e-01,-4.271506941467073659e-01,-7.642502791772386173e-03,2.622672780534866799e-02,-4.224692102495987145e-01,2.062793506153198786e-01 +7.937253113229446599e-01,8.716672396337584727e-01,-4.074812840734240993e-01,-7.699577572238246355e-03,2.613189920651913176e-02,-4.289798102911926425e-01,1.962813932775482328e-01 +7.949983881424437238e-01,8.852088357862697432e-01,-3.873292164002694604e-01,-7.755146964652956561e-03,2.603447055693137654e-02,-4.351355295603863138e-01,1.860507462637016329e-01 +7.962679730557449265e-01,8.980563636017671048e-01,-3.667227233951789733e-01,-7.809152687517999757e-03,2.593447439634885901e-02,-4.409276576493223465e-01,1.756020916350204408e-01 +7.975340604863950800e-01,9.101932842675084512e-01,-3.456906987445367285e-01,-7.861538060409720141e-03,2.583194534056571651e-02,-4.463481833727414028e-01,1.649504304968070767e-01 +7.987966448733025970e-01,9.216044245073047314e-01,-3.242626281287616097e-01,-7.912248085074419929e-03,2.572692001087570693e-02,-4.513898063523661963e-01,1.541110454052248835e-01 +8.000557206707632485e-01,9.322759965423206285e-01,-3.024685180424109832e-01,-7.961229521682744101e-03,2.561943695903641863e-02,-4.560459440535487019e-01,1.430994647560939470e-01 +8.013112823484830338e-01,9.421956124470586280e-01,-2.803388253723940737e-01,-8.008430961425125685e-03,2.550953658800748231e-02,-4.603107401107622287e-01,1.319314249714528542e-01 +8.025633243916036053e-01,9.513522956682081144e-01,-2.579043856538646429e-01,-8.053802895040713949e-03,2.539726107361358828e-02,-4.641790688107446661e-01,1.206228349050475213e-01 +8.038118413007259155e-01,9.597364882003091013e-01,-2.351963425054621470e-01,-8.097297775284317475e-03,2.528265428310484675e-02,-4.676465386451497674e-01,1.091897382866242466e-01 +8.050568275919344208e-01,9.673400551879572928e-01,-2.122460767480701394e-01,-8.138870078094466096e-03,2.516576169155794462e-02,-4.707094917311720472e-01,9.764827750798597827e-02 +8.062982777968216164e-01,9.741562846190556790e-01,-1.890851358044381703e-01,-8.178476355415681756e-03,2.504663029245855418e-02,-4.733650036235998448e-01,8.601465811625019775e-02 +8.075361864625111297e-01,9.801798829841713578e-01,-1.657451651593589981e-01,-8.216075287247385367e-03,2.492530851363480154e-02,-4.756108820482198296e-01,7.430511114793551808e-02 +8.087705481516828110e-01,9.854069694237573840e-01,-1.422578385765292108e-01,-8.251627729125408581e-03,2.480184612900436167e-02,-4.774456600236547943e-01,6.253585995269574094e-02 +8.100013574425958263e-01,9.898350647892654175e-01,-1.186547907638914739e-01,-8.285096750044292774e-03,2.467629416592492797e-02,-4.788685902923340842e-01,5.072308502153977799e-02 +8.112286089291126379e-01,9.934630776021626808e-01,-9.496755173228249580e-02,-8.316447671841311132e-03,2.454870481525637041e-02,-4.798796375820301718e-01,3.888288865364012553e-02 +8.124522972207226523e-01,9.962912870433002777e-01,-7.122748082601915165e-02,-8.345648101495403010e-03,2.441913133952221954e-02,-4.804794687118335017e-01,2.703126364999995992e-02 +8.136724169425666453e-01,9.983213224249321005e-01,-4.746570398731066870e-02,-8.372667957551356219e-03,2.428762798267646711e-02,-4.806694418511772149e-01,1.518405973366668965e-02 +8.148889627354590770e-01,9.995561407968461820e-01,-2.371305185430090667e-02,-8.397479494595546171e-03,2.415424987703311543e-02,-4.804515921344985618e-01,3.356952552691055535e-03 +8.161019292559127392e-01,1.000000000000000000e+00,-0.000000000000000000e+00,-8.420057321244801482e-03,2.401905295009302069e-02,-4.798286189842131400e-01,-8.434586850958382745e-03 +8.173113111761618477e-01,9.996584301630696556e-01,2.364338877968385350e-02,-8.440378415498443060e-03,2.388209383483639453e-02,-4.788038698558251816e-01,-2.017532162331663395e-02 +8.185171031841850242e-01,9.985382024868035833e-01,4.718752034919261595e-02,-8.458422134651026603e-03,2.374342977698206542e-02,-4.773813229131531610e-01,-3.185027703002016319e-02 +8.197192999837294991e-01,9.966472951115886714e-01,7.060332284249659640e-02,-8.474170220738491341e-03,2.360311854458921679e-02,-4.755655693719481603e-01,-4.344476730848053497e-02 +8.209178962943334268e-01,9.939948570511553116e-01,9.386229771623846863e-02,-8.487606800784208011e-03,2.346121833839791976e-02,-4.733617940323752715e-01,-5.494442172933758362e-02 +8.221128868513501997e-01,9.905911698770779594e-01,1.169365681883387104e-01,-8.498718385722645388e-03,2.331778770386953165e-02,-4.707757549635836480e-01,-6.633520976063464469e-02 +8.233042664059700977e-01,9.864476077246173702e-01,1.397989261181688536e-01,-8.507493863227988742e-03,2.317288544305274103e-02,-4.678137616491102602e-01,-7.760346316335207939e-02 +8.244920297252450458e-01,9.815765948427841447e-01,1.624228751616237920e-01,-8.513924486816331141e-03,2.302657052890415626e-02,-4.644826534334213219e-01,-8.873589891290514786e-02 +8.256761715921098199e-01,9.759915622365923360e-01,1.847826722447456216e-01,-8.518003861943058280e-03,2.287890202116781707e-02,-4.607897757542142037e-01,-9.971963897210844863e-02 +8.268566868054064711e-01,9.697069022194109156e-01,2.068533658883802140e-01,-8.519727926831416678e-03,2.272993898342593605e-02,-4.567429567655271194e-01,-1.105422295472391070e-01 +8.280335701799065307e-01,9.627379221583812763e-01,2.286108317444562610e-01,-8.519094930661110301e-03,2.257974040245790473e-02,-4.523504823359265625e-01,-1.211916590559819623e-01 +8.292068165463334362e-01,9.551007966624428525e-01,2.500318053984010302e-01,-8.516105409770656151e-03,2.242836510834482142e-02,-4.476210712166284189e-01,-1.316563746322477746e-01 +8.303764207513859574e-01,9.468125185033209323e-01,2.710939133159477321e-01,-8.510762157691302565e-03,2.227587169890327470e-02,-4.425638502220876069e-01,-1.419252954718103199e-01 +8.315423776577602899e-01,9.378908494068251001e-01,2.917756990706703024e-01,-8.503070193344926600e-03,2.212231846670051696e-02,-4.371883276730921253e-01,-1.519878283605715086e-01 +8.327046821441730362e-01,9.283542701103493222e-01,3.120566491490311956e-01,-8.493036727531416002e-03,2.196776332555731848e-02,-4.315043666127083011e-01,-1.618338773231687289e-01 +8.338633291053832997e-01,9.182219290278059720e-01,3.319172140007431993e-01,-8.480671122081959515e-03,2.181226374125259590e-02,-4.255221594030303423e-01,-1.714538555304901402e-01 +8.350183134522153328e-01,9.075135914260984427e-01,3.513388269876194792e-01,-8.465984853070176314e-03,2.165587666525701019e-02,-4.192522004968812777e-01,-1.808386945265886558e-01 +8.361696301115808527e-01,8.962495876944783779e-01,3.703039213981604294e-01,-8.448991466011327930e-03,2.149865847140559894e-02,-4.127052604018106030e-01,-1.899798505164995710e-01 +8.373172740265014680e-01,8.844507626774944509e-01,3.887959436740527353e-01,-8.429706530115094534e-03,2.134066489395827423e-02,-4.058923574889829466e-01,-1.988693112646179040e-01 +8.384612401561303274e-01,8.721384232088920108e-01,4.067993642195798509e-01,-8.408147589841834593e-03,2.118195096853252649e-02,-3.988247333524813643e-01,-2.074996011247569327e-01 +8.396015234757753243e-01,8.593342876586412826e-01,4.242996857009611045e-01,-8.384334117118013435e-03,2.102257097898150434e-02,-3.915138248978705660e-01,-2.158637844841471976e-01 +8.407381189769198571e-01,8.460604349491780418e-01,4.412834492590722579e-01,-8.358287457069516618e-03,2.086257840216782303e-02,-3.839712382457373674e-01,-2.239554669520552321e-01 +8.418710216672458113e-01,8.323392537394583846e-01,4.577382368071956642e-01,-8.330030776066792847e-03,2.070202586020136390e-02,-3.762087234432471838e-01,-2.317687978603640842e-01 +8.430002265706553199e-01,8.181933930916099884e-01,4.736526729334472052e-01,-8.299589005204071127e-03,2.054096507309825098e-02,-3.682381478089406524e-01,-2.392984676301580238e-01 +8.441257287272921905e-01,8.036457128393650340e-01,4.890164220346039925e-01,-8.266988783307897132e-03,2.037944681549978126e-02,-3.600714715265372901e-01,-2.465397083703161074e-01 +8.452475231935638877e-01,7.887192355263481502e-01,5.038201852643148237e-01,-8.232258399822797376e-03,2.021752087666432371e-02,-3.517207222479979967e-01,-2.534882892935767518e-01 +8.463656050421637378e-01,7.734370990442956950e-01,5.180556939367054747e-01,-8.195427733833830428e-03,2.005523602207719830e-02,-3.431979705854124041e-01,-2.601405134371228556e-01 +8.474799693620920227e-01,7.578225095311232895e-01,5.317157011441935310e-01,-8.156528194229540132e-03,1.989263995992216968e-02,-3.345153075509421092e-01,-2.664932125891539272e-01 +8.485906112586771854e-01,7.418986968634790902e-01,5.447939713290174124e-01,-8.115592658731657361e-03,1.972977931087058906e-02,-3.256848197860306993e-01,-2.725437411177461522e-01 +8.496975258535985898e-01,7.256888699893576655e-01,5.572852681478320802e-01,-8.072655410992547048e-03,1.956669957769072501e-02,-3.167185679482795169e-01,-2.782899682819627851e-01 +8.508007082849067260e-01,7.092161740343645437e-01,5.691853397148720628e-01,-8.027752078067305203e-03,1.940344512236883909e-02,-3.076285648188966104e-01,-2.837302709622501196e-01 +8.519001537070450825e-01,6.925036492309335490e-01,5.804909028824958295e-01,-7.980919567442198645e-03,1.924005914148742627e-02,-2.984267532113398791e-01,-2.888635239287807854e-01 +8.529958572908716841e-01,6.755741898221653274e-01,5.911996253128813938e-01,-7.932196003254468608e-03,1.907658364822786140e-02,-2.891249873044618446e-01,-2.936890902178919127e-01 +8.540878142236794091e-01,6.584505060840007973e-01,6.013101063068617691e-01,-7.881620661588002036e-03,1.891305945655206988e-02,-2.797350116479078719e-01,-2.982068100427276436e-01 +8.551760197092181937e-01,6.411550870164003380e-01,6.108218554524216248e-01,-7.829233907365248588e-03,1.874952616692391466e-02,-2.702684425594826245e-01,-3.024169902365073659e-01 +8.562604689677154601e-01,6.237101645877068945e-01,6.197352710949484145e-01,-7.775077129572419664e-03,1.858602215468417237e-02,-2.607367503682180288e-01,-3.063203907170365947e-01 +8.573411572358974331e-01,6.061376793339033897e-01,6.280516155630997410e-01,-7.719192678374710292e-03,1.842258456411947909e-02,-2.511512429724577222e-01,-3.099182139651301959e-01 +8.584180797670095675e-01,5.884592490089123329e-01,6.357729917294061561e-01,-7.661623803167939492e-03,1.825924930172423757e-02,-2.415230476144548943e-01,-3.132120891754029413e-01 +8.594912318308379762e-01,5.706961367618071934e-01,6.429023167975184139e-01,-7.602414586022354821e-03,1.809605103268063439e-02,-2.318630975475257650e-01,-3.162040590255776329e-01 +8.605606087137299687e-01,5.528692226538561849e-01,6.494432948891505353e-01,-7.541609880598387078e-03,1.793302318213814070e-02,-2.221821165005092025e-01,-3.188965663576120435e-01 +8.616262057186147016e-01,5.349989761450204551e-01,6.554003897809317358e-01,-7.479255251185191328e-03,1.777019793677054801e-02,-2.124906054238398234e-01,-3.212924384791391308e-01 +8.626880181650236068e-01,5.171054307332098032e-01,6.607787966788722933e-01,-7.415396909808485808e-03,1.760760624947969172e-02,-2.027988293258015939e-01,-3.233948714088031862e-01 +8.637460413891117073e-01,4.992081596885938333e-01,6.655844123095930875e-01,-7.350081656059247355e-03,1.744527784591137776e-02,-1.931168061959828153e-01,-3.252074156515417092e-01 +8.648002707436770464e-01,4.813262543246990544e-01,6.698238059534658007e-01,-7.283356817343013961e-03,1.728324123424898953e-02,-1.834542953145038846e-01,-3.267339588119829163e-01 +8.658507015981822263e-01,4.634783032825852689e-01,6.735041884125623213e-01,-7.215270189175378678e-03,1.712152371502409856e-02,-1.738207877936525370e-01,-3.279787109103751708e-01 +8.668973293387736145e-01,4.456823738443556615e-01,6.766333818539053135e-01,-7.145869978233177627e-03,1.696015139586735249e-02,-1.642254974247767674e-01,-3.289461871083230182e-01 +8.679401493683025492e-01,4.279559951575275800e-01,6.792197883625598065e-01,-7.075204744722232535e-03,1.679914920502256770e-02,-1.546773520201885910e-01,-3.296411921199092210e-01 +8.689791571063447684e-01,4.103161424508994792e-01,6.812723591809227708e-01,-7.003323347362268823e-03,1.663854090987183440e-02,-1.451849869307272334e-01,-3.300688028115828510e-01 +8.700143479892213927e-01,3.927792243417290163e-01,6.828005630928510872e-01,-6.930274887012193019e-03,1.647834913339953078e-02,-1.357567367494965371e-01,-3.302343520835478441e-01 +8.710457174700181326e-01,3.753610692926037662e-01,6.838143547792756571e-01,-6.856108654369834875e-03,1.631859537550185354e-02,-1.264006325303649592e-01,-3.301434126785205980e-01 +8.720732610186056055e-01,3.580769167653047313e-01,6.843241434111733135e-01,-6.780874078304592947e-03,1.615930003543106924e-02,-1.171243940494517688e-01,-3.298017806079392988e-01 +8.730969741216594304e-01,3.409414071255714718e-01,6.843407617660495212e-01,-6.704620674934293895e-03,1.600048243372345289e-02,-1.079354281800540688e-01,-3.292154578401711995e-01 +8.741168522826797682e-01,3.239685754130192108e-01,6.838754346675073315e-01,-6.627397996800146913e-03,1.584216083824506113e-02,-9.884082370570680676e-02,-3.283906366747441630e-01 +8.751328910220110835e-01,3.071718449200338541e-01,6.829397481561285677e-01,-6.549255585557381583e-03,1.568435248748404134e-02,-8.984735008392019662e-02,-3.273336833278033819e-01 +8.761450858768620176e-01,2.905640230448373607e-01,6.815456187387771703e-01,-6.470242924642139078e-03,1.552707361919573176e-02,-8.096145543885736873e-02,-3.260511221100576007e-01 +8.771534324013248174e-01,2.741572981056251135e-01,6.797052632885556278e-01,-6.390409394534433442e-03,1.537033949775147047e-02,-7.218926614040521716e-02,-3.245496192729344642e-01 +8.781579261663950975e-01,2.579632383960722697e-01,6.774311690558071364e-01,-6.309804227832551023e-03,1.521416444424086682e-02,-6.353658545850741912e-02,-3.228359676756273333e-01 +8.791585627599906028e-01,2.419927916157117165e-01,6.747360641340477416e-01,-6.228476468120382019e-03,1.505856186375165683e-02,-5.500889467800403054e-02,-3.209170717600384748e-01 +8.801553377869717476e-01,2.262562857598159061e-01,6.716328890409748453e-01,-6.146474928306472352e-03,1.490354427842003129e-02,-4.661135452146469155e-02,-3.187999317193923043e-01 +8.811482468691598235e-01,2.107634320274463791e-01,6.681347682339500160e-01,-6.063848150450397954e-03,1.474912335681848562e-02,-3.834880571002414335e-02,-3.164916291889133815e-01 +8.821372856453569833e-01,1.955233277589504270e-01,6.642549822853418551e-01,-5.980644367811404835e-03,1.459530994617087331e-02,-3.022577226593019598e-02,-3.139993132053370273e-01 +8.831224497713646704e-01,1.805444612706539331e-01,6.600069409806418319e-01,-5.896911468929478053e-03,1.444211410409662437e-02,-2.224646392691237082e-02,-3.113301860397398313e-01 +8.841037349200038253e-01,1.658347174267698620e-01,6.554041571083696338e-01,-5.812696962385888004e-03,1.428954513203991444e-02,-1.441477971765287198e-02,-3.084914894267608565e-01 +8.850811367811323160e-01,1.514013845411945891e-01,6.504602211129256739e-01,-5.728047942392538137e-03,1.413761160685848145e-02,-6.734311436021186807e-03,-3.054904909502892396e-01 +8.860546510616654770e-01,1.372511615142588959e-01,6.451887756674407060e-01,-5.643011056766074340e-03,1.398632141517575783e-02,7.916510255236069776e-04,-3.023344724269155792e-01 +8.870242734855938727e-01,1.233901669215756447e-01,6.396034924006066547e-01,-5.557632476989259572e-03,1.383568178639229748e-02,8.160115875540678923e-03,-2.990307161634236932e-01 +8.879899997940023937e-01,1.098239482501272846e-01,6.337180482847739604e-01,-5.471957867235294377e-03,1.368569932568816823e-02,1.536838388522017082e-02,-2.955864938193450975e-01 +8.889518257450892413e-01,9.655749202434019129e-02,6.275461033836498004e-01,-5.386032357915410650e-03,1.353638004660604084e-02,2.241404264140352959e-02,-2.920090547398457304e-01 +8.899097471141842464e-01,8.359523466665989833e-02,6.211012793912016328e-01,-5.299900517958953255e-03,1.338772940515452030e-02,2.929496043140230807e-02,-2.883056148665352758e-01 +8.908637596937675207e-01,7.094107417804927651e-02,6.143971387814910479e-01,-5.213606331435059922e-03,1.323975233205671766e-02,3.600928005098042589e-02,-2.844833465992374633e-01 +8.918138592934876652e-01,5.859838207520070424e-02,6.074471658699489574e-01,-5.127193171907821687e-03,1.309245326713255782e-02,4.255541160929561528e-02,-2.805493671977438708e-01 +8.927600417401808652e-01,4.657001672368455553e-02,6.002647469495695498e-01,-5.040703780137770901e-03,1.294583619107447965e-02,4.893202659455523490e-02,-2.765107310632766713e-01 +8.937023028778884326e-01,3.485833632416520528e-02,5.928631530274989236e-01,-4.954180243560689714e-03,1.279990465788219574e-02,5.513805005892639943e-02,-2.723744193036329864e-01 +8.946406385678755679e-01,2.346521306986682279e-02,5.852555224457017813e-01,-4.867663976049569664e-03,1.265466182547976895e-02,6.117265389300534373e-02,-2.681473316833188347e-01 +8.955750446886495686e-01,1.239204670997110652e-02,5.774548452165796375e-01,-4.781195698860733409e-03,1.251011048785269711e-02,6.703524800094669278e-02,-2.638362775278939587e-01 +8.965055171359771480e-01,1.639779691705765428e-03,5.694739470033393092e-01,-4.694815422964298754e-03,1.236625310603800543e-02,7.272547356892757053e-02,-2.594479694607622622e-01 +8.974320518229038646e-01,-8.791088251639095699e-03,5.613254753165481947e-01,-4.608562434656101985e-03,1.222309183846632952e-02,7.824319454353050041e-02,-2.549890151416415729e-01 +8.983546446797705531e-01,-1.890048470533193040e-02,5.530218863050053724e-01,-4.522475278352723442e-03,1.208062857058673535e-02,8.358848998503255745e-02,-2.504659100338452737e-01 +8.992732916542326427e-01,-2.868874912548549996e-02,5.445754316786395322e-01,-4.436591743215543941e-03,1.193886494373440528e-02,8.876164598895747160e-02,-2.458850320477788676e-01 +9.001879887112764766e-01,-3.815661744646463294e-02,5.359981474661980050e-01,-4.350948849451385415e-03,1.179780238282519128e-02,9.376314708004128418e-02,-2.412526348447199265e-01 +9.010987318332384088e-01,-4.730520627114222915e-02,5.273018423486995898e-01,-4.265582838014476789e-03,1.165744212515671362e-02,9.859366806577239684e-02,-2.365748440083721704e-01 +9.020055170198213457e-01,-5.613599683507438515e-02,5.184980890092623085e-01,-4.180529159435309242e-03,1.151778524581847582e-02,1.032540660895563678e-01,-2.318576496012124677e-01 +9.029083402881132869e-01,-6.465081943080062954e-02,5.095982138835991604e-01,-4.095822462947809259e-03,1.137883268483947351e-02,1.077453716910684450e-01,-2.271069037244571875e-01 +9.038071976726038681e-01,-7.285183704550857176e-02,5.006132896738594296e-01,-4.011496588339020543e-03,1.124058527092835005e-02,1.120687813016259110e-01,-2.223283148623407535e-01 +9.047020852252026790e-01,-8.074153012307870581e-02,4.915541269367061372e-01,-3.927584557375816098e-03,1.110304374737509751e-02,1.162256476777421277e-01,-2.175274458859590976e-01 +9.055929990152560283e-01,-8.832267968771555233e-02,4.824312681542416437e-01,-3.844118568445401094e-03,1.096620879389233541e-02,1.202174735353882251e-01,-2.127097092800652178e-01 +9.064799351295638186e-01,-9.559835299520672014e-02,4.732549814648944198e-01,-3.761129988424077603e-03,1.083008105059672307e-02,1.240459006517171253e-01,-2.078803648382117342e-01 +9.073628896723984205e-01,-1.025718859916759235e-01,4.640352555751937613e-01,-3.678649348336809032e-03,1.069466113905809604e-02,1.277127052332515666e-01,-2.030445169243938197e-01 +9.082418587655194386e-01,-1.092468693649708555e-01,4.547817950332960213e-01,-3.596706337649027523e-03,1.055994968137847871e-02,1.312197868986738347e-01,-1.982071127850537906e-01 +9.091168385481928071e-01,-1.156271324115511479e-01,4.455040165373365113e-01,-3.515329802126415833e-03,1.042594732180697654e-02,1.345691621734736165e-01,-1.933729405952666902e-01 +9.099878251772063331e-01,-1.217167279804717978e-01,4.362110459512985594e-01,-3.434547739204133268e-03,1.029265474422326812e-02,1.377629564432291676e-01,-1.885466277975703098e-01 +9.108548148268875710e-01,-1.275199174954327330e-01,4.269117152828679207e-01,-3.354387297624682619e-03,1.016007268998418873e-02,1.408033962956322938e-01,-1.837326410741997595e-01 +9.117178036891198101e-01,-1.330411563553189902e-01,4.176145618817269090e-01,-3.274874774742753993e-03,1.002820197467464876e-02,1.436928017419323578e-01,-1.789352836602340902e-01 +9.125767879733592824e-01,-1.382850793048271132e-01,4.083278265464425627e-01,-3.196035614414291428e-03,9.897043503640677239e-03,1.464335791328062186e-01,-1.741586961484841434e-01 +9.134317639066518169e-01,-1.432564864706469543e-01,3.990594524754489947e-01,-3.117894407663666673e-03,9.766598287219596664e-03,1.490282135104962769e-01,-1.694068570974589794e-01 +9.142827277336491587e-01,-1.479603288172466891e-01,3.898170858346209910e-01,-3.040474894305885897e-03,9.636867453124618038e-03,1.514792627749290133e-01,-1.646835819339003693e-01 +9.151296757166257345e-01,-1.524016957007766071e-01,3.806080762961042696e-01,-2.963799961206071802e-03,9.507852259559917626e-03,1.537893486797934639e-01,-1.599925231745967791e-01 +9.159726041354947501e-01,-1.565858005896807736e-01,3.714394770284063507e-01,-2.887891645459275940e-03,9.379554107571934016e-03,1.559611520642976701e-01,-1.553371728818624398e-01 +9.168115092878250660e-01,-1.605179688128360316e-01,3.623180473504115096e-01,-2.812771135703733341e-03,9.251974550949145296e-03,1.579974051121963985e-01,-1.507208612686314897e-01 +9.176463874888565186e-01,-1.642036244413731805e-01,3.532502535990732495e-01,-2.738458773914310713e-03,9.125115306056873904e-03,1.599008860914110930e-01,-1.461467597372574367e-01 +9.184772350715175726e-01,-1.676482787680801667e-01,3.442422718669390092e-01,-2.664974059148935391e-03,8.998978260264193368e-03,1.616744119783241340e-01,-1.416178814367783367e-01 +9.193040483864395318e-01,-1.708575176457699518e-01,3.352999904656112640e-01,-2.592335650865089120e-03,8.873565479796754335e-03,1.633208341222507354e-01,-1.371370832992748201e-01 +9.201268238019744139e-01,-1.738369909496380594e-01,3.264290133254206250e-01,-2.520561372812246074e-03,8.748879215510698901e-03,1.648430308456283466e-01,-1.327070672274082708e-01 +9.209455577042096053e-01,-1.765924010650332387e-01,3.176346632839875395e-01,-2.449668216551350100e-03,8.624921910414723447e-03,1.662439028040637590e-01,-1.283303823389783660e-01 +9.217602464969846254e-01,-1.791294922442286941e-01,3.089219855661096692e-01,-2.379672346599161475e-03,8.501696203480194058e-03,1.675263676612351038e-01,-1.240094276444919374e-01 +9.225708866019060039e-01,-1.814540401883845411e-01,3.002957523103205184e-01,-2.310589105833674769e-03,8.379204933745195219e-03,1.686933552942932180e-01,-1.197464534981968892e-01 +9.233774744583637117e-01,-1.835718424344017496e-01,2.917604668210679875e-01,-2.242433019425932924e-03,8.257451142434370575e-03,1.697478024487538983e-01,-1.155435642444470612e-01 +9.241800065235469264e-01,-1.854887088994196798e-01,2.833203675125431364e-01,-2.175217802204101013e-03,8.136438076057643867e-03,1.706926480601041463e-01,-1.114027220696607035e-01 +9.249784792724585758e-01,-1.872104527034345878e-01,2.749794338113802361e-01,-2.108956364633565901e-03,8.016169187348404224e-03,1.715308290171937977e-01,-1.073257477563118872e-01 +9.257728891979321029e-01,-1.887428813862113419e-01,2.667413904670608016e-01,-2.043660818498028563e-03,7.896648135676349531e-03,1.722652761481464068e-01,-1.033143249465595520e-01 +9.265632328106458981e-01,-1.900917890284878897e-01,2.586097132659423425e-01,-1.979342484318138799e-03,7.777878786451144014e-03,1.728989094692044148e-01,-9.937000224827913231e-02 +9.273495066391393982e-01,-1.912629479080211192e-01,2.505876343377373416e-01,-1.916011897458985832e-03,7.659865210759278162e-03,1.734346352587338436e-01,-9.549419653148710774e-02 +9.281317072298272963e-01,-1.922621010766110972e-01,2.426781475648709940e-01,-1.853678816103258202e-03,7.542611681640473513e-03,1.738753423497217399e-01,-9.168819641133441056e-02 +9.289098311470160851e-01,-1.930949557546425832e-01,2.348840143741670605e-01,-1.792352229336167989e-03,7.426122672621373808e-03,1.742238978256999693e-01,-8.795316534874482450e-02 +9.296838749729178231e-01,-1.937671759197199528e-01,2.272077700047417659e-01,-1.732040365029136053e-03,7.310402853586281217e-03,1.744831451527208421e-01,-8.429014414890752260e-02 +9.304538353076660107e-01,-1.942843767431918778e-01,2.196517292817754541e-01,-1.672750696713515631e-03,7.195457086866678502e-03,1.746558997593916640e-01,-8.070005463540790014e-02 +9.312197087693302455e-01,-1.946521179364291143e-01,2.122179921616098308e-01,-1.614489953378613453e-03,7.081290422668571650e-03,1.747449475595425350e-01,-7.718370396174224768e-02 +9.319814919939308773e-01,-1.948758985990578785e-01,2.049084506665383998e-01,-1.557264128101244717e-03,6.967908092798312525e-03,1.747530415420111005e-01,-7.374178648731349872e-02 +9.327391816354543286e-01,-1.949611519292948070e-01,1.977247945803971008e-01,-1.501078486473088249e-03,6.855315505350186023e-03,1.746828994857199391e-01,-7.037488811005117306e-02 +9.334927743658669730e-01,-1.949132400365332918e-01,1.906685179382310025e-01,-1.445937575858166277e-03,6.743518237176920893e-03,1.745372023319389232e-01,-6.708348920798051784e-02 +9.342422668751307890e-01,-1.947374500825358179e-01,1.837409248795004446e-01,-1.391845235098184471e-03,6.632522026843648034e-03,1.743185908955466878e-01,-6.386796883837286831e-02 +9.349876558712167940e-01,-1.944389894645556782e-01,1.769431365710172344e-01,-1.338804604031453840e-03,6.522332767534042008e-03,1.740296651833854180e-01,-6.072860696336539188e-02 +9.357289380801203649e-01,-1.940229820482553269e-01,1.702760966488337802e-01,-1.286818133162892508e-03,6.412956497103232748e-03,1.736729825342563649e-01,-5.766558945431533856e-02 +9.364661102458753383e-01,-1.934944652275905619e-01,1.637405783463696529e-01,-1.235887594466133403e-03,6.304399390458631167e-03,1.732510549773092767e-01,-5.467900990777905396e-02 +9.371991691305681105e-01,-1.928583857917375854e-01,1.573371901115500071e-01,-1.186014090263509332e-03,6.196667749741303216e-03,1.727663495743467503e-01,-5.176887424051968695e-02 +9.379281115143518477e-01,-1.921195980937540626e-01,1.510663820235492727e-01,-1.137198065186692053e-03,6.089767994697716826e-03,1.722212851710678005e-01,-4.893510370576030499e-02 +9.386529341954613637e-01,-1.912828603200459610e-01,1.449284520008997934e-01,-1.089439315610463751e-03,5.983706653219341559e-03,1.716182334112936192e-01,-4.617753819727464515e-02 +9.393736339902262200e-01,-1.903528328872317321e-01,1.389235515844517987e-01,-1.042737001930395691e-03,5.878490349170595319e-03,1.709595164410261936e-01,-4.349594022468322024e-02 +9.400902077330849371e-01,-1.893340764190848957e-01,1.330516926879910800e-01,-9.970896585274251298e-04,5.774125793507615262e-03,1.702474060552501001e-01,-4.088999692763178873e-02 +9.408026522765996491e-01,-1.882310493485909453e-01,1.273127528634944994e-01,-9.524952050963948714e-04,5.670619771929497760e-03,1.694841241790762121e-01,-3.835932470879149098e-02 +9.415109644914688714e-01,-1.870481069872487467e-01,1.217064814251768018e-01,-9.089509584132232992e-04,5.567979133673583297e-03,1.686718412056879557e-01,-3.590347207235720234e-02 +9.422151412665417114e-01,-1.857894999361514032e-01,1.162325051737153891e-01,-8.664536446487697968e-04,5.466210779670113186e-03,1.678126761520606358e-01,-3.352192301121836165e-02 +9.429151795088319687e-01,-1.844593734610499025e-01,1.108903346716127725e-01,-8.249994102955867650e-04,5.365321651509856633e-03,1.669086955672672290e-01,-3.121409914469278415e-02 +9.436110761435310135e-01,-1.830617658744735643e-01,1.056793692481227953e-01,-7.845838337712304896e-04,5.265318718887734666e-03,1.659619148381835940e-01,-2.897936400805519591e-02 +9.443028281140214419e-01,-1.816006087285474135e-01,1.005989024819246014e-01,-7.452019384723490092e-04,5.166208966993410950e-03,1.649742966125075372e-01,-2.681702620356332320e-02 +9.449904323818909546e-01,-1.800797260202371963e-01,9.564812815936511725e-02,-7.068482052327397036e-04,5.067999385031644703e-03,1.639477515616180880e-01,-2.472634140540753284e-02 +9.456738859269452346e-01,-1.785028336713423980e-01,9.082614541342129422e-02,-6.695165833972835986e-04,4.970696952625352875e-03,1.628841390934254174e-01,-2.270651563624430380e-02 +9.463531857472214925e-01,-1.768735402461454609e-01,8.613196321259919408e-02,-6.332005048330555629e-04,4.874308627026598117e-03,1.617852661861181041e-01,-2.075670948886995659e-02 +9.470283288590011228e-01,-1.751953469978727584e-01,8.156450636364204199e-02,-5.978928970553859610e-04,4.778841332100053005e-03,1.606528878991312359e-01,-1.887603922780110161e-02 +9.476993122968236927e-01,-1.734716476266185137e-01,7.712262028019932492e-02,-5.635861950140156045e-04,4.684301944968938258e-03,1.594887088212467163e-01,-1.706357991575583807e-02 +9.483661331134992656e-01,-1.717057292593879014e-01,7.280507541672472105e-02,-5.302723543867243197e-04,4.590697283240031609e-03,1.582943826009083066e-01,-1.531836887316582151e-02 +9.490287883801215019e-01,-1.699007727913896260e-01,6.861057219489337389e-02,-4.979428654122211844e-04,4.498034092185188883e-03,1.570715130260588732e-01,-1.363940791434748237e-02 +9.496872751860808703e-01,-1.680598540809154495e-01,6.453774552447144597e-02,-4.665887661955643909e-04,4.406319032432155607e-03,1.558216538340264634e-01,-1.202566604012104447e-02 +9.503415906390768608e-01,-1.661859444787519846e-01,6.058516962951127072e-02,-4.362006560660228291e-04,4.315558667604597193e-03,1.545463100784810750e-01,-1.047608125079883302e-02 +9.509917318651310847e-01,-1.642819119853526355e-01,5.675136178585819119e-02,-4.067687086330956225e-04,4.225759451653625029e-03,1.532469386080482221e-01,-8.989564090698830626e-03 +9.516376960085998205e-01,-1.623505224222832610e-01,5.303478686490203237e-02,-3.782826867193092601e-04,4.136927716286425451e-03,1.519249487870483473e-01,-7.564999369694755914e-03 +9.522794802321865593e-01,-1.603944409303059426e-01,4.943386140415667596e-02,-3.507319549721486817e-04,4.049069659495491830e-03,1.505817028528857182e-01,-6.201248459701528930e-03 +9.529170817169542174e-01,-1.584162328479029325e-01,4.594695715361138411e-02,-3.241054951771396770e-04,3.962191333115126952e-03,1.492185176653260947e-01,-4.897152261566090937e-03 +9.535504976623380147e-01,-1.564183657679067418e-01,4.257240554640722568e-02,-2.983919193768773187e-04,3.876298631373199192e-03,1.478366644585673850e-01,-3.651532126763376573e-03 +9.541797252861573542e-01,-1.544032102729479272e-01,3.930850060553650605e-02,-2.735794847527081661e-04,3.791397278846183726e-03,1.464373712640813785e-01,-2.463193400494336719e-03 +9.548047618246282564e-01,-1.523730426095019352e-01,3.615350339585250705e-02,-2.496561072117121742e-04,3.707492819114257925e-03,1.450218218751985477e-01,-1.330925757817993417e-03 +9.554256045323753499e-01,-1.503300455093910803e-01,3.310564446215518569e-02,-2.266093758659202412e-04,3.624590604424660946e-03,1.435911584446021572e-01,-2.535070084402308655e-04 +9.560422506824440836e-01,-1.482763103180773645e-01,3.016312776531379983e-02,-2.044265678404828433e-04,3.542695783927528146e-03,1.421464817751483811e-01,7.702962119774890140e-04 +9.566546975663126062e-01,-1.462138386678963342e-01,2.732413350099176241e-02,-1.830946627703439699e-04,3.461813293416640593e-03,1.406888525914521959e-01,1.741725957274185008e-03 +9.572629424939035347e-01,-1.441445445745539733e-01,2.458682152139844007e-02,-1.626003570539846538e-04,3.381947844870222993e-03,1.392192921307212616e-01,2.662031915022696101e-03 +9.578669827935962777e-01,-1.420702561213416648e-01,2.194933381610261400e-02,-1.429300785943104374e-04,3.303103916805741509e-03,1.377387836047703484e-01,3.532468881081621704e-03 +9.584668158122380266e-01,-1.399927175533140500e-01,1.940979771929949790e-02,-1.240700012066416893e-04,3.225285744408463735e-03,1.362482729777233226e-01,4.354295886363292005e-03 +9.590624389151560791e-01,-1.379135910306555357e-01,1.696632802775262239e-02,-1.060060597153502195e-04,3.148497310155397560e-03,1.347486704536853819e-01,5.128773566867901243e-03 +9.596538494861691637e-01,-1.358344591050542061e-01,1.461703028164529697e-02,-8.872396449260142384e-05,3.072742334900794865e-03,1.332408506672465576e-01,5.857163995006566155e-03 +9.602410449275984305e-01,-1.337568262092010407e-01,1.236000262599368932e-02,-7.220921589903356785e-05,2.998024269611552876e-03,1.317256547705384917e-01,6.540728125426087045e-03 +9.608240226602797751e-01,-1.316821211052982832e-01,1.019333846526914063e-02,-5.644711914481633088e-05,2.924346286577122252e-03,1.302038907566581416e-01,7.180724998522237425e-03 +9.614027801235747184e-01,-1.296116986832824003e-01,8.115128389862894670e-03,-4.142279921594362632e-05,2.851711272022226622e-03,1.286763350261238836e-01,7.778409843797648958e-03 +9.619773147753815090e-01,-1.275468422514836331e-01,6.123462807294144383e-03,-2.712121518670796737e-05,2.780121818141386651e-03,1.271437330431002821e-01,8.335033763987259245e-03 +9.625476240921463367e-01,-1.254887655555265713e-01,4.216433296350016041e-03,-1.352717498000631392e-05,2.709580216676606406e-03,1.256068005044071179e-01,8.851841278263369861e-03 +9.631137055688746562e-01,-1.234386147911452486e-01,2.392135183002803110e-03,-6.253499703793493915e-07,2.640088451834550314e-03,1.240662245138556968e-01,9.330070403927149744e-03 +9.636755567191421790e-01,-1.213974708472432135e-01,6.486686497880124903e-04,1.159971048320689637e-05,2.571648194487479236e-03,1.225226643099234930e-01,9.770950229934861550e-03 +9.642331750751055308e-01,-1.193663512327061416e-01,-1.015858858224933520e-03,2.316356426724240950e-05,2.504260796290594566e-03,1.209767525630558865e-01,1.017570115701082502e-02 +9.647865581875129104e-01,-1.173462120445598889e-01,-2.603331433318089271e-03,3.408186266571769032e-05,2.437927284280141851e-03,1.194290965644017044e-01,1.054553310604178923e-02 +9.653357036257157464e-01,-1.153379502548589897e-01,-4.115622962859768683e-03,4.437033586079226576e-05,2.372648355601379710e-03,1.178802787784468536e-01,1.088164496605883123e-02 +9.658806089776785786e-01,-1.133424057973587140e-01,-5.554596055352411209e-03,5.404477857335806683e-05,2.308424373823773045e-03,1.163308577257676502e-01,1.118522300590746137e-02 +9.664212718499893828e-01,-1.113603631544592271e-01,-6.922100092334411425e-03,6.312103590673340486e-05,2.245255364373369405e-03,1.147813697512120035e-01,1.145744113539892868e-02 +9.669576898678714505e-01,-1.093925539657085444e-01,-8.219970536067893255e-03,7.161498974769894171e-05,2.183141011152558032e-03,1.132323288189845401e-01,1.169945901612072514e-02 +9.674898606751921593e-01,-1.074396585648374342e-01,-9.450027399537114930e-03,7.954254431166474281e-05,2.122080653804607822e-03,1.116842282537611336e-01,1.191242194084298975e-02 +9.680177819344745194e-01,-1.055023079927968205e-01,-1.061407413450522579e-02,8.691961298038537590e-05,2.062073284651960627e-03,1.101375415211612169e-01,1.209746012078949558e-02 +9.685414513269068326e-01,-1.035810860777054143e-01,-1.171389669466195958e-02,9.376210468982722939e-05,2.003117546660173297e-03,1.085927228100868058e-01,1.225568783016748285e-02 +9.690608665523531284e-01,-1.016765311506106634e-01,-1.275126270147943809e-02,1.000859103453454943e-04,1.945211731713915228e-03,1.070502082208778516e-01,1.238820253748421953e-02 +9.695760253293631559e-01,-9.978913807360020871e-02,-1.372792019535836253e-02,1.059068896149217934e-04,1.888353779292404862e-03,1.055104162871660783e-01,1.249608498784900157e-02 +9.700869253951824867e-01,-9.791935983275935895e-02,-1.464559703947219092e-02,1.112408582670870922e-04,1.832541275655264704e-03,1.039737492292384796e-01,1.258039822873017501e-02 +9.705935645057625072e-01,-9.606760959424034962e-02,-1.550600014992538356e-02,1.161035752131961895e-04,1.777771453179710160e-03,1.024405932518081819e-01,1.264218712282936564e-02 +9.710959404357700775e-01,-9.423426245046126470e-02,-1.631081495668240330e-02,1.205107295730647592e-04,1.724041190970057201e-03,1.009113193326008767e-01,1.268247758381741211e-02 +9.715940509785973012e-01,-9.241965692007791944e-02,-1.706170417891769014e-02,1.244779285677251606e-04,1.671347015058695720e-03,9.938628438016111621e-02,1.270227724738715383e-02 +9.720878939463715174e-01,-9.062409684418311351e-02,-1.776030784013840624e-02,1.280206857843619261e-04,1.619685099432326353e-03,9.786583155398347056e-02,1.270257396198082485e-02 +9.725774671699647378e-01,-8.884785286967439299e-02,-1.840824248067103250e-02,1.311544088351213272e-04,1.569051267244078555e-03,9.635029126229538443e-02,1.268433592312958810e-02 +9.730627684990029724e-01,-8.709116432291404142e-02,-1.900710080210995731e-02,1.338943878583291017e-04,1.519440992829274462e-03,9.483998133011815879e-02,1.264851113112413529e-02 +9.735437958018759996e-01,-8.535424043421498130e-02,-1.955845096543746067e-02,1.362557842070165467e-04,1.470849403864231755e-03,9.333520828525362911e-02,1.259602762607485611e-02 +9.740205469657464699e-01,-8.363726218332842577e-02,-2.006383669553925622e-02,1.382536194098737535e-04,1.423271283934669654e-03,9.183626737981248622e-02,1.252779233374866179e-02 +9.744930198965594537e-01,-8.194038345491064246e-02,-2.052477636931990604e-02,1.399027640950918216e-04,1.376701075592391644e-03,9.034344380461951929e-02,1.244469193488291318e-02 +9.749612125190515455e-01,-8.026373278992793292e-02,-2.094276305669904789e-02,1.412179282297627337e-04,1.331132883508943151e-03,8.885701268489278049e-02,1.234759206063162397e-02 +9.754251227767596344e-01,-7.860741471210733189e-02,-2.131926442528314300e-02,1.422136502796456710e-04,1.286560478690885502e-03,8.737723977111563900e-02,1.223733685167596730e-02 +9.758847486320304521e-01,-7.697151093477229422e-02,-2.165572227610442976e-02,1.429042873808277793e-04,1.242977302372752849e-03,8.590438225259276750e-02,1.211474931231634050e-02 +9.763400880660293435e-01,-7.535608187482892295e-02,-2.195355221747949392e-02,1.433040060568270730e-04,1.200376470578684920e-03,8.443868889355787832e-02,1.198063150430874260e-02 +9.767911390787489267e-01,-7.376116795964299255e-02,-2.221414403145884758e-02,1.434267731486816514e-04,1.158750779179484663e-03,8.298040045663601649e-02,1.183576353651922576e-02 +9.772378996890179748e-01,-7.218679057202598282e-02,-2.243886120241352089e-02,1.432863463461592278e-04,1.118092709154497062e-03,8.152975070414629488e-02,1.168090421542125265e-02 +9.776803679345105191e-01,-7.063295342633114926e-02,-2.262904086803003978e-02,1.428962663556743553e-04,1.078394431586197650e-03,8.008696649137046764e-02,1.151679098955783395e-02 +9.781185418717538438e-01,-6.909964419976825922e-02,-2.278599417034530075e-02,1.422698480370362267e-04,1.039647814424772727e-03,7.865226728217923624e-02,1.134413922558374054e-02 +9.785524195761373667e-01,-6.758683461023555494e-02,-2.291100548705004777e-02,1.414201730191362745e-04,1.001844428397207897e-03,7.722586747951006025e-02,1.116364366175887600e-02 +9.789819991419210776e-01,-6.609448244911146630e-02,-2.300533321471956438e-02,1.403600824492869722e-04,9.649755530949449215e-04,7.580797499348909674e-02,1.097597702556824678e-02 +9.794072786822439758e-01,-6.462253207429742574e-02,-2.307020951063511183e-02,1.391021694067329424e-04,9.290321843698426266e-04,7.439879260096343705e-02,1.078179067433761990e-02 +9.798282563291319525e-01,-6.317091560158319408e-02,-2.310684029080126156e-02,1.376587724717365355e-04,8.940050407951710774e-04,7.299851790155574560e-02,1.058171482245758716e-02 +9.802449302335066728e-01,-6.173955399156676876e-02,-2.311640550244647291e-02,1.360419693115274403e-04,8.598845716119039645e-04,7.160734337685585593e-02,1.037635831712776027e-02 +9.806572985651929031e-01,-6.032835754600310990e-02,-2.310005907599839078e-02,1.342635707447663068e-04,8.266609638410530887e-04,7.022545746584983595e-02,1.016630907532106720e-02 +9.810653595129272819e-01,-5.893722737366090952e-02,-2.305892919952682793e-02,1.323351152222306546e-04,7.943241503435526096e-04,6.885304373312502169e-02,9.952133972878968518e-03 +9.814691112843658694e-01,-5.756605566400107427e-02,-2.299411834219272990e-02,1.302678636552249915e-04,7.628638178529504200e-04,6.749028218477173824e-02,9.734379258036812005e-03 +9.818685521060921406e-01,-5.621472699857221061e-02,-2.290670361216727915e-02,1.280727946774648269e-04,7.322694151867419933e-04,6.613734855273267765e-02,9.513570388603729763e-03 +9.822636802236245357e-01,-5.488311851895467247e-02,-2.279773684150825444e-02,1.257606000931753623e-04,7.025301617070970782e-04,6.479441563925587844e-02,9.290212428964453317e-03 +9.826544939014246749e-01,-5.357110130990804658e-02,-2.266824489404798554e-02,1.233416809044216695e-04,6.736350555817300929e-04,6.346165230089345632e-02,9.064790074116403640e-03 +9.830409914229045754e-01,-5.227854093321979839e-02,-2.251922971489836792e-02,1.208261436095893200e-04,6.455728834445788496e-04,6.213922394491317586e-02,8.837768200615974540e-03 +9.834231710904342005e-01,-5.100529752848368542e-02,-2.235166854508515571e-02,1.182237973668139025e-04,6.183322286631732232e-04,6.082729375991757781e-02,8.609592147834726344e-03 +9.838010312253490097e-01,-4.975122715524348677e-02,-2.216651446828321950e-02,1.155441510640530618e-04,5.919014803882638193e-04,5.952602154304467014e-02,8.380687413922355972e-03 +9.841745701679575076e-01,-4.851618216386038157e-02,-2.196469659615397918e-02,1.127964106269628612e-04,5.662688430663631199e-04,5.823556427459311569e-02,8.151460064419050952e-03 +9.845437862775482385e-01,-4.730001186032094934e-02,-2.174712012109456666e-02,1.099894770446563342e-04,5.414223459915674897e-04,5.695607606996401506e-02,7.922297422269410430e-03 +9.849086779323972252e-01,-4.610256243089171785e-02,-2.151466670893004141e-02,1.071319449570685414e-04,5.173498523899328983e-04,5.568770945000853928e-02,7.693568150982014672e-03 +9.852692435297746298e-01,-4.492367816834432620e-02,-2.126819476770326969e-02,1.042321014365203848e-04,4.940390684979584354e-04,5.443061410367644459e-02,7.465622598439858315e-03 +9.856254814859526370e-01,-4.376320216521992906e-02,-2.100854000672876601e-02,1.012979250896689200e-04,4.714775541100035263e-04,5.318493658486505432e-02,7.238792623035209331e-03 +9.859773902362116704e-01,-4.262097597544267646e-02,-2.073651561834519499e-02,9.833708517929503137e-05,4.496527322227478586e-04,5.195082188080083624e-02,7.013392151489614493e-03 +9.863249682348473879e-01,-4.149684030128544809e-02,-2.045291227899684663e-02,9.535694185159839442e-05,4.285518982700010221e-04,5.072841299712994856e-02,6.789718098080991387e-03 +9.866682139551777864e-01,-4.039063542631832898e-02,-2.015849879134840039e-02,9.236454657638758291e-05,4.081622298080385381e-04,4.951785096076506687e-02,6.568050102695554941e-03 +9.870071258895497524e-01,-3.930220196621843354e-02,-1.985402245837172841e-02,8.936664263560296017e-05,3.884707965071546379e-04,4.831927417036931127e-02,6.348650790320617930e-03 +9.873417025493456123e-01,-3.823138066697611553e-02,-1.954020931800482669e-02,8.636966602601870520e-05,3.694645702034762445e-04,4.713281947874953942e-02,6.131766309395721148e-03 +9.876719424649894608e-01,-3.717801305734641515e-02,-1.921776441807956232e-02,8.337974695886821478e-05,3.511304342813406599e-04,4.595862162139083967e-02,5.917626810321809268e-03 +9.879978441859542659e-01,-3.614194165947108572e-02,-1.888737229941587353e-02,8.040271158377787053e-05,3.334551939522057210e-04,4.479681342347893724e-02,5.706446547856398703e-03 +9.883194062807676428e-01,-3.512301007061307406e-02,-1.854969733131955900e-02,7.744408396680040366e-05,3.164255854253827656e-04,4.364752620062732624e-02,5.498424272916540081e-03 +9.886366273370181812e-01,-3.412106405250689423e-02,-1.820538398389700760e-02,7.450908849750016835e-05,3.000282864507256421e-04,4.251088822446034282e-02,5.293743754137957870e-03 +9.889495059613619965e-01,-3.313595020538635111e-02,-1.785505714074498221e-02,7.160265274491005414e-05,2.842499255817324964e-04,4.138702767309830610e-02,5.092574233586316841e-03 +9.892580407795287245e-01,-3.216751780891270035e-02,-1.749932247879869979e-02,6.872941067582534925e-05,2.690770915072855261e-04,4.027606960181813467e-02,4.895070765153091558e-03 +9.895622304363274058e-01,-3.121561811342249762e-02,-1.713876714601679160e-02,6.589370587392432159e-05,2.544963434269734886e-04,3.917813765448904806e-02,4.701374001764525509e-03 +9.898620735956522587e-01,-3.028010465160565698e-02,-1.677395961800718163e-02,6.309959513607583731e-05,2.404942199263213000e-04,3.809335382372301682e-02,4.511611528177928918e-03 +9.901575689404893410e-01,-2.936083377620486018e-02,-1.640545030899743031e-02,6.035085289328305410e-05,2.270572488889371747e-04,3.702183775109069153e-02,4.325897784312627420e-03 +9.904487151729213235e-01,-2.845766416186903611e-02,-1.603377229904767720e-02,5.765097501690136245e-05,2.141719567048303539e-04,3.596370793533271853e-02,4.144333774313019997e-03 +9.907355110141340404e-01,-2.757045737094739046e-02,-1.565944110695459413e-02,5.500318337249199730e-05,2.018248772474946686e-04,3.491908091315302359e-02,3.967008567718174721e-03 +9.910179552044214857e-01,-2.669907814459913931e-02,-1.528295516863957884e-02,5.241043094999326221e-05,1.900025614391593113e-04,3.388807092441101232e-02,3.793999479470020267e-03 +9.912960465031916968e-01,-2.584339398727954554e-02,-1.490479638608829815e-02,4.987540705011484600e-05,1.786915862199740400e-04,3.287079087309786180e-02,3.625372118178188559e-03 +9.915697836889719730e-01,-2.500327568480187682e-02,-1.452543052124221669e-02,4.740054240330185902e-05,1.678785634042842122e-04,3.186735151061943772e-02,3.461180724886131922e-03 +9.918391655594147593e-01,-2.417859702113750106e-02,-1.414530725996708191e-02,4.498801497479015609e-05,1.575501485140583453e-04,3.087786209562524281e-02,3.301469129056599193e-03 +9.921041909313020879e-01,-2.336923525690464201e-02,-1.376486108605283129e-02,4.263975564047607701e-05,1.476930494171602886e-04,2.990242960156964946e-02,3.146270183657264366e-03 +9.923648586405513505e-01,-2.257507075731322865e-02,-1.338451097316083541e-02,4.035745426515153659e-05,1.382940349915145562e-04,2.894115949631782814e-02,2.995607413477657452e-03 +9.926211675422206282e-01,-2.179598737427575900e-02,-1.300466129470104031e-02,3.814256616874578518e-05,1.293399433408379154e-04,2.799415508656714902e-02,2.849494375953142474e-03 +9.928731165105130207e-01,-2.103187228991064764e-02,-1.262570186575545997e-02,3.599631839525270076e-05,1.208176902712320517e-04,2.706151785237928542e-02,2.707935644734240678e-03 +9.931207044387818650e-01,-2.028261608927149282e-02,-1.224800852975619671e-02,3.391971649676000578e-05,1.127142772180906243e-04,2.614334733348551432e-02,2.570926766330971624e-03 +9.933639302395360637e-01,-1.954811268775598007e-02,-1.187194323217670086e-02,3.191355122856113244e-05,1.050167992765286147e-04,2.523974127107253204e-02,2.438455172391381081e-03 +9.936027928444443047e-01,-1.882825942550740705e-02,-1.149785461064750658e-02,2.997840577894910082e-05,9.771245280532341732e-05,2.435079542060543042e-02,2.310500116708639768e-03 +9.938372912043398344e-01,-1.812295713425914750e-02,-1.112607832828713637e-02,2.811466258227638383e-05,9.078854325371596391e-05,2.347660340180397071e-02,2.187033086073249775e-03 +9.940674242892252321e-01,-1.743210990420360901e-02,-1.075693710925728838e-02,2.632251090702755720e-05,8.423249247159166671e-05,2.261725709763925257e-02,2.068018761097285482e-03 +9.942931910882769619e-01,-1.675562527392303672e-02,-1.039074150500869133e-02,2.460195412627679673e-05,7.803184602644162393e-05,2.177284624788227568e-02,1.953414597984882636e-03 +9.945145906098497024e-01,-1.609341404632142966e-02,-1.002778980718211331e-02,2.295281737304863249e-05,7.217428026869416159e-05,2.094345873268208327e-02,1.843172000725134055e-03 +9.947316218814804545e-01,-1.544539034014216425e-02,-9.668368857219264423e-03,2.137475506260408414e-05,6.664760924819830568e-05,2.012918040429068772e-02,1.737235801708923158e-03 +9.949442839498933155e-01,-1.481147155888976746e-02,-9.312753808373548658e-03,1.986725869943623693e-05,6.143979144877438918e-05,1.933009506451538462e-02,1.635545695782528782e-03 +9.951525758810034761e-01,-1.419157832225636899e-02,-8.961209067180165438e-03,1.842966466640209614e-05,5.653893641129894513e-05,1.854628450425669858e-02,1.538035451386865048e-03 +9.953564967599211055e-01,-1.358563432426024113e-02,-8.613987908228518467e-03,1.706116209795179256e-05,5.193331101474173064e-05,1.777782867311672454e-02,1.444634597336275908e-03 +9.955560456909556821e-01,-1.299356643465783730e-02,-8.271333528829966433e-03,1.576080095027484885e-05,4.761134569664633265e-05,1.702480538645283203e-02,1.355267397281080055e-03 +9.957512217976196567e-01,-1.241530457239088669e-02,-7.933478843031950556e-03,1.452749967994392974e-05,4.356164048549677722e-05,1.628729045416770979e-02,1.269854174962800272e-03 +9.959420242226324493e-01,-1.185078167552634960e-02,-7.600646772854617074e-03,1.336005365126459944e-05,3.977297086970644525e-05,1.556535762373962087e-02,1.188311693584624034e-03 +9.961284521279242243e-01,-1.129993355425476173e-02,-7.273051079898272889e-03,1.225714290955972323e-05,3.623429341285483657e-05,1.485907873828575670e-02,1.110552508089381442e-03 +9.963105046946395538e-01,-1.076269897815272311e-02,-6.950895919279506154e-03,1.121734028051062000e-05,3.293475126005698496e-05,1.416852345196205863e-02,1.036486698598667938e-03 +9.964881811231409703e-01,-1.023901936245098267e-02,-6.634376614696700647e-03,1.023911959859312390e-05,2.986367942871721114e-05,1.349375969437385803e-02,9.660212993219736326e-04 +9.966614806330126308e-01,-9.728838812789568358e-03,-6.323679976257461609e-03,9.320863524037732159e-06,2.701060978282888644e-05,1.283485346058796557e-02,8.990605695048242956e-04 +9.968304024630633142e-01,-9.232104349056110063e-03,-6.018984213489051750e-03,8.460871635440704765e-06,2.436527613955787988e-05,1.219186826372438935e-02,8.355070076356367875e-04 +9.969949458713304180e-01,-8.748765309325749404e-03,-5.720459649461205956e-03,7.657368409958847030e-06,2.191761903182578271e-05,1.156486612373027953e-02,7.752608473843208619e-04 +9.971551101350828450e-01,-8.278773609536289185e-03,-5.428268581158579575e-03,6.908511057999187795e-06,1.965779016906932063e-05,1.095390695103116638e-02,7.182211378383504411e-04 +9.973108945508237788e-01,-7.822083523423936646e-03,-5.142565669704866199e-03,6.212397601178030333e-06,1.757615692088821565e-05,1.035904882953972601e-02,6.642858133065830239e-04 +9.974622984342947918e-01,-7.378651898616417348e-03,-4.863498523242124928e-03,5.567074468886495102e-06,1.566330656748850616e-05,9.780347482144428548e-03,6.133513846051392299e-04 +9.976093211204778433e-01,-6.948437646650437206e-03,-4.591207367798139961e-03,4.970544279610468817e-06,1.391005048851608361e-05,9.217857098287691661e-03,5.653143227406439179e-04 +9.977519619635988324e-01,-6.531401874763990457e-03,-4.325825882668780237e-03,4.420773487492777672e-06,1.230742784260145955e-05,8.671629959238518620e-03,5.200702411381425208e-04 +9.978902203371300406e-01,-6.127507989920371288e-03,-4.067480890988649234e-03,3.915699824198285166e-06,1.084670956297098056e-05,8.141716116873198850e-03,4.775152060853734550e-04 +9.980240956337933511e-01,-5.736720986144222545e-03,-3.816292824655071435e-03,3.453239933127513338e-06,9.519401708787280408e-06,7.628164605448173145e-03,4.375455766449899632e-04 +9.981535872655622477e-01,-5.359008368580337432e-03,-3.572376328054645885e-03,3.031296435056826571e-06,8.317249012432557793e-06,7.131021586517123471e-03,4.000575654016282892e-04 +9.982786946636650338e-01,-4.994339185706967582e-03,-3.335839791568366141e-03,2.647765127996628392e-06,7.232238240998270458e-06,6.650332043374117784e-03,3.649487866226967521e-04 +9.983994172785870536e-01,-4.642684465226085307e-03,-3.106785981385238194e-03,2.300542115273978128e-06,6.256601216170443058e-06,6.186138846088796994e-03,3.321177293825311439e-04 +9.985157545800730228e-01,-4.304016884212515624e-03,-2.885312241752144031e-03,1.987530594501292145e-06,5.382817876479706515e-06,5.738483255074413800e-03,3.014640125717156266e-04 +9.986277060571295827e-01,-3.978310819645307014e-03,-2.671510565929782408e-03,1.706647564623055681e-06,4.603619063070790429e-06,5.307404714388958521e-03,2.728888660812052815e-04 +9.987352712180274095e-01,-3.665542210473101119e-03,-2.465467708335562780e-03,1.455830451539447255e-06,3.911989239549590146e-06,4.892941001537783399e-03,2.462955144935107281e-04 +9.988384495903031013e-01,-3.365688646769479046e-03,-2.267265786095052846e-03,1.233043423051855739e-06,3.301169077564892413e-06,4.495127954789562320e-03,2.215886225900322022e-04 +9.989372407207620652e-01,-3.078728972927052485e-03,-2.076981902901714645e-03,1.036283549243232362e-06,2.764657866998271338e-06,4.114000115082276643e-03,1.986755533331593937e-04 +9.990316441754796273e-01,-2.804643691976337668e-03,-1.894688636901552389e-03,8.635869251551509009e-07,2.296215812153098987e-06,3.749589869434042343e-03,1.774659848327026076e-04 +9.991216595398032529e-01,-2.543414396914920120e-03,-1.720454331509790056e-03,7.130343463425631678e-07,1.889866252060460025e-06,3.401928422375527158e-03,1.578718756697975172e-04 +9.992072864183545455e-01,-2.295024133554283193e-03,-1.554342886247533050e-03,5.827569563531444280e-07,1.539897629834340417e-06,3.071045024589097363e-03,1.398083463007169440e-04 +9.992885244350309115e-01,-2.059457107960801104e-03,-1.396414296304939037e-03,4.709416180030933114e-07,1.240865529832216701e-06,2.756967434317333758e-03,1.231931351121621923e-04 +9.993653732330070039e-01,-1.836698607879872847e-03,-1.246724440430505487e-03,3.758360643265014741e-07,9.875943825947473918e-07,2.459721981514412931e-03,1.079474420815683351e-04 +9.994378324747364983e-01,-1.626735172701814685e-03,-1.105325412682650957e-03,2.957538874722902748e-07,7.751792452629376594e-07,2.179333169995226974e-03,9.399573086658502303e-05 +9.995059018419534258e-01,-1.429554247641565872e-03,-9.722656868078960804e-04,2.290792008661241901e-07,5.989873394557187879e-07,1.915824250792590036e-03,8.126582264516812024e-05 +9.995695810356738376e-01,-1.245144488634565647e-03,-8.475901103220174256e-04,1.742711225405102311e-07,4.546595777539535790e-07,1.669216579561803556e-03,6.968928707724419535e-05 +9.996288697761966935e-01,-1.073495244864381207e-03,-7.313401876252177995e-04,1.298679900878838228e-07,3.381119064844291873e-07,1.439530520070086885e-03,5.920126900654717203e-05 +9.996837678031056384e-01,-9.145969749582459141e-04,-6.235540054429734511e-04,9.449133012437933857e-08,2.455365711596457151e-07,1.226784601308217489e-03,4.974096332241033412e-05 +9.997342748752695574e-01,-7.684409899849103552e-04,-5.242663981669936366e-04,6.684961034703529577e-08,1.734033237201767020e-07,1.030995941073458355e-03,4.125161743727475837e-05 +9.997803907708440185e-01,-6.350194250793644076e-04,-4.335092111871701273e-04,4.574167470858940454e-08,1.184604863064570160e-07,8.521802451393129451e-04,3.368032732294278057e-05 +9.998221152872722728e-01,-5.143251664124431173e-04,-3.513110494462930188e-04,3.005995513519191915e-08,7.773592113264225598e-08,6.903518944425371439e-04,2.697877596109041090e-05 +9.998594482412856976e-01,-4.063519102345642081e-04,-2.776976634104294907e-04,1.879342967883480850e-08,4.853792215126924754e-08,5.455237889310121137e-04,2.110275373545859824e-05 +9.998923894689052405e-01,-3.110941117152204484e-04,-2.126918760101745970e-04,1.103026164395963603e-08,2.845601532535955110e-08,4.177074023491449584e-04,1.601251651657159116e-05 +9.999209388254417519e-01,-2.285469527285761157e-04,-1.563135655309518673e-04,5.960228782963606721e-09,1.536167519899422237e-08,3.069128058186614775e-04,1.167301578754831288e-05 +9.999450961854965403e-01,-1.587062173448171873e-04,-1.085800918782262395e-04,2.876816059576935398e-09,7.408930812848458131e-09,2.131488687944680348e-04,8.053274298460492197e-06 +9.999648614429623716e-01,-1.015685083271660865e-04,-6.950580241239230183e-05,1.179067087713578288e-09,3.034911746389828074e-09,1.364228249625791002e-04,5.127464331123271171e-06 +9.999802345110231361e-01,-5.713097211520982576e-05,-3.910252678857763643e-05,3.731588654434305282e-10,9.602973493015872819e-10,7.674076406058295114e-05,2.874108337536535870e-06 +9.999912153221552913e-01,-2.539143461237474631e-05,-1.737940420300304942e-05,7.366828647285503959e-11,1.896798212121883235e-10,3.410735776743149507e-05,1.276508016380705142e-06 +9.999978038281271964e-01,-6.348308155885659235e-06,-4.342936922034114525e-06,4.580574948692394836e-12,1.184942236291030780e-11,8.526018966727354561e-06,3.227257789899316567e-07 +1.000000000000000000e+00,-7.239436301828963784e-10,2.953069315321496946e-09,-1.811720345834967971e-16,3.373818578172117256e-17,-1.357394309818077933e-09,5.537004964380798958e-09 diff --git a/examples/TS_channel/box.nmsh b/examples/TS_channel/box.nmsh new file mode 100644 index 00000000000..230ce148757 Binary files /dev/null and b/examples/TS_channel/box.nmsh differ diff --git a/examples/advecting_cone/.gitignore b/examples/advecting_cone/.gitignore new file mode 100644 index 00000000000..afed0735dc9 --- /dev/null +++ b/examples/advecting_cone/.gitignore @@ -0,0 +1 @@ +*.csv diff --git a/examples/advecting_cone/README.md b/examples/advecting_cone/README.md new file mode 100644 index 00000000000..60bdf376c34 --- /dev/null +++ b/examples/advecting_cone/README.md @@ -0,0 +1,41 @@ +# Advecting cone + +This is a classical test case for demonstrating the benefit of a high order +discretization. It can be found in this classical reference +https://epubs.siam.org/doi/book/10.1137/1.9781611970425 in chapter 11. We +consider scalar advection of a cone of diameter 1 and height 1, with its center +initially positioned at (1, 0). The advecting velocity is a vortex with $u=-x +\pi$ and $v = y \pi$, meaning that the cone makes a full rotation in 2 seconds. + +The case is run for 1 rotation and the shape of the cone is examined. Ideally it +should be preserved, but in practice numerical error lead to the distortion of +the shape. The idea is to see how the order of the polynomial basis affects the +error, while keeping the total number of DoFs roughly the same. + +## Case setup +To keep the advecting velocity constant, we enable `freeze` for the `fluid`. The +initial velocity distribution is set in the user file, and similarly for the +scalar. We output generously: 100 data dumps during one rotation, so that one +can examine the dynamics during post-processing. + +To analyze the results, we use the `probes` simcomp to extract the values of the +scalar along a line aligned with $y$ in the center of the domain. A Python +script to create the probes is provided. + +The mesh is generated with `genmeshbox` with periodic conditions in all +directions, and a single element across $z$, to mimic a 2D case. + +The script `run.sh` can be used to run the study, computing the results for +several meshes and polynomial orders. We suggest inspecting the script and +modifying it to your liking. Note that the `change_order.py` script is used to +change the polynomial order in the case file. + +A Python script to plot the results is also provided for convenience. + + +## Expected results +The following curves will be obtained with the settings in the `run.sh` script. +One can see that the results improve with higher order, even if the number of +DoFs stays roughly the same. + +Results diff --git a/examples/advecting_cone/advecting_cone.case b/examples/advecting_cone/advecting_cone.case new file mode 100755 index 00000000000..9ad04ed7a66 --- /dev/null +++ b/examples/advecting_cone/advecting_cone.case @@ -0,0 +1,61 @@ +{ + "version": 1.0, + "case": { + "mesh_file": "box.nmsh", + "output_at_end": false, + "output_boundary": false, + "output_checkpoints": false, + "end_time": 2, + "timestep": 0.001, + "numerics": { + "time_order": 2, + "polynomial_order": 5, + "dealias": true + }, + "fluid": { + "scheme": "pnpn", + "mu": 1, + "rho": 1, + "freeze": true, + "initial_condition": { + "type": "user" + }, + "velocity_solver": { + "type": "cg", + "preconditioner": "jacobi", + "projection_space_size": 0, + "absolute_tolerance": 1e-07, + "max_iterations": 800 + }, + "pressure_solver": { + "type": "gmres", + "preconditioner": "hsmg", + "projection_space_size": 4, + "absolute_tolerance": 1e-07, + "max_iterations": 800 + }, + "output_control": "nsamples", + "output_value": 100 + }, + "scalar": { + "enabled": true, + "lambda": 1e-16, + "cp": 1.0, + "initial_condition": { + "type": "user" + } + }, + "simulation_components": [ + { + "type": "probes", + "compute_control": "nsamples", + "compute_value": 1, + "points_file": "probes.csv", + "output_file": "output.csv", + "fields": [ + "s" + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/advecting_cone/advecting_cone.f90 b/examples/advecting_cone/advecting_cone.f90 new file mode 100644 index 00000000000..77ec4f8d66f --- /dev/null +++ b/examples/advecting_cone/advecting_cone.f90 @@ -0,0 +1,77 @@ +module user + use neko + implicit none + +contains + !> Register user defined functions (see user_intf.f90) + subroutine user_setup(u) + type(user_t), intent(inout) :: u + u%scalar_user_ic => set_s_ic + u%fluid_user_ic => set_velocity + end subroutine user_setup + + !> User initial condition for the scalar + subroutine set_s_ic(s, params) + type(field_t), intent(inout) :: s + type(json_file), intent(inout) :: params + integer :: i, e, k, j + real(kind=rp) :: cone_radius, mux, muy, x, y, r, theta + + ! Center of the cone + mux = 1 + muy = 0 + + cone_radius = 0.5 + + do i = 1, s%dof%size() + x = s%dof%x(i,1,1,1) - mux + y = s%dof%y(i,1,1,1) - muy + + r = sqrt(x**2 + y**2) + theta = atan2(y, x) + + ! Check if the point is inside the cone's base + if (r > cone_radius) then + s%x(i,1,1,1) = 0.0 + else + s%x(i,1,1,1) = 1.0 - r / cone_radius + endif + end do + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(s%x, s%x_d, s%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + end if + end subroutine set_s_ic + + !> Set the advecting velocity field. + subroutine set_velocity(u, v, w, p, params) + type(field_t), intent(inout) :: u + type(field_t), intent(inout) :: v + type(field_t), intent(inout) :: w + type(field_t), intent(inout) :: p + type(json_file), intent(inout) :: params + integer :: i, e, k, j + real(kind=rp) :: x, y + + do i = 1, u%dof%size() + x = u%dof%x(i,1,1,1) + y = u%dof%y(i,1,1,1) + + ! Angular velocity is pi, giving a full rotation in 2 sec + u%x(i,1,1,1) = -y*pi + v%x(i,1,1,1) = x*pi + w%x(i,1,1,1) = 0 + end do + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(u%x, u%x_d, u%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(v%x, v%x_d, v%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(w%x, w%x_d, w%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + end if + end subroutine set_velocity + +end module user diff --git a/examples/advecting_cone/box.nmsh b/examples/advecting_cone/box.nmsh new file mode 100644 index 00000000000..c9a36ca816f Binary files /dev/null and b/examples/advecting_cone/box.nmsh differ diff --git a/examples/advecting_cone/change_order.py b/examples/advecting_cone/change_order.py new file mode 100644 index 00000000000..c1015afaa7a --- /dev/null +++ b/examples/advecting_cone/change_order.py @@ -0,0 +1,28 @@ +import argparse +import json + +def modify_polynomial_order(json_file_path, new_polynomial_order): + # Open the JSON file for reading + with open(json_file_path, 'r') as file: + data = json.load(file) + + # Update the polynomial_order value under the specified path + path = ["case", "numerics", "polynomial_order"] + current_node = data + for key in path[:-1]: + current_node = current_node[key] + current_node[path[-1]] = new_polynomial_order + + # Save the modified data back to the JSON file + with open(json_file_path, 'w') as file: + json.dump(data, file, indent=2) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Modify polynomial order in the case file") + parser.add_argument("new_polynomial_order", type=int, help="New polynomial order value") + + args = parser.parse_args() + + new_polynomial_order = args.new_polynomial_order + + modify_polynomial_order("advecting_cone.case", new_polynomial_order) diff --git a/examples/advecting_cone/create_probes.py b/examples/advecting_cone/create_probes.py new file mode 100644 index 00000000000..42027cfecc7 --- /dev/null +++ b/examples/advecting_cone/create_probes.py @@ -0,0 +1,7 @@ +import numpy as np + +x = np.linspace(-0.5, 1.75, 200) + +p = np.column_stack((x, np.zeros(x.shape) + 1e-10, 0.05*np.ones(x.shape))) + +np.savetxt("probes.csv", p, delimiter=',') diff --git a/examples/advecting_cone/results.png b/examples/advecting_cone/results.png new file mode 100755 index 00000000000..5a0d4a19887 Binary files /dev/null and b/examples/advecting_cone/results.png differ diff --git a/examples/advecting_cone/run.sh b/examples/advecting_cone/run.sh new file mode 100755 index 00000000000..b1f5aa8d63e --- /dev/null +++ b/examples/advecting_cone/run.sh @@ -0,0 +1,28 @@ +# Compile the case +makeneko advecting_cone.f90 +# Generate probes +python create_probes.py + +# Remove results if exist +rm output.csv + +# Set order +python change_order.py 2 +# Generate mesh +genmeshbox -2 2 -2 2 0 0.1 30 30 1 .true .true. .true. +# Run the code +mpirun -n 8 ./neko advecting_cone.case +# Save result +mv output.csv output_2.csv + +# Order 3 +python change_order.py 3 +genmeshbox -2 2 -2 2 0 0.1 22 22 1 .true .true. .true. +mpirun -n 8 ./neko advecting_cone.case +mv output.csv output_3.csv + +# Order 5 +python change_order.py 5 +genmeshbox -2 2 -2 2 0 0.1 15 15 1 .true .true. .true. +mpirun -n 8 ./neko advecting_cone.case +mv output.csv output_5.csv diff --git a/examples/cyl_boundary_layer/cyl_bl.f90 b/examples/cyl_boundary_layer/cyl_bl.f90 index a89dc83dab1..d3291b10cbf 100644 --- a/examples/cyl_boundary_layer/cyl_bl.f90 +++ b/examples/cyl_boundary_layer/cyl_bl.f90 @@ -25,8 +25,7 @@ subroutine user_setup(u) type(user_t), intent(inout) :: u u%fluid_user_ic => user_ic u%fluid_user_if => user_inflow_eval - !u%user_check => user_do_stuff - u%user_mesh_setup => cylinder_deform + u%user_dirichlet_update => dirichlet_update end subroutine user_setup subroutine cylinder_deform(msh) @@ -149,4 +148,84 @@ subroutine user_ic(u, v, w, p, params) end do end subroutine user_ic + ! Initial example of using user specified dirichlet bcs + ! Note: This subroutine will be called two times, once in the fluid solver, and once + ! in the scalar solver (if enabled). + !! Parameters: + !! ----------- + !! field_bc_list: List of fields from which the BC conditions zill be extracted. + !! If which_solver = "fluid", contains (u,v,w,p). + !! If which_solver = "scalar", contains (s). + !! bc_bc_list: List of BCs containing field_dirichlet_t BC objects only. + !! If which_solver = "fluid", contains the bc objects + !! (d_vel_u, d_vel_v, d_vel_w, d_pres). + !! If which_solver = "scalar", contains the bc object (d_s). + !! coef: Coef object. + !! t: Current time. + !! tstep: Current time step. + !! which_solver: Indicates wether the fields provided come from "fluid" or "scalar". + subroutine dirichlet_update(field_bc_list, bc_bc_list, coef, t, tstep, which_solver) + type(field_list_t), intent(inout) :: field_bc_list + type(bc_list_t), intent(inout) :: bc_bc_list + type(coef_t), intent(inout) :: coef + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + character(len=*), intent(in) :: which_solver + + integer :: i + real(kind=rp) :: y,z + + ! Only do this at the first time step since our BCs are constants. + if (tstep .ne. 1) return + + ! Check that we are being called by `fluid` + if (trim(which_solver) .eq. "fluid") then + + associate(u => field_bc_list%fields(1)%f, & + v => field_bc_list%fields(2)%f, & + w => field_bc_list%fields(3)%f, & + p => field_bc_list%fields(4)%f) + + ! + ! Perform operations on u%x, v%x, w%x and p%x here + ! Note that we are checking if fields are allocated. If the + ! boundary type only contains e.g. "d_vel_u/d_pres", the fields + ! v%x and w%x will not be allocated. + ! + ! Here we are applying very simple uniform boundaries (u,v,w) = (1,0,0) + ! and nonsensical pressure outlet of p = -1 + ! + if (allocated(u%x)) u = 1.0_rp + if (allocated(v%x)) v = 0.0_rp + if (allocated(w%x)) w = 0.0_rp + if (allocated(p%x)) p = -1.0_rp + + end associate + + ! Check that we are being called by `scalar` + else if (trim(which_solver) .eq. "scalar") then + + associate( s => field_bc_list%fields(1)%f, & + s_bc => bc_bc_list%bc(1)%bcp) + + ! + ! Perform operations on the scalar field here + ! Note that we are checking if the field is allocated, in + ! case the boundary is empty. + ! + if (allocated(s%x)) then + + do i = 1, s_bc%msk(0) + y = s_bc%dof%y(s_bc%msk(i), 1, 1, 1) + z = s_bc%dof%z(s_bc%msk(i), 1, 1, 1) + s%x(s_bc%msk(i), 1, 1, 1) = sin(y)*sin(z) + end do + + end if + end associate + + end if + + end subroutine dirichlet_update + end module user diff --git a/examples/cyl_boundary_layer/cyl_bl_user_bc_test.case b/examples/cyl_boundary_layer/cyl_bl_user_bc_test.case new file mode 100644 index 00000000000..282e3a65793 --- /dev/null +++ b/examples/cyl_boundary_layer/cyl_bl_user_bc_test.case @@ -0,0 +1,72 @@ +{ +"version": 1.0, +"case": { + "mesh_file": "cyl.nmsh", + "output_at_end": false, + "output_boundary": true, + "output_checkpoints": false, + "end_time": 100, + "output_directory": "results_dirichlet_bcs", + "timestep": 8e-4, + "numerics": { + "time_order": 3, + "polynomial_order": 5, + "dealias": true + }, + "fluid": { + "scheme": "pnpn", + "Re": 500, + "source_term": { + "type": "noforce" + }, + "initial_condition": { + "type": "uniform", + "value": [1.0, 0.0, 0.0] + }, + "inflow_condition": { + "type": "user", + }, + "velocity_solver": { + "type": "cg", + "preconditioner": "jacobi", + "projection_space_size": 3, + "absolute_tolerance": 1e-8, + "max_iterations": 800 + }, + "pressure_solver": { + "type": "gmres", + "preconditioner": "hsmg", + "projection_space_size": 10, + "absolute_tolerance": 1e-5, + "max_iterations": 800 + }, + "boundary_types": [ + "d_vel_u/d_vel_v/d_vel_w", + "d_vel_u/d_vel_v/d_vel_w/d_pres", + "sym", + "w", + "on", + "on", + "w" + ], + "output_control": "nsamples", + "output_value": 20 + }, + "scalar": { + "enabled": true, + "Pe": 0.71, + "boundary_types": [ + "d_s", + "d_s", + "", + "", + "", + "" + ], + "initial_condition": { + "type": "uniform", + "value": 0.0 + } + } +} +} diff --git a/examples/immersed_bunny/.gitignore b/examples/immersed_bunny/.gitignore new file mode 100644 index 00000000000..1df57c6ce3e --- /dev/null +++ b/examples/immersed_bunny/.gitignore @@ -0,0 +1,7 @@ +# Only include the files needed for the example +* +!.gitignore +!*.case +!readme.md +!run.sh +!bunny.stl diff --git a/examples/immersed_bunny/bunny.stl b/examples/immersed_bunny/bunny.stl new file mode 100644 index 00000000000..e879a37fba4 Binary files /dev/null and b/examples/immersed_bunny/bunny.stl differ diff --git a/examples/immersed_bunny/immersed_bunny.case b/examples/immersed_bunny/immersed_bunny.case new file mode 100644 index 00000000000..70b04ea172c --- /dev/null +++ b/examples/immersed_bunny/immersed_bunny.case @@ -0,0 +1,95 @@ +{ + "version": 1.0, + "case": { + "mesh_file": "box.nmsh", + "output_directory": "fields", + "output_at_end": true, + "output_boundary": true, + "output_checkpoints": false, + "end_time": 20.0, + "timestep": 1e-4, + "numerics": { + "time_order": 3, + "polynomial_order": 8, + "dealias": true + }, + "fluid": { + "scheme": "pnpn", + "Re": 1000.0, + "initial_condition": { + "type": "uniform", + "value": [ + 1.0, + 0.0, + 0.0 + ] + }, + "inflow_condition": { + "type": "uniform", + "value": [ + 1.0 + ] + }, + "velocity_solver": { + "type": "cg", + "preconditioner": "jacobi", + "projection_space_size": 0, + "absolute_tolerance": 1e-8, + "max_iterations": 800 + }, + "pressure_solver": { + "type": "gmres", + "preconditioner": "hsmg", + "projection_space_size": 0, + "absolute_tolerance": 1e-7, + "max_iterations": 800 + }, + "output_control": "nsamples", + "output_value": 250, + "boundary_types": [ + "v", + "o", + "w", + "w", + "w", + "w" + ], + "source_terms": [ + { + "type": "brinkman", + "objects": [ + { + "type": "boundary_mesh", + "name": "bunny.stl", + "distance_transform": { + "type": "smooth_step", + "value": 0.05 + }, + "mesh_transform": { + "type": "bounding_box", + "box_min": [ + 0.75, + 0.25, + 0.00 + ], + "box_max": [ + 1.25, + 0.75, + 0.50 + ], + "keep_aspect_ratio": true + } + } + ], + "brinkman": { + "limits": [ + 0.0, + 1000.0 + ], + "penalty": 1.0 + }, + } + ] + } + } +} \ No newline at end of file diff --git a/examples/immersed_bunny/readme.md b/examples/immersed_bunny/readme.md new file mode 100644 index 00000000000..c48f5b1051d --- /dev/null +++ b/examples/immersed_bunny/readme.md @@ -0,0 +1,19 @@ +# Immersed boundary of a Stanford Bunny + +This example illustrates the brinkman condition source term used to simulate an +immersed boundary. A Stanford Bunny model is placed in side a square duct and +flow is simulated. + +``` +Bunny url: https://www.thingiverse.com/thing:151081 +Bunny domain: [ [-24, 85], [-41, 45], [5, 112] ] +Design domain: [ [-200, 600], [-100, 100], [0, 200] ] +``` + +The run script will construct a design domain as above with 16 x 8 x 8 elements. +The user can decide to specify another resolution from the terminal. + +The user can change how sharp the interface between fluid and solid is by +changing the `distance_transform.value`, the value specifies the width of the +transition region from no to full resistance. The size of the Brinkman +resistance limits should be adjusted if the Reynolds number is modified. diff --git a/examples/immersed_bunny/run.sh b/examples/immersed_bunny/run.sh new file mode 100755 index 00000000000..e857c2b2b07 --- /dev/null +++ b/examples/immersed_bunny/run.sh @@ -0,0 +1,57 @@ +#!/usr/bin/bash + +# ============================================================================ # +# Define the help function + +function help() { + echo -e "run.sh [Nx] [Ny] [Nz]" + echo -e " Generate a mesh and run the immersed_bunny case." + echo -e " The input arguments are the number of cells in the x, y, and z" + echo -e " directions, respectively." + echo -e "" + echo -e " If no input arguments are provided, the default mesh size is" + echo -e " 16x8x8." + echo -e "" + echo -e " See Readme for additional details." + exit 0 +} + +# Search for "-h" or "--help" in the input arguments +for arg in $@; do + if [[ $arg == "-h" ]] || [[ $arg == "--help" ]]; then + help + fi +done + +# ============================================================================ # +# Ensure Neko can be found and set default mesh size + +if [ "$NEKO_DIR" ]; then + export PATH=$NEKO_DIR:$PATH +fi + +if [[ -z $(which neko) ]]; then + echo -e "Neko not found." >&2 + echo -e "Please ensure Neko is installed and in your PATH." >&2 + echo -e "Alternatively, set the NEKO_DIR environment variable." >&2 + exit 1 +fi + +if [ $# == 0 ]; then + Nx=16 && Ny=8 && Nz=8 +elif [ $# == 3 ]; then + Nx=$1 && Ny=$2 && Nz=$3 +else + echo -e "Invalid number of input arguments." >&2 + help +fi + +# ============================================================================ # +# Generate mesh and run case + +echo "Generating mesh with dimensions: $Nx $Ny $Nz" +genmeshbox 0 4 0 1 0 1 $Nx $Ny $Nz .false. .false. .false. +neko immersed_bunny.case + +# End of file +# ============================================================================ # diff --git a/examples/immersed_zones/.gitignore b/examples/immersed_zones/.gitignore new file mode 100644 index 00000000000..11a072298f4 --- /dev/null +++ b/examples/immersed_zones/.gitignore @@ -0,0 +1,6 @@ +# Only include the files needed for the example +* +!.gitignore +!*.case +!readme.md +!run.sh diff --git a/examples/immersed_zones/block.case b/examples/immersed_zones/block.case new file mode 100644 index 00000000000..6539de1e7bf --- /dev/null +++ b/examples/immersed_zones/block.case @@ -0,0 +1,95 @@ +{ + "version": 1.0, + "case": { + "mesh_file": "box.nmsh", + "output_directory": "block_fields", + "output_at_end": false, + "output_boundary": false, + "output_checkpoints": false, + "end_time": 0.01, + "timestep": 5e-5, + "numerics": { + "time_order": 3, + "polynomial_order": 8, + "dealias": true + }, + "fluid": { + "scheme": "pnpn", + "Re": 2000.0, + "initial_condition": { + "type": "uniform", + "value": [ + 1.0, + 0.0, + 0.0 + ] + }, + "inflow_condition": { + "type": "uniform", + "value": [ + 1.0 + ] + }, + "velocity_solver": { + "type": "cg", + "preconditioner": "jacobi", + "projection_space_size": 0, + "absolute_tolerance": 1e-8, + "max_iterations": 800 + }, + "pressure_solver": { + "type": "gmres", + "preconditioner": "hsmg", + "projection_space_size": 0, + "absolute_tolerance": 1e-7, + "max_iterations": 800 + }, + "output_control": "nsamples", + "output_value": 250, + "boundary_types": [ + "v", + "o", + "w", + "w", + "w", + "w" + ], + "source_terms": [ + { + "type": "brinkman", + "objects": [ + { + "type": "point_zone", + "name": "block" + } + ], + "brinkman": { + "limits": [ + 0.0, + 1500.0 + ], + "penalty": 1.0 + } + } + ] + }, + "point_zones": [ + { + "name": "block", + "geometry": "box", + "x_bounds": [ + 0.90, + 1.10 + ], + "y_bounds": [ + 0.0, + 1.0 + ], + "z_bounds": [ + 0.0, + 0.5 + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/immersed_zones/block_sphere.case b/examples/immersed_zones/block_sphere.case new file mode 100644 index 00000000000..83da59c37dc --- /dev/null +++ b/examples/immersed_zones/block_sphere.case @@ -0,0 +1,109 @@ +{ + "version": 1.0, + "case": { + "mesh_file": "box.nmsh", + "output_directory": "block_sphere_fields", + "output_at_end": false, + "output_boundary": false, + "output_checkpoints": false, + "end_time": 0.01, + "timestep": 5e-5, + "numerics": { + "time_order": 3, + "polynomial_order": 8, + "dealias": true + }, + "fluid": { + "scheme": "pnpn", + "Re": 2000.0, + "initial_condition": { + "type": "uniform", + "value": [ + 1.0, + 0.0, + 0.0 + ] + }, + "inflow_condition": { + "type": "uniform", + "value": [ + 1.0 + ] + }, + "velocity_solver": { + "type": "cg", + "preconditioner": "jacobi", + "projection_space_size": 0, + "absolute_tolerance": 1e-8, + "max_iterations": 800 + }, + "pressure_solver": { + "type": "gmres", + "preconditioner": "hsmg", + "projection_space_size": 0, + "absolute_tolerance": 1e-7, + "max_iterations": 800 + }, + "output_control": "nsamples", + "output_value": 250, + "boundary_types": [ + "v", + "o", + "w", + "w", + "w", + "w" + ], + "source_terms": [ + { + "type": "brinkman", + "objects": [ + { + "type": "point_zone", + "name": "block" + }, + { + "type": "point_zone", + "name": "ball" + } + ], + "brinkman": { + "limits": [ + 0.0, + 1500.0 + ], + "penalty": 1.0 + } + }, + ] + }, + "point_zones": [ + { + "name": "ball", + "geometry": "sphere", + "center": [ + 1.0, + 0.5, + 0.5 + ], + "radius": 0.1 + }, + { + "name": "block", + "geometry": "box", + "x_bounds": [ + 0.90, + 1.10 + ], + "y_bounds": [ + 0.0, + 1.0 + ], + "z_bounds": [ + 0.0, + 0.5 + ] + } + ] + } +} \ No newline at end of file diff --git a/examples/immersed_zones/cylinder.case b/examples/immersed_zones/cylinder.case new file mode 100644 index 00000000000..b897c1f0030 --- /dev/null +++ b/examples/immersed_zones/cylinder.case @@ -0,0 +1,94 @@ +{ + "version": 1.0, + "case": { + "mesh_file": "box.nmsh", + "output_directory": "cylinder_fields", + "output_at_end": false, + "output_boundary": false, + "output_checkpoints": false, + "end_time": 20.0, + "timestep": 5e-5, + "numerics": { + "time_order": 3, + "polynomial_order": 8, + "dealias": true + }, + "fluid": { + "scheme": "pnpn", + "Re": 2000.0, + "initial_condition": { + "type": "uniform", + "value": [ + 1.0, + 0.0, + 0.0 + ] + }, + "inflow_condition": { + "type": "uniform", + "value": [ + 1.0 + ] + }, + "velocity_solver": { + "type": "cg", + "preconditioner": "jacobi", + "projection_space_size": 0, + "absolute_tolerance": 1e-8, + "max_iterations": 800 + }, + "pressure_solver": { + "type": "gmres", + "preconditioner": "hsmg", + "projection_space_size": 0, + "absolute_tolerance": 1e-7, + "max_iterations": 800 + }, + "output_control": "nsamples", + "output_value": 250, + "boundary_types": [ + "v", + "o", + "w", + "w", + "w", + "w" + ], + "source_terms": [ + { + "type": "brinkman", + "objects": [ + { + "type": "point_zone", + "name": "cylinder" + } + ], + "brinkman": { + "limits": [ + 0.0, + 1500.0 + ], + "penalty": 1.0 + } + } + ] + }, + "point_zones": [ + { + "name": "cylinder", + "geometry": "cylinder", + "start": [ + 1.0, + 0.0, + 0.5 + ], + "end": [ + 1.0, + 1.0, + 0.5 + ], + "radius": 0.1 + } + ] + } +} \ No newline at end of file diff --git a/examples/immersed_zones/readme.md b/examples/immersed_zones/readme.md new file mode 100644 index 00000000000..c3a36b631fc --- /dev/null +++ b/examples/immersed_zones/readme.md @@ -0,0 +1,29 @@ +# Immersed Zones + +This set of examples highlights the use of the Brinkman body force to model +immersed zones in a flow. The Brinkman body force is a penalization method +that is used to model the presence of solid objects in a flow. The method +applies a force to the fluid in the vicinity of the solid object to mimic the +effect of the solid object on the flow. The force is proportional to the fluid +velocity and is directed opposite to the fluid velocity. + +## Examples + +All the examples will be conducted in a 3D domain with a size of 4x1x1. The +domain is discretized with a uniform grid of 32x8x8 elements. The number of +elements can be modified by using the `-x`, `-y`, and `-z` command line +arguments. The domain have an inflow boundary condition at the surface at +x=0, and an outflow boundary condition at the surface at x=4. All other +boundaries are treated as walls. + +The following examples are included in this directory: + +- `block`: A simple example of a flow past a block placed as a half height wall + half way through the domain. +- `sphere`: A flow past a sphere with radius 0.1 placed in the middle of the + domain. +- `block_sphere`: A flow past a block and a sphere both with size and placement + as above. +- `cylinder`: A flow past a cylinder with radius 0.1 placed horizontally in the + middle of the domain. + \ No newline at end of file diff --git a/examples/immersed_zones/run.sh b/examples/immersed_zones/run.sh new file mode 100755 index 00000000000..e32bd6e2320 --- /dev/null +++ b/examples/immersed_zones/run.sh @@ -0,0 +1,90 @@ +#!/usr/bin/bash + +# ============================================================================ # +# Define the help function + +function help() { + echo -e "run.sh case" + echo -e " Generate a mesh and run all the desired case." + echo -e " The input arguments are the number of cells in the x, y, and z" + echo -e " directions, respectively." + echo -e "" + echo -e " If no input arguments are provided, the default mesh size is" + echo -e " 32x8x8." + echo -e "" + echo -e " Example usage:" + echo -e " run.sh -x32 -y8 -z8 case_name.case" + echo -e "" + echo -e " Options:" + echo -e " -h, --help Show this help message and exit." + echo -e " -x# Number of cells in the x direction." + echo -e " -y# Number of cells in the y direction." + echo -e " -z# Number of cells in the z direction." + echo -e " -a, --all Run all cases." + echo -e " -q, --quiet Suppress output." + echo -e "" + echo -e " See Readme for additional details." + exit 0 +} +if [ $# -eq 0 ]; then help; fi + +# Handle options +Nx=32 && Ny=8 && Nz=8 +for arg in "$@"; do + if [ "${arg:0:2}" == "--" ]; then + case ${arg:2} in + help) help ;; + all) ALL=1 ;; + quiet) QUIET=1 ;; + *) echo -e "Invalid option: $arg" >&2 && help ;; + esac + elif [ "${arg:0:1}" == "-" ]; then + case ${arg:1:1} in + h) help ;; + x) Nx=${arg:2} ;; + y) Ny=${arg:2} ;; + z) Nz=${arg:2} ;; + a) ALL=1 ;; + q) QUIET=1 ;; + *) echo -e "Invalid option: ${arg:1}" >&2 && help ;; + esac + else + cases="$cases $arg" + fi +done + +if [ "$ALL" ]; then + cases=$(find $(dirname $0) -name "*.case") +fi + +# ============================================================================ # +# Ensure Neko can be found and set default mesh size + +if [ "$NEKO_DIR" ]; then + PATH=$NEKO_DIR/bin:$PATH +fi + +if [[ -z $(which neko) ]]; then + echo -e "Neko not found." >&2 + echo -e "Please ensure Neko is installed and in your PATH." >&2 + echo -e "Alternatively, set the NEKO_DIR environment variable." >&2 + exit 1 +fi + +# ============================================================================ # +# Generate mesh and run case + +echo "Generating mesh with dimensions: $Nx $Ny $Nz" +genmeshbox 0 4 0 1 0 1 $Nx $Ny $Nz .false. .false. .false. + +for case in $cases; do + echo "Running case: $case" + if [ "$QUIET" ]; then + neko $case >/dev/null + else + neko $case >${case%.case}.log + fi +done + +# End of file +# ============================================================================ # diff --git a/examples/immersed_zones/sphere.case b/examples/immersed_zones/sphere.case new file mode 100644 index 00000000000..b6395db583f --- /dev/null +++ b/examples/immersed_zones/sphere.case @@ -0,0 +1,89 @@ +{ + "version": 1.0, + "case": { + "mesh_file": "box.nmsh", + "output_directory": "sphere_fields", + "output_at_end": false, + "output_boundary": false, + "output_checkpoints": false, + "end_time": 20.0, + "timestep": 5e-5, + "numerics": { + "time_order": 3, + "polynomial_order": 8, + "dealias": true + }, + "fluid": { + "scheme": "pnpn", + "Re": 2000.0, + "initial_condition": { + "type": "uniform", + "value": [ + 1.0, + 0.0, + 0.0 + ] + }, + "inflow_condition": { + "type": "uniform", + "value": [ + 1.0 + ] + }, + "velocity_solver": { + "type": "cg", + "preconditioner": "jacobi", + "projection_space_size": 0, + "absolute_tolerance": 1e-8, + "max_iterations": 800 + }, + "pressure_solver": { + "type": "gmres", + "preconditioner": "hsmg", + "projection_space_size": 0, + "absolute_tolerance": 1e-7, + "max_iterations": 800 + }, + "output_control": "nsamples", + "output_value": 250, + "boundary_types": [ + "v", + "o", + "w", + "w", + "w", + "w" + ], + "source_terms": [ + { + "type": "brinkman", + "objects": [ + { + "type": "point_zone", + "name": "ball" + } + ], + "brinkman": { + "limits": [ + 0.0, + 1500.0 + ], + "penalty": 1.0 + } + } + ] + }, + "point_zones": [ + { + "name": "ball", + "geometry": "sphere", + "center": [ + 1.0, + 0.5, + 0.5 + ], + "radius": 0.1 + } + ] + } +} \ No newline at end of file diff --git a/examples/poisson/driver.f90 b/examples/poisson/driver.f90 index 128da2a63b3..400f21350f1 100644 --- a/examples/poisson/driver.f90 +++ b/examples/poisson/driver.f90 @@ -52,7 +52,7 @@ program poisson n = Xh%lx * Xh%ly * Xh%lz * msh%nelv - call dir_bc%init(dm) + call dir_bc%init(coef) call dir_bc%set_g(real(0.0d0,rp)) !user specified @@ -61,7 +61,7 @@ program poisson call dir_bc%finalize() call bc_list_init(bclst) call bc_list_add(bclst,dir_bc) - call solver%init(n, abs_tol = tol) + call solver%init(n, niter, abs_tol = tol) allocate(f(n)) diff --git a/examples/rayleigh-benard-cylinder/rayleigh.case b/examples/rayleigh-benard-cylinder/rayleigh.case index caafd9b2070..f60ea65040c 100644 --- a/examples/rayleigh-benard-cylinder/rayleigh.case +++ b/examples/rayleigh-benard-cylinder/rayleigh.case @@ -1,66 +1,78 @@ { -"version": 1.0, -"case": { - "mesh_file": "cylinder.nmsh", - "output_at_end": true, - "output_boundary": true, - "end_time": 250, - "timestep": 2e-3, - "output_checkpoints": true, - "checkpoint_control": "simulationtime", - "checkpoint_value": 50, - "job_timelimit": "12:00:00", - "numerics": { - "time_order": 3, - "polynomial_order": 7, - "dealias": true - }, - "fluid": { - "scheme": "pnpn", - "Ra": 1e8, - "source_terms": - [ - { - "type": "user_vector" - } - ], - "initial_condition": { - "type": "user" + "version": 1.0, + "case": { + "mesh_file": "cylinder.nmsh", + "output_at_end": true, + "output_boundary": true, + "end_time": 250, + "timestep": 2e-3, + "output_checkpoints": true, + "checkpoint_control": "simulationtime", + "checkpoint_value": 50, + "job_timelimit": "12:00:00", + "numerics": { + "time_order": 3, + "polynomial_order": 7, + "dealias": true }, - "velocity_solver": { - "type": "cg", - "preconditioner": "jacobi", - "projection_space_size": 0, - "absolute_tolerance": 1e-6, - "max_iterations": 800 + "fluid": { + "scheme": "pnpn", + "Ra": 1e8, + "source_terms": [ + { + "type": "boussinesq", + "g" : [0, 0, 1], + "reference_value": 0, + "beta": 1 + } + ], + "initial_condition": { + "type": "uniform", + "value": [ + 0.0, + 0.0, + 0.0 + ] + }, + "velocity_solver": { + "type": "cg", + "preconditioner": "jacobi", + "projection_space_size": 0, + "absolute_tolerance": 1e-6, + "max_iterations": 800 + }, + "pressure_solver": { + "type": "gmres", + "preconditioner": "hsmg", + "projection_space_size": 0, + "absolute_tolerance": 1e-4, + "max_iterations": 800 + }, + "output_control": "nsamples", + "output_value": 10, + "boundary_types": [ + "w" + ] }, - "pressure_solver": { - "type": "gmres", - "preconditioner": "hsmg", - "projection_space_size": 0, - "absolute_tolerance": 1e-4, - "max_iterations": 800 + "scalar": { + "enabled": true, + "Pr": 1.0, + "initial_condition": { + "type": "user" + } }, - "output_control": "simulationtime", - "output_value": 1, - "boundary_types": [ - "w" + "simulation_components": [ + { + "type": "probes", + "compute_control": "simulationtime", + "compute_value": 1, + "points_file": "probes.csv", + "output_file": "output.csv", + "fields": [ + "w", + "s" + ] + } ] - }, - "scalar": { - "enabled": true, - "Pr": 1.0 - }, - "simulation_components": - [ - { - "type": "probes", - "compute_control": "simulationtime", - "compute_value" : 1, - "points_file": "probes.csv", - "output_file": "output.csv", - "fields": ["w","s"] - } - ] -} -} + } +} \ No newline at end of file diff --git a/examples/rayleigh-benard-cylinder/rayleigh.f90 b/examples/rayleigh-benard-cylinder/rayleigh.f90 index 19cd856aa86..1c85d608963 100644 --- a/examples/rayleigh-benard-cylinder/rayleigh.f90 +++ b/examples/rayleigh-benard-cylinder/rayleigh.f90 @@ -13,9 +13,8 @@ module user ! Register user defined functions (see user_intf.f90) subroutine user_setup(u) type(user_t), intent(inout) :: u - u%fluid_user_ic => set_initial_conditions_for_u_and_s + u%scalar_user_ic => set_initial_conditions_for_s u%scalar_user_bc => set_scalar_boundary_conditions - u%fluid_user_f_vector => set_bousinesq_forcing_term u%material_properties => set_material_properties end subroutine user_setup @@ -36,8 +35,8 @@ subroutine set_material_properties(t, tstep, rho, mu, cp, lambda, params) rho = 1.0_rp cp = 1.0_rp end subroutine set_material_properties - - + + subroutine set_scalar_boundary_conditions(s, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep) real(kind=rp), intent(inout) :: s real(kind=rp), intent(in) :: x @@ -55,33 +54,23 @@ subroutine set_scalar_boundary_conditions(s, x, y, z, nx, ny, nz, ix, iy, iz, ie !> Variables for bias real(kind=rp) :: arg, bias - + ! This will be used on all zones without labels ! e.g. the ones hardcoded to 'v', 'w', etcetc s = 1.0_rp - z end subroutine set_scalar_boundary_conditions - subroutine set_initial_conditions_for_u_and_s(u, v, w, p, params) - type(field_t), intent(inout) :: u - type(field_t), intent(inout) :: v - type(field_t), intent(inout) :: w - type(field_t), intent(inout) :: p + subroutine set_initial_conditions_for_s(s, params) + type(field_t), intent(inout) :: s type(json_file), intent(inout) :: params - type(field_t), pointer :: s integer :: i, j, k, e real(kind=rp) :: rand, r,z - s => neko_field_registry%get_field('s') - - !> Initialize with zero velocity - call rzero(u%x,u%dof%size()) - call rzero(v%x,v%dof%size()) - call rzero(w%x,w%dof%size()) !> Initialize with rand perturbations on temperature - call rzero(s%x,w%dof%size()) + call rzero(s%x,s%dof%size()) do i = 1, s%dof%size() - s%x(i,1,1,1) = 1-s%dof%z(i,1,1,1) + s%x(i,1,1,1) = 1-s%dof%z(i,1,1,1) end do ! perturb not on element boundaries ! Maybe not necessary, but lets be safe @@ -102,30 +91,9 @@ subroutine set_initial_conditions_for_u_and_s(u, v, w, p, params) end do end do if (NEKO_BCKND_DEVICE .eq. 1) then - call device_memcpy(s%x,s%x_d,s%dof%size(),HOST_TO_DEVICE) + call device_memcpy(s%x, s%x_d, s%dof%size(), & + HOST_TO_DEVICE, sync=.false.) end if - end subroutine set_initial_conditions_for_u_and_s - - subroutine set_bousinesq_forcing_term(f, t) - class(fluid_user_source_term_t), intent(inout) :: f - real(kind=rp), intent(in) :: t - integer :: i - type(field_t), pointer :: u, v, w, s - real(kind=rp) :: rapr, ta2pr - u => neko_field_registry%get_field('u') - v => neko_field_registry%get_field('v') - w => neko_field_registry%get_field('w') - s => neko_field_registry%get_field('s') - - if (NEKO_BCKND_DEVICE .eq. 1) then - call device_rzero(f%u_d,f%dm%size()) - call device_rzero(f%v_d,f%dm%size()) - call device_copy(f%w_d,s%x_d,f%dm%size()) - else - call rzero(f%u,f%dm%size()) - call rzero(f%v,f%dm%size()) - call copy(f%w,s%x,f%dm%size()) - end if - end subroutine set_bousinesq_forcing_term + end subroutine set_initial_conditions_for_s end module user diff --git a/examples/rayleigh-benard/rayleigh.case b/examples/rayleigh-benard/rayleigh.case index fc93138b7ea..0681be28e8b 100644 --- a/examples/rayleigh-benard/rayleigh.case +++ b/examples/rayleigh-benard/rayleigh.case @@ -1,64 +1,71 @@ { -"version": 1.0, -"case": { - "mesh_file": "box.nmsh", - "output_at_end": true, - "output_boundary": true, - "end_time": 100, - "timestep": 0.01, - "numerics": { - "time_order": 3, - "polynomial_order": 7, - "dealias": true - }, - "fluid": { - "scheme": "pnpn", - "Ra": 1750, - "source_terms": - [ - { - "type": "user_vector" - } - ], - "initial_condition": { - "type": "user" - }, - "velocity_solver": { - "type": "cg", - "preconditioner": "jacobi", - "projection_space_size": 4, - "absolute_tolerance": 1e-6, - "max_iterations": 800 + "version": 1.0, + "case": { + "mesh_file": "box.nmsh", + "output_at_end": true, + "output_boundary": true, + "end_time": 100, + "timestep": 0.01, + "numerics": { + "time_order": 3, + "polynomial_order": 7, + "dealias": true }, - "pressure_solver": { - "type": "gmres", - "preconditioner": "hsmg", - "projection_space_size": 4, - "absolute_tolerance": 1e-6, - "max_iterations": 800 + "fluid": { + "scheme": "pnpn", + "Ra": 1750, + "source_terms": [ + { + "type": "user_vector" + } + ], + "initial_condition": { + "type": "uniform", + "value": [ + 0.0, + 0.0, + 0.0 + ] + }, + "velocity_solver": { + "type": "cg", + "preconditioner": "jacobi", + "projection_space_size": 4, + "absolute_tolerance": 1e-6, + "max_iterations": 800 + }, + "pressure_solver": { + "type": "gmres", + "preconditioner": "hsmg", + "projection_space_size": 4, + "absolute_tolerance": 1e-6, + "max_iterations": 800 + }, + "output_control": "simulationtime", + "output_value": 0.1, + "boundary_types": [ + "", + "", + "", + "", + "w", + "w" + ] }, - "output_control": "simulationtime", - "output_value": 0.1, - "boundary_types": [ - "", - "", - "", - "", - "w", - "w" - ] - }, - "scalar": { - "enabled": true, - "Pr": 0.71, - "boundary_types": [ - "", - "", - "", - "", - "d=1", - "d=0" - ] + "scalar": { + "enabled": true, + "Pr": 0.71, + "boundary_types": [ + "", + "", + "", + "", + "d=1", + "d=0" + ], + "initial_condition": { + "type": "user" + } + } } -} -} +} \ No newline at end of file diff --git a/examples/rayleigh-benard/rayleigh.f90 b/examples/rayleigh-benard/rayleigh.f90 index f580f5099cf..9b43b10f225 100644 --- a/examples/rayleigh-benard/rayleigh.f90 +++ b/examples/rayleigh-benard/rayleigh.f90 @@ -10,7 +10,7 @@ module user ! Register user defined functions (see user_intf.f90) subroutine user_setup(u) type(user_t), intent(inout) :: u - u%fluid_user_ic => set_ic + u%scalar_user_ic => set_ic u%fluid_user_f_vector => forcing u%scalar_user_bc => scalar_bc u%material_properties => set_material_properties @@ -27,7 +27,7 @@ subroutine set_material_properties(t, tstep, rho, mu, cp, lambda, params) call json_get(params, "case.scalar.Pr", Pr) Re = 1.0_rp / Pr - + mu = 1.0_rp / Re lambda = mu / Pr rho = 1.0_rp @@ -48,27 +48,18 @@ subroutine scalar_bc(s, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep) integer, intent(in) :: ie real(kind=rp), intent(in) :: t integer, intent(in) :: tstep - ! If we set scalar_bcs(*) = 'user' instead + ! If we set scalar_bcs(*) = 'user' instead ! this will be used instead on that zone s = 1.0_rp-z end subroutine scalar_bc - + !> User initial condition - subroutine set_ic(u, v, w, p, params) - type(field_t), intent(inout) :: u - type(field_t), intent(inout) :: v - type(field_t), intent(inout) :: w - type(field_t), intent(inout) :: p + subroutine set_ic(s, params) + type(field_t), intent(inout) :: s type(json_file), intent(inout) :: params - type(field_t), pointer :: s integer :: i, e, k, j real(kind=rp) :: rand, z - s => neko_field_registry%get_field('s') - call rzero(u%x,u%dof%size()) - call rzero(v%x,v%dof%size()) - call rzero(w%x,w%dof%size()) - do i = 1, s%dof%size() s%x(i,1,1,1) = 1-s%dof%z(i,1,1,1) end do @@ -92,9 +83,10 @@ subroutine set_ic(u, v, w, p, params) end do end do - if ((NEKO_BCKND_CUDA .eq. 1) .or. (NEKO_BCKND_HIP .eq. 1) & + if ((NEKO_BCKND_DEVICE .eq. 1) .or. (NEKO_BCKND_HIP .eq. 1) & .or. (NEKO_BCKND_OPENCL .eq. 1)) then - call device_memcpy(s%x,s%x_d,s%dof%size(),HOST_TO_DEVICE) + call device_memcpy(s%x, s%x_d, s%dof%size(), & + HOST_TO_DEVICE, sync=.false.) end if diff --git a/examples/scalar_mms/.gitignore b/examples/scalar_mms/.gitignore new file mode 100644 index 00000000000..00aa28d964f --- /dev/null +++ b/examples/scalar_mms/.gitignore @@ -0,0 +1,2 @@ +*.csv +*.nmsh diff --git a/examples/scalar_mms/README.md b/examples/scalar_mms/README.md new file mode 100644 index 00000000000..784b0e27f61 --- /dev/null +++ b/examples/scalar_mms/README.md @@ -0,0 +1,53 @@ +# Method of manufactured solutions for the scalar + + +## The method of manufactured solutions + +The MMS is a verification technique based on the following. Consider some +equation $L(s) = 0$. We come up with some desired analytical solution $\hat +s(x,t)$, and then analytically compute $\hat f = L(\hat s)$. If we now set +$\hat f$ as the source term: $L(s) = \hat f$, we expect the solution to follow +$\hat s(x, t)$. +Initial conditions are obtained as $\hat s(x, 0)$ and boundary conditions should +be selected to be compatible with the behaviour of $\hat s$. + +## The case + +Here we provide a very simple example, where $\hat s(x, t) = \sin(x)$ is used +for the scalar advection-diffusion equation. The domain is fully periodic with +10 elements across $x$ and 1 in the other directions. The idea is to make this +example more interesting later. An additional purpose is to provide a test case +for scalar source terms. + +The advecting velocity is set to 1 in $x$. The source term $\hat f$ is easily +obtained as $\rho c_p u \cdot \cos(x) - \lambda \sin(x)$. +Here, we set $\rho = c_p =u = 1$, and $\lambda = 0.01$. + + +## Case setup +To keep the advecting velocity constant, we enable `freeze` for the `fluid`. The +initial velocity distribution is set in the user file, and similarly for the +scalar. + +The source term is split into two components for testing purposes. The user +source term is used to set the source to $\hat f - 1$ and a constant source term +is additionally added, equal to $1$. Therefore, the sum is equal to $\hat f$. + +To analyze the results, we use the `probes` simcomp to extract the values of the +scalar along a line across $x$ in the center of the domain. A Python script to +create the probes is provided. + +The mesh is generated with `genmeshbox` with periodic conditions in all +directions, and a single element across $z$ and $y$, to mimic a 1D case. + +The script `run.sh` can be used to run the study. We suggest inspecting the +script and the case file and modifying them to your liking. + +A Python script to plot the results is also provided for convenience. + +## Expected results +The following curves will be obtained with the settings in the `run.sh` script. +One can see that the solution in the end is the same as the initial conditions, +which is expected based on our form of $\hat s$. + +Results diff --git a/examples/scalar_mms/create_probes.py b/examples/scalar_mms/create_probes.py new file mode 100644 index 00000000000..22881c6cde7 --- /dev/null +++ b/examples/scalar_mms/create_probes.py @@ -0,0 +1,7 @@ +import numpy as np + +x = np.linspace(1e-6, 2*np.pi - 1e-6, 200) + +p = np.column_stack((x, np.zeros(x.shape) + 1e-10, 0.05*np.ones(x.shape))) + +np.savetxt("probes.csv", p, delimiter=',') diff --git a/examples/scalar_mms/plot.py b/examples/scalar_mms/plot.py new file mode 100644 index 00000000000..a8e3bea214d --- /dev/null +++ b/examples/scalar_mms/plot.py @@ -0,0 +1,32 @@ +import matplotlib.pyplot as plt +import numpy as np +from os.path import join + +DATA_PATH = "." + +n_probes = 200 + + +# %% read data +data = np.genfromtxt(join(DATA_PATH, "output.csv"), skip_header=n_probes+1, delimiter=",") + + +x = np.linspace(0, np.pi * 2, n_probes) +ic = np.sin(x) +fig, (ax1, ax2) = plt.subplots(1, 2) + +ax1.plot(x, ic, '--k', label="ICs") +ax1.plot(x, data[200:, 1], label=r"Final solution") + +ax1.set_xlabel(r"$x$") +ax1.legend() +ax1.set_xlim(0, np.pi * 2) + + + +ax2.plot(x, ic - data[200:, 1], '--k', label="Absolute error") +ax2.set_xlabel(r"$x$") +ax2.set_ylabel(r"$absolute error$") + +plt.tight_layout() +plt.savefig(join(DATA_PATH, "results.png"), dpi=300) diff --git a/examples/scalar_mms/results.png b/examples/scalar_mms/results.png new file mode 100644 index 00000000000..fdefb219b4f Binary files /dev/null and b/examples/scalar_mms/results.png differ diff --git a/examples/scalar_mms/run.sh b/examples/scalar_mms/run.sh new file mode 100755 index 00000000000..985d5f2e385 --- /dev/null +++ b/examples/scalar_mms/run.sh @@ -0,0 +1,12 @@ +# Compile the case +makeneko scalar_mms.f90 +# Generate probes +python create_probes.py + +# Remove results if exist +rm output.csv + +# Generate mesh +genmeshbox 0 1 0 1 0 1 20 1 1 .false. .true. .true. +# Run the code +./neko scalar_mms.case diff --git a/examples/scalar_mms/scalar_mms.case b/examples/scalar_mms/scalar_mms.case new file mode 100755 index 00000000000..0a7da6072f9 --- /dev/null +++ b/examples/scalar_mms/scalar_mms.case @@ -0,0 +1,75 @@ +{ + "version": 1, + "case": { + "mesh_file": "box.nmsh", + "output_at_end": false, + "output_boundary": false, + "output_checkpoints": false, + "end_time": 0.1, + "timestep": 0.001, + "numerics": { + "time_order": 2, + "polynomial_order": 5, + "dealias": true + }, + "fluid": { + "scheme": "pnpn", + "mu": 1, + "rho": 1, + "freeze": true, + "initial_condition": { + "type": "user" + }, + "velocity_solver": { + "type": "cg", + "preconditioner": "jacobi", + "projection_space_size": 0, + "absolute_tolerance": 1e-7, + "max_iterations": 800 + }, + "pressure_solver": { + "type": "gmres", + "preconditioner": "hsmg", + "projection_space_size": 4, + "absolute_tolerance": 1e-7, + "max_iterations": 800 + }, + "output_control": "nsamples", + "output_value": 1 + }, + "scalar": { + "enabled": true, + "lambda": 0.01, + "cp": 1, + "initial_condition": { + "type": "user" + }, + "source_terms": [ + { + "type": "user_vector" + }, + { + "type": "constant", + "values": [ + "1.0" + ] + } + ], + "boundary_types": [ + "d=0", "n=0.01" + ] + }, + "simulation_components": [ + { + "type": "probes", + "compute_control": "nsamples", + "compute_value": 1, + "points_file": "probes.csv", + "output_file": "output.csv", + "fields": [ + "s" + ] + } + ] + } +} diff --git a/examples/scalar_mms/scalar_mms.f90 b/examples/scalar_mms/scalar_mms.f90 new file mode 100644 index 00000000000..0e6eba35c4a --- /dev/null +++ b/examples/scalar_mms/scalar_mms.f90 @@ -0,0 +1,97 @@ +module user + use neko + implicit none + +contains + !> Register user defined functions (see user_intf.f90) + subroutine user_setup(u) + type(user_t), intent(inout) :: u + u%scalar_user_ic => set_s_ic + u%fluid_user_ic => set_velocity + u%scalar_user_f_vector => set_source + u%user_mesh_setup => user_mesh_scale + end subroutine user_setup + + ! Stretch bounds to 2pi + subroutine user_mesh_scale(msh) + type(mesh_t), intent(inout) :: msh + integer :: i, p, nvert + + nvert = size(msh%points) + do i = 1, nvert + msh%points(i)%x(1) = msh%points(i)%x(1)* pi *2 + msh%points(i)%x(2) = msh%points(i)%x(2)* pi *2 + end do + end subroutine user_mesh_scale + + !> Set source term + subroutine set_source(f, t) + class(scalar_user_source_term_t), intent(inout) :: f + real(kind=rp), intent(in) :: t + real(kind=rp) :: x, y + integer :: i + + do i = 1, f%dm%size() + x = f%dm%x(i,1,1,1) + y = f%dm%y(i,1,1,1) + + ! 0.01 is the viscosity + f%s(i,1,1,1) = cos(x) - 0.01 * sin(x) - 1.0_rp + end do + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(f%s, f%s_d, f%dm%size(), & + HOST_TO_DEVICE, sync=.false.) + end if + + end subroutine set_source + + !> User initial condition for the scalar + subroutine set_s_ic(s, params) + type(field_t), intent(inout) :: s + type(json_file), intent(inout) :: params + integer :: i, e, k, j + real(kind=rp) :: x, y + + do i = 1, s%dof%size() + x = s%dof%x(i,1,1,1) + y = s%dof%y(i,1,1,1) + + s%x(i,1,1,1) = sin(x) + end do + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(s%x, s%x_d, s%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + end if + end subroutine set_s_ic + + !> Set the advecting velocity field. + subroutine set_velocity(u, v, w, p, params) + type(field_t), intent(inout) :: u + type(field_t), intent(inout) :: v + type(field_t), intent(inout) :: w + type(field_t), intent(inout) :: p + type(json_file), intent(inout) :: params + integer :: i, e, k, j + real(kind=rp) :: x, y + + do i = 1, u%dof%size() + + ! Simple advection in x + u%x(i,1,1,1) = 1 + v%x(i,1,1,1) = 0 + w%x(i,1,1,1) = 0 + end do + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(u%x, u%x_d, u%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(v%x, v%x_d, v%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(w%x, w%x_d, w%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + end if + end subroutine set_velocity + +end module user diff --git a/examples/turb_channel/README.md b/examples/turb_channel/README.md index 30fb9b5b115..4c00979885d 100644 --- a/examples/turb_channel/README.md +++ b/examples/turb_channel/README.md @@ -1,3 +1,9 @@ #Simple turbulent channel case at Re_tau=180 The channel starts with a slightly perturbed initial condition which becomes turbulent after around 10 time units. -If one wants to change the mesh and play around, please do so, by generating a new mesh with genmeshbox. +If one wants to change the mesh and play around, please do so, by generating a new mesh with genmeshbox. +To compute thte spatial and temporal averages of the output fields, mean_fields and stats one can use the following contrib scripts: +- average_fields_in_time, averages a fld series in time +- average_field_in_space, for channelflow we can directly average in xz +- postprocess_fluid_stats, computes the mean gradients and the reynolds stresses. + +To see the options for each postprocessing script, run it without any command line arguments, for example: ./average_field_in_space diff --git a/m4/ax_compiler_vendor.m4 b/m4/ax_compiler_vendor.m4 index 1dd168697b4..616ba69a0f0 100644 --- a/m4/ax_compiler_vendor.m4 +++ b/m4/ax_compiler_vendor.m4 @@ -58,7 +58,7 @@ # modified version of the Autoconf Macro, you may extend this special # exception to the GPL to apply to your modified version as well. -#serial 30 +#serial 33 AC_DEFUN([AX_COMPILER_VENDOR], [dnl AC_CACHE_CHECK([for _AC_LANG compiler vendor], ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor, [dnl @@ -79,6 +79,7 @@ AC_DEFUN([AX_COMPILER_VENDOR], [dnl sdcc: SDCC,__SDCC sx: _SX sx-aurora: __ve__ + nvhpc: __NVCOMPILER portland: __PGI gnu: __GNUC__ sun: __SUNPRO_C,__SUNPRO_CC,__SUNPRO_F90,__SUNPRO_F95 diff --git a/m4/ax_cray.m4 b/m4/ax_cray.m4 index 8fcc747904f..612d4e00cb6 100644 --- a/m4/ax_cray.m4 +++ b/m4/ax_cray.m4 @@ -195,8 +195,12 @@ AC_DEFUN([AX_CRAY_ROCM],[ hip_bcknd="1" else AC_MSG_RESULT([no]) - AC_MSG_ERROR([Cray ROCm Toolkit not found]) - have_hip="no" + if test "${ROCM_PATH}"; then + AX_HIP + else + AC_MSG_ERROR([Cray ROCm Toolkit not found]) + have_hip="no" + fi fi fi AC_SUBST(hip_bcknd) diff --git a/makeneko.in b/makeneko.in index 2d9b88067c5..a9af13d260a 100644 --- a/makeneko.in +++ b/makeneko.in @@ -22,7 +22,7 @@ cat >> usr_driver.f90 << _ACEOF program usrneko use neko use user - type(case_t) :: C + type(case_t), target :: C call user_setup(C%usr) call neko_init(C) @@ -33,7 +33,7 @@ program usrneko end program usrneko _ACEOF -$FC $FCFLAGS -I$includedir_pkg -L$libdir $1 usr_driver.f90\ +$FC $FCFLAGS -I$includedir_pkg -L$libdir $@ usr_driver.f90\ -lneko @LDFLAGS@ @LIBS@ -o neko rm -f usr_driver.f90 diff --git a/patches/nvhpc_bge.patch b/patches/nvhpc_bge.patch index 8fb242baf61..3deed99186b 100644 --- a/patches/nvhpc_bge.patch +++ b/patches/nvhpc_bge.patch @@ -3,9 +3,9 @@ index 0793c48a0..1419e9378 100644 --- a/src/common/signal.f90 +++ b/src/common/signal.f90 @@ -99,11 +99,11 @@ contains - + usr12 = sighdl_usr() - + - if (bge(usr12, usr)) then - raised = .true. - else @@ -16,6 +16,6 @@ index 0793c48a0..1419e9378 100644 + ! else + ! raised = .false. + ! end if - + end function signal_usr - + diff --git a/reframe/src/rayleigh.case.template b/reframe/src/rayleigh.case.template index d6c002118ba..d7d6ddd51a3 100644 --- a/reframe/src/rayleigh.case.template +++ b/reframe/src/rayleigh.case.template @@ -14,14 +14,15 @@ "fluid": { "scheme": "pnpn", "Ra": 1750, - "source_terms": + "source_terms": [ { "type": "user_vector" } ], "initial_condition": { - "type": "user" + "type": "uniform", + "value": [0,0,0] }, "velocity_solver": { "type": "cg", @@ -51,6 +52,9 @@ "scalar": { "enabled": true, "Pr": 0.71, + "initial_condition": { + "type": "user" + }, "boundary_types": [ "", "", diff --git a/reframe/src/rayleigh.f90 b/reframe/src/rayleigh.f90 index 7e846a5e69a..1b1bc7db137 100644 --- a/reframe/src/rayleigh.f90 +++ b/reframe/src/rayleigh.f90 @@ -12,8 +12,8 @@ module user ! Register user defined functions (see user_intf.f90) subroutine user_setup(u) type(user_t), intent(inout) :: u - u%fluid_user_ic => set_ic u%fluid_user_f_vector => forcing + u%scalar_user_ic => set_ic u%scalar_user_bc => scalar_bc u%material_properties => set_material_properties end subroutine user_setup @@ -29,7 +29,7 @@ subroutine set_material_properties(t, tstep, rho, mu, cp, lambda, params) call json_get(params, "case.scalar.Pr", Pr) Re = 1.0_rp / Pr - + mu = 1.0_rp / Re lambda = mu / Pr rho = 1.0_rp @@ -50,27 +50,18 @@ subroutine scalar_bc(s, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep) integer, intent(in) :: ie real(kind=rp), intent(in) :: t integer, intent(in) :: tstep - ! If we set scalar_bcs(*) = 'user' instead + ! If we set scalar_bcs(*) = 'user' instead ! this will be used instead on that zone s = 1.0_rp-z end subroutine scalar_bc - - !> User initial condition - subroutine set_ic(u, v, w, p, params) - type(field_t), intent(inout) :: u - type(field_t), intent(inout) :: v - type(field_t), intent(inout) :: w - type(field_t), intent(inout) :: p + + !> User initial condition for the scalar field + subroutine set_ic(s, params) + type(field_t), intent(inout) :: s type(json_file), intent(inout) :: params - type(field_t), pointer :: s integer :: i, e, k, j real(kind=rp) :: rand, z - s => neko_field_registry%get_field('s') - call rzero(u%x,u%dof%size()) - call rzero(v%x,v%dof%size()) - call rzero(w%x,w%dof%size()) - do i = 1, s%dof%size() s%x(i,1,1,1) = 1-s%dof%z(i,1,1,1) end do @@ -96,15 +87,14 @@ subroutine set_ic(u, v, w, p, params) if ((NEKO_BCKND_CUDA .eq. 1) .or. (NEKO_BCKND_HIP .eq. 1) & .or. (NEKO_BCKND_OPENCL .eq. 1)) then - call device_memcpy(s%x,s%x_d,s%dof%size(),HOST_TO_DEVICE) + call device_memcpy(s%x, s%x_d, s%dof%size(), & + HOST_TO_DEVICE, sync=.false.) end if - end subroutine set_ic - !> Forcing subroutine forcing(f, t) class(fluid_user_source_term_t), intent(inout) :: f diff --git a/src/.depends b/src/.depends index ce1e11eaecd..c96c2625ed2 100644 --- a/src/.depends +++ b/src/.depends @@ -15,18 +15,20 @@ common/json_utils.o : common/json_utils.f90 common/utils.o config/num_types.o common/system.o : common/system.f90 math/mxm_wrapper.o : math/mxm_wrapper.F90 common/utils.o config/num_types.o sem/speclib.o : sem/speclib.f90 common/utils.o config/num_types.o +qoi/drag_torque.o : qoi/drag_torque.f90 math/operators.o config/num_types.o sem/space.o math/math.o comm/comm.o mesh/facet_zone.o mesh/mesh.o sem/coef.o field/field.o sem/local_interpolation.o : sem/local_interpolation.f90 config/neko_config.o math/bcknd/device/device_math.o device/device.o field/field_list.o field/field.o common/utils.o math/fast3d.o math/math.o mesh/point.o config/num_types.o sem/space.o math/tensor.o math/math.o : math/math.f90 comm/comm.o config/num_types.o math/mathops.o : math/mathops.f90 config/num_types.o math/fast3d.o : math/fast3d.f90 math/math.o sem/speclib.o config/num_types.o sem/space.o : sem/space.f90 math/mxm_wrapper.o math/tensor.o math/math.o math/fast3d.o common/utils.o device/device.o sem/speclib.o config/num_types.o config/neko_config.o +sem/map_1d.o : sem/map_1d.f90 comm/mpi_types.o math/math.o common/utils.o common/log.o comm/comm.o device/device.o mesh/mesh.o gs/gather_scatter.o sem/dofmap.o sem/space.o config/num_types.o sem/dofmap.o : sem/dofmap.f90 mesh/hex.o mesh/quad.o mesh/element.o math/math.o device/device.o math/tensor.o math/fast3d.o common/utils.o config/num_types.o adt/tuple.o sem/space.o mesh/mesh.o config/neko_config.o sem/coef.o : sem/coef.f90 device/device.o math/mxm_wrapper.o sem/bcknd/device/device_coef.o math/bcknd/device/device_math.o mesh/mesh.o math/math.o sem/space.o sem/dofmap.o config/num_types.o config/neko_config.o gs/gs_ops.o gs/gather_scatter.o sem/cpr.o : sem/cpr.f90 sem/dofmap.o common/log.o math/mxm_wrapper.o math/tensor.o sem/coef.o mesh/mesh.o math/math.o sem/space.o field/field.o config/num_types.o config/neko_config.o gs/gather_scatter.o -sem/spectral_error_indicator.o : sem/spectral_error_indicator.f90 common/utils.o comm/comm.o device/device.o config/neko_config.o gs/gather_scatter.o math/bcknd/device/device_math.o math/tensor.o io/file.o math/math.o field/field_list.o sem/coef.o field/field.o common/log.o config/num_types.o +sem/spectral_error_indicator.o : sem/spectral_error_indicator.f90 common/utils.o comm/comm.o device/device.o config/neko_config.o gs/gather_scatter.o math/bcknd/device/device_math.o math/tensor.o io/file.o math/math.o field/field_list.o sem/coef.o field/field.o config/num_types.o common/time_interpolator.o : common/time_interpolator.f90 common/utils.o math/math.o math/bcknd/device/device_math.o config/neko_config.o field/field.o config/num_types.o -sem/interpolation.o : sem/interpolation.f90 mesh/point.o sem/coef.o math/mxm_wrapper.o sem/space.o math/bcknd/cpu/tensor_cpu.o math/tensor.o field/field.o math/fast3d.o device/device.o config/num_types.o config/neko_config.o -sem/point_interpolator.o : sem/point_interpolator.f90 config/neko_config.o math/bcknd/device/device_math.o device/device.o field/field_list.o field/field.o common/utils.o math/fast3d.o math/math.o mesh/point.o config/num_types.o sem/space.o math/tensor.o +sem/interpolation.o : sem/interpolation.f90 sem/space.o math/bcknd/cpu/tensor_cpu.o math/tensor.o math/fast3d.o device/device.o config/num_types.o config/neko_config.o +sem/point_interpolator.o : sem/point_interpolator.f90 config/neko_config.o math/bcknd/device/device_math.o device/device.o common/utils.o math/fast3d.o math/math.o mesh/point.o config/num_types.o sem/space.o math/tensor.o sem/bcknd/device/device_coef.o : sem/bcknd/device/device_coef.F90 common/utils.o config/num_types.o gs/gs_bcknd.o : gs/gs_bcknd.f90 config/num_types.o gs/bcknd/cpu/gs_cpu.o : gs/bcknd/cpu/gs_cpu.f90 gs/gs_ops.o gs/gs_bcknd.o config/num_types.o @@ -40,7 +42,7 @@ gs/gather_scatter.o : gs/gather_scatter.f90 device/device.o common/profiler.o co mesh/entity.o : mesh/entity.f90 mesh/point.o : mesh/point.f90 mesh/entity.o math/math.o config/num_types.o mesh/element.o : mesh/element.f90 mesh/point.o adt/tuple.o mesh/entity.o config/num_types.o -math/ax.o : math/ax.f90 mesh/mesh.o field/field.o sem/space.o sem/coef.o config/num_types.o +math/ax.o : math/ax.f90 mesh/mesh.o sem/space.o sem/coef.o config/num_types.o mesh/quad.o : mesh/quad.f90 mesh/point.o adt/tuple.o mesh/element.o config/num_types.o mesh/hex.o : mesh/hex.f90 mesh/point.o adt/tuple.o mesh/element.o config/num_types.o mesh/tet.o : mesh/tet.f90 mesh/point.o adt/tuple.o mesh/element.o config/num_types.o @@ -53,14 +55,17 @@ mesh/facet_zone.o : mesh/facet_zone.f90 common/utils.o adt/stack.o adt/tuple.o mesh/point_zone.o : mesh/point_zone.f90 device/device.o config/neko_config.o sem/dofmap.o common/utils.o config/num_types.o adt/stack.o mesh/point_zones/sphere_point_zone.o : mesh/point_zones/sphere_point_zone.f90 math/math.o common/json_utils.o config/num_types.o mesh/point_zone.o mesh/point_zones/box_point_zone.o : mesh/point_zones/box_point_zone.f90 math/math.o common/json_utils.o config/num_types.o mesh/point_zone.o -mesh/point_zone_factory.o : mesh/point_zone_factory.f90 common/utils.o sem/dofmap.o common/json_utils.o mesh/point_zones/sphere_point_zone.o mesh/point_zones/box_point_zone.o mesh/point_zone.o -mesh/point_zone_registry.o : mesh/point_zone_registry.f90 common/json_utils.o common/utils.o sem/dofmap.o mesh/point_zone_factory.o mesh/point_zone.o +mesh/point_zones/cylinder_point_zone.o : mesh/point_zones/cylinder_point_zone.f90 common/utils.o common/json_utils.o config/num_types.o mesh/point_zone.o +mesh/point_zone_fctry.o : mesh/point_zone_fctry.f90 common/utils.o sem/dofmap.o common/json_utils.o mesh/point_zones/cylinder_point_zone.o mesh/point_zones/sphere_point_zone.o mesh/point_zones/box_point_zone.o mesh/point_zone.o +mesh/point_zone_registry.o : mesh/point_zone_registry.f90 common/json_utils.o common/utils.o sem/space.o mesh/mesh.o sem/dofmap.o mesh/point_zone_fctry.o mesh/point_zone.o mesh/mesh.o : mesh/mesh.f90 mesh/curve.o adt/uset.o math/math.o mesh/facet_zone.o comm/comm.o common/distdata.o common/datadist.o adt/htable.o adt/tuple.o adt/stack.o common/utils.o mesh/quad.o mesh/hex.o mesh/element.o mesh/point.o config/num_types.o mesh/octree.o : mesh/octree.f90 common/utils.o mesh/point.o config/num_types.o +mesh/search_tree/aabb.o : mesh/search_tree/aabb.f90 common/utils.o mesh/tet_mesh.o mesh/tri_mesh.o mesh/mesh.o mesh/hex.o mesh/tet.o mesh/quad.o mesh/tri.o mesh/point.o mesh/element.o config/num_types.o +mesh/aabb_tree.o : mesh/aabb_tree.f90 adt/stack.o common/utils.o config/num_types.o mesh/tri.o mesh/search_tree/aabb.o mesh/tet_mesh.o : mesh/tet_mesh.f90 common/utils.o mesh/point.o mesh/tet.o mesh/mesh.o mesh/tri_mesh.o : mesh/tri_mesh.f90 mesh/point.o mesh/tri.o field/field_registry.o : field/field_registry.f90 adt/htable.o common/utils.o sem/dofmap.o field/field.o -field/scratch_registry.o : field/scratch_registry.f90 common/utils.o sem/dofmap.o field/field.o config/num_types.o +field/scratch_registry.o : field/scratch_registry.f90 sem/dofmap.o field/field.o field/field.o : field/field.f90 sem/dofmap.o sem/space.o mesh/mesh.o math/math.o device/device.o config/num_types.o math/bcknd/device/device_math.o config/neko_config.o field/field_list.o : field/field_list.f90 field/field.o field/field_series.o : field/field_series.f90 field/field.o config/num_types.o @@ -77,9 +82,10 @@ math/bcknd/sx/sx_conv1.o : math/bcknd/sx/sx_conv1.f90 config/num_types.o math/bcknd/sx/sx_dudxyz.o : math/bcknd/sx/sx_dudxyz.f90 math/math.o config/num_types.o math/bcknd/sx/sx_opgrad.o : math/bcknd/sx/sx_opgrad.f90 config/num_types.o math/bcknd/sx/sx_cfl.o : math/bcknd/sx/sx_cfl.f90 config/num_types.o -math/operators.o : math/operators.f90 math/bcknd/device/device_math.o device/device.o comm/comm.o math/math.o field/field.o sem/coef.o sem/space.o math/bcknd/device/opr_device.o math/bcknd/xsmm/opr_xsmm.o math/bcknd/sx/opr_sx.o math/bcknd/cpu/opr_cpu.o config/num_types.o config/neko_config.o -math/bcknd/cpu/opr_cpu.o : math/bcknd/cpu/opr_cpu.f90 math/mathops.o gs/gather_scatter.o field/field.o mesh/mesh.o math/math.o sem/coef.o sem/space.o config/num_types.o math/bcknd/cpu/conv1.o math/bcknd/cpu/cdtp.o math/bcknd/cpu/opgrad.o math/bcknd/cpu/dudxyz.o -math/bcknd/sx/opr_sx.o : math/bcknd/sx/opr_sx.f90 math/mathops.o field/field.o mesh/mesh.o math/math.o sem/coef.o sem/space.o config/num_types.o gs/gather_scatter.o math/bcknd/sx/sx_cfl.o math/bcknd/sx/sx_cdtp.o math/bcknd/sx/sx_conv1.o math/bcknd/sx/sx_opgrad.o math/bcknd/sx/sx_dudxyz.o +math/bcknd/sx/sx_lambda2.o : math/bcknd/sx/sx_lambda2.f90 math/math.o config/num_types.o +math/operators.o : math/operators.f90 comm/comm.o math/bcknd/device/device_math.o device/device.o math/math.o field/field.o sem/coef.o sem/space.o math/bcknd/device/opr_device.o math/bcknd/xsmm/opr_xsmm.o math/bcknd/sx/opr_sx.o math/bcknd/cpu/opr_cpu.o config/num_types.o config/neko_config.o +math/bcknd/cpu/opr_cpu.o : math/bcknd/cpu/opr_cpu.f90 math/mathops.o gs/gather_scatter.o field/field.o math/math.o sem/coef.o sem/space.o config/num_types.o math/bcknd/cpu/conv1.o math/bcknd/cpu/cdtp.o math/bcknd/cpu/opgrad.o math/bcknd/cpu/dudxyz.o +math/bcknd/sx/opr_sx.o : math/bcknd/sx/opr_sx.f90 math/mathops.o field/field.o math/math.o sem/coef.o sem/space.o config/num_types.o gs/gather_scatter.o math/bcknd/sx/sx_lambda2.o math/bcknd/sx/sx_cfl.o math/bcknd/sx/sx_cdtp.o math/bcknd/sx/sx_conv1.o math/bcknd/sx/sx_opgrad.o math/bcknd/sx/sx_dudxyz.o math/bcknd/xsmm/opr_xsmm.o : math/bcknd/xsmm/opr_xsmm.F90 math/mathops.o gs/gather_scatter.o field/field.o mesh/mesh.o math/math.o sem/coef.o sem/space.o math/mxm_wrapper.o config/num_types.o math/bcknd/device/opr_device.o : math/bcknd/device/opr_device.F90 comm/comm.o common/utils.o field/field.o mesh/mesh.o sem/coef.o sem/space.o device/device.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o config/num_types.o gs/gather_scatter.o math/tensor.o : math/tensor.f90 device/device.o config/neko_config.o math/mxm_wrapper.o config/num_types.o math/bcknd/device/tensor_device.o math/bcknd/sx/tensor_sx.o math/bcknd/cpu/tensor_cpu.o math/bcknd/xsmm/tensor_xsmm.o @@ -90,49 +96,54 @@ math/fdm.o : math/fdm.f90 comm/comm.o common/utils.o device/device.o math/bcknd/ math/bcknd/cpu/fdm_cpu.o : math/bcknd/cpu/fdm_cpu.f90 math/bcknd/cpu/tensor_cpu.o config/num_types.o math/bcknd/sx/fdm_sx.o : math/bcknd/sx/fdm_sx.f90 math/bcknd/sx/tensor_sx.o config/num_types.o math/bcknd/xsmm/fdm_xsmm.o : math/bcknd/xsmm/fdm_xsmm.f90 math/bcknd/xsmm/tensor_xsmm.o config/num_types.o -math/schwarz.o : math/schwarz.f90 config/neko_config.o device/device.o math/fdm.o math/bcknd/device/device_math.o math/bcknd/device/device_schwarz.o gs/gather_scatter.o bc/dirichlet.o bc/bc.o sem/dofmap.o sem/space.o mesh/mesh.o math/math.o config/num_types.o +math/schwarz.o : math/schwarz.f90 config/neko_config.o device/device.o math/fdm.o math/bcknd/device/device_math.o math/bcknd/device/device_schwarz.o gs/gather_scatter.o bc/bc.o sem/dofmap.o sem/space.o mesh/mesh.o math/math.o config/num_types.o math/vector.o : math/vector.f90 common/utils.o math/bcknd/device/device_math.o device/device.o config/num_types.o config/neko_config.o math/matrix.o : math/matrix.f90 common/utils.o math/bcknd/device/device_math.o device/device.o config/num_types.o config/neko_config.o +math/signed_distance.o : math/signed_distance.f90 adt/stack.o mesh/search_tree/aabb.o mesh/point.o common/utils.o mesh/aabb_tree.o mesh/tri_mesh.o mesh/tri.o field/field.o config/num_types.o common/checkpoint.o : common/checkpoint.f90 mesh/mesh.o common/utils.o field/field.o device/device.o sem/space.o field/field_series.o config/num_types.o config/neko_config.o -io/generic_file.o : io/generic_file.f90 config/num_types.o +io/generic_file.o : io/generic_file.f90 comm/comm.o common/utils.o config/num_types.o io/map_file.o : io/map_file.f90 io/format/map.o comm/comm.o common/utils.o io/generic_file.o io/re2_file.o : io/re2_file.f90 common/log.o adt/htable.o io/map_file.o io/format/map.o io/format/re2.o common/datadist.o comm/mpi_types.o comm/comm.o mesh/point.o mesh/mesh.o common/utils.o config/num_types.o io/generic_file.o io/rea_file.o : io/rea_file.f90 common/log.o adt/htable.o common/datadist.o comm/comm.o io/map_file.o io/re2_file.o io/format/rea.o io/format/map.o mesh/point.o mesh/mesh.o common/utils.o config/num_types.o io/generic_file.o -io/fld_file.o : io/fld_file.f90 comm/mpi_types.o common/datadist.o comm/comm.o common/utils.o mesh/mesh.o fluid/mean_sqr_flow.o fluid/mean_flow.o io/fld_file_data.o math/vector.o common/structs.o sem/space.o sem/dofmap.o field/field_list.o field/field.o io/generic_file.o +io/fld_file.o : io/fld_file.f90 comm/mpi_types.o math/math.o common/datadist.o comm/comm.o common/utils.o mesh/mesh.o fluid/mean_sqr_flow.o fluid/mean_flow.o io/fld_file_data.o math/vector.o common/structs.o sem/space.o sem/dofmap.o field/field_list.o field/field.o io/generic_file.o io/fld_file_data.o : io/fld_file_data.f90 math/vector.o math/math.o config/num_types.o io/vtk_file.o : io/vtk_file.f90 comm/comm.o common/log.o mesh/tri_mesh.o mesh/tet_mesh.o field/mesh_field.o sem/dofmap.o field/field.o mesh/mesh.o common/utils.o io/generic_file.o config/num_types.o -io/stl_file.o : io/stl_file.f90 io/format/stl.o comm/comm.o comm/mpi_types.o mesh/point.o common/log.o mesh/tri_mesh.o io/generic_file.o config/num_types.o +io/stl_file.o : io/stl_file.f90 io/format/stl.o comm/comm.o common/utils.o comm/mpi_types.o mesh/point.o common/log.o mesh/tri_mesh.o io/generic_file.o config/num_types.o io/nmsh_file.o : io/nmsh_file.f90 common/log.o comm/mpi_types.o common/datadist.o mesh/element.o io/format/nmsh.o adt/tuple.o mesh/point.o common/utils.o mesh/mesh.o comm/comm.o io/generic_file.o -io/chkp_file.o : io/chkp_file.f90 common/global_interpolation.o comm/comm.o comm/mpi_types.o sem/interpolation.o math/math.o mesh/mesh.o sem/space.o common/utils.o sem/dofmap.o field/field.o config/num_types.o common/checkpoint.o field/field_series.o io/generic_file.o +io/chkp_file.o : io/chkp_file.f90 common/global_interpolation.o comm/comm.o comm/mpi_types.o sem/interpolation.o math/math.o mesh/mesh.o sem/space.o common/utils.o math/bcknd/device/device_math.o sem/dofmap.o field/field.o config/num_types.o common/checkpoint.o field/field_series.o io/generic_file.o io/csv_file.o : io/csv_file.f90 comm/comm.o common/log.o config/num_types.o common/utils.o io/generic_file.o math/matrix.o math/vector.o -io/file.o : io/file.f90 io/csv_file.o io/stl_file.o io/vtk_file.o io/fld_file_data.o io/fld_file.o io/re2_file.o io/rea_file.o io/map_file.o io/chkp_file.o io/nmsh_file.o io/generic_file.o common/utils.o +io/file.o : io/file.f90 io/csv_file.o io/stl_file.o io/vtk_file.o io/fld_file_data.o io/fld_file.o io/re2_file.o io/rea_file.o io/map_file.o io/chkp_file.o io/nmsh_file.o io/generic_file.o config/num_types.o common/utils.o io/output.o : io/output.f90 io/file.o config/num_types.o -io/fluid_output.o : io/fluid_output.f90 io/output.o device/device.o config/neko_config.o field/field_list.o scalar/scalar_scheme.o fluid/fluid_scheme.o +io/fluid_output.o : io/fluid_output.f90 io/output.o device/device.o config/neko_config.o field/field_list.o scalar/scalar_scheme.o fluid/fluid_scheme.o config/num_types.o +io/fld_file_output.o : io/fld_file_output.f90 io/output.o device/device.o config/neko_config.o field/field_list.o config/num_types.o io/chkp_output.o : io/chkp_output.f90 config/num_types.o io/output.o common/checkpoint.o io/mean_flow_output.o : io/mean_flow_output.f90 io/output.o device/device.o config/num_types.o fluid/mean_flow.o io/fluid_stats_output.o : io/fluid_stats_output.f90 io/output.o device/device.o config/num_types.o config/neko_config.o fluid/fluid_stats.o io/mean_sqr_flow_output.o : io/mean_sqr_flow_output.f90 io/output.o config/num_types.o fluid/mean_sqr_flow.o io/data_streamer.o : io/data_streamer.F90 config/neko_config.o comm/mpi_types.o comm/comm.o device/device.o common/utils.o sem/coef.o field/field.o config/num_types.o -common/sampler.o : common/sampler.f90 common/time_based_controller.o config/num_types.o common/profiler.o common/utils.o common/log.o comm/comm.o io/output.o +common/sampler.o : common/sampler.f90 common/time_based_controller.o config/num_types.o common/profiler.o common/utils.o common/log.o comm/comm.o io/fld_file.o io/output.o common/global_interpolation.o : common/global_interpolation.F90 comm/mpi_types.o math/math.o comm/comm.o sem/local_interpolation.o common/utils.o common/log.o mesh/mesh.o sem/dofmap.o sem/space.o config/num_types.o common/profiler.o : common/profiler.F90 common/craypat.o device/hip/roctx.o device/cuda/nvtx.o device/device.o config/neko_config.o common/craypat.o : common/craypat.F90 common/utils.o adt/stack.o -bc/bc.o : bc/bc.f90 common/utils.o adt/tuple.o adt/stack.o mesh/facet_zone.o mesh/mesh.o sem/space.o sem/dofmap.o device/device.o config/num_types.o config/neko_config.o +bc/bc.o : bc/bc.f90 common/utils.o adt/tuple.o adt/stack.o mesh/facet_zone.o mesh/mesh.o sem/space.o sem/coef.o sem/dofmap.o device/device.o config/num_types.o config/neko_config.o bc/dirichlet.o : bc/dirichlet.f90 bc/bc.o config/num_types.o bc/bcknd/device/device_dirichlet.o -bc/dong_outflow.o : bc/dong_outflow.f90 bc/bcknd/device/device_dong_outflow.o common/utils.o sem/coef.o sem/dofmap.o field/field.o bc/bc.o config/num_types.o device/device.o bc/dirichlet.o config/neko_config.o +bc/neumann.o : bc/neumann.f90 sem/coef.o common/utils.o bc/bc.o config/num_types.o +bc/dong_outflow.o : bc/dong_outflow.f90 field/field_registry.o bc/bcknd/device/device_dong_outflow.o common/utils.o sem/coef.o sem/dofmap.o field/field.o bc/bc.o config/num_types.o device/device.o bc/dirichlet.o config/neko_config.o bc/wall.o : bc/wall.f90 bc/dirichlet.o config/num_types.o bc/bcknd/device/device_wall.o bc/inflow.o : bc/inflow.f90 bc/dirichlet.o config/num_types.o bc/bcknd/device/device_inflow.o +bc/field_dirichlet.o : bc/field_dirichlet.f90 sem/dofmap.o math/bcknd/device/device_math.o math/math.o field/field_list.o field/field.o common/utils.o device/device.o bc/bc.o bc/dirichlet.o sem/coef.o config/num_types.o +bc/field_dirichlet_vector.o : bc/field_dirichlet_vector.f90 bc/field_dirichlet.o sem/dofmap.o math/bcknd/device/device_math.o math/math.o field/field_list.o field/field.o common/utils.o device/device.o bc/bc.o bc/dirichlet.o sem/coef.o config/num_types.o bc/usr_inflow.o : bc/usr_inflow.f90 common/utils.o bc/bcknd/device/device_inhom_dirichlet.o device/device.o bc/inflow.o sem/coef.o config/num_types.o bc/usr_scalar.o : bc/usr_scalar.f90 common/utils.o bc/bcknd/device/device_inhom_dirichlet.o device/device.o bc/dirichlet.o sem/coef.o config/num_types.o bc/facet_normal.o : bc/facet_normal.f90 common/utils.o bc/dirichlet.o sem/coef.o math/math.o config/num_types.o bc/bcknd/device/device_facet_normal.o -bc/symmetry.o : bc/symmetry.f90 adt/tuple.o adt/stack.o common/utils.o math/math.o sem/coef.o bc/bc.o bc/dirichlet.o config/num_types.o config/neko_config.o bc/bcknd/device/device_symmetry.o +bc/symmetry.o : bc/symmetry.f90 sem/coef.o adt/tuple.o adt/stack.o common/utils.o math/math.o bc/bc.o bc/dirichlet.o config/num_types.o config/neko_config.o bc/bcknd/device/device_symmetry.o bc/non_normal.o : bc/non_normal.f90 adt/stack.o common/utils.o math/math.o sem/coef.o device/device.o adt/tuple.o bc/dirichlet.o config/num_types.o config/neko_config.o bc/symmetry.o bc/blasius.o : bc/blasius.f90 fluid/flow_profile.o bc/bcknd/device/device_inhom_dirichlet.o device/device.o bc/inflow.o common/utils.o sem/coef.o config/num_types.o krylov/precon.o : krylov/precon.f90 config/num_types.o krylov/krylov.o : krylov/krylov.f90 config/neko_config.o krylov/bcknd/device/pc_identity_device.o krylov/pc_identity.o bc/bc.o common/utils.o field/field.o mesh/mesh.o sem/coef.o krylov/precon.o config/num_types.o math/ax.o gs/gather_scatter.o krylov/pc_identity.o : krylov/pc_identity.f90 config/num_types.o krylov/precon.o math/math.o krylov/precon_fctry.o : krylov/precon_fctry.f90 config/neko_config.o common/utils.o krylov/pc_hsmg.o krylov/bcknd/device/pc_jacobi_device.o krylov/bcknd/sx/pc_jacobi_sx.o krylov/bcknd/cpu/pc_jacobi.o krylov/bcknd/device/pc_identity_device.o krylov/pc_identity.o krylov/precon.o -krylov/krylov_fctry.o : krylov/krylov_fctry.f90 config/neko_config.o common/utils.o krylov/precon.o krylov/krylov.o config/num_types.o krylov/bcknd/device/gmres_device.o krylov/bcknd/sx/gmres_sx.o krylov/bcknd/cpu/gmres.o krylov/bcknd/cpu/bicgstab.o krylov/bcknd/device/pipecg_device.o krylov/bcknd/sx/pipecg_sx.o krylov/bcknd/cpu/pipecg.o krylov/bcknd/cpu/cacg.o krylov/bcknd/device/cg_device.o krylov/bcknd/sx/cg_sx.o krylov/bcknd/cpu/cg.o +krylov/krylov_fctry.o : krylov/krylov_fctry.f90 config/neko_config.o common/utils.o krylov/precon.o krylov/krylov.o config/num_types.o krylov/bcknd/device/gmres_device.o krylov/bcknd/sx/gmres_sx.o krylov/bcknd/cpu/gmres.o krylov/bcknd/cpu/bicgstab.o krylov/bcknd/device/fusedcg_device.o krylov/bcknd/device/pipecg_device.o krylov/bcknd/sx/pipecg_sx.o krylov/bcknd/cpu/pipecg.o krylov/bcknd/cpu/cacg.o krylov/bcknd/device/cg_device.o krylov/bcknd/sx/cg_sx.o krylov/bcknd/cpu/cg.o krylov/bcknd/cpu/cg.o : krylov/bcknd/cpu/cg.f90 comm/comm.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o math/ax.o krylov/precon.o krylov/krylov.o config/num_types.o krylov/bcknd/cpu/cacg.o : krylov/bcknd/cpu/cacg.f90 math/mxm_wrapper.o comm/comm.o common/utils.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o math/ax.o krylov/precon.o krylov/krylov.o config/num_types.o krylov/bcknd/cpu/pipecg.o : krylov/bcknd/cpu/pipecg.f90 comm/comm.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o config/num_types.o math/ax.o krylov/precon.o krylov/krylov.o @@ -143,12 +154,13 @@ krylov/bcknd/sx/cg_sx.o : krylov/bcknd/sx/cg_sx.f90 math/math.o bc/bc.o gs/gathe krylov/bcknd/sx/pipecg_sx.o : krylov/bcknd/sx/pipecg_sx.f90 comm/comm.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o config/num_types.o math/ax.o krylov/precon.o krylov/krylov.o krylov/bcknd/sx/gmres_sx.o : krylov/bcknd/sx/gmres_sx.f90 comm/comm.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o config/num_types.o math/ax.o krylov/precon.o krylov/krylov.o krylov/bcknd/sx/pc_jacobi_sx.o : krylov/bcknd/sx/pc_jacobi_sx.f90 gs/gather_scatter.o config/num_types.o sem/dofmap.o sem/coef.o krylov/precon.o math/math.o -krylov/bcknd/device/cg_device.o : krylov/bcknd/device/cg_device.f90 math/bcknd/device/device_math.o device/device.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o math/ax.o krylov/precon.o krylov/krylov.o config/num_types.o +krylov/bcknd/device/cg_device.o : krylov/bcknd/device/cg_device.f90 math/bcknd/device/device_math.o device/device.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o math/ax.o krylov/precon.o krylov/krylov.o config/num_types.o krylov/bcknd/device/pipecg_device.o : krylov/bcknd/device/pipecg_device.F90 comm/comm.o device/device.o math/bcknd/device/device_math.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o config/num_types.o math/ax.o krylov/precon.o krylov/krylov.o +krylov/bcknd/device/fusedcg_device.o : krylov/bcknd/device/fusedcg_device.F90 comm/comm.o device/device.o math/bcknd/device/device_math.o math/math.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o config/num_types.o math/ax.o krylov/precon.o krylov/krylov.o krylov/bcknd/device/gmres_device.o : krylov/bcknd/device/gmres_device.F90 comm/comm.o device/device.o math/bcknd/device/device_math.o math/math.o krylov/bcknd/device/pc_identity_device.o bc/bc.o gs/gather_scatter.o sem/coef.o field/field.o config/num_types.o math/ax.o krylov/precon.o krylov/krylov.o krylov/bcknd/device/pc_jacobi_device.o : krylov/bcknd/device/pc_jacobi_device.F90 gs/gather_scatter.o device/device.o math/bcknd/device/device_math.o config/num_types.o sem/dofmap.o sem/coef.o krylov/precon.o krylov/bcknd/device/pc_identity_device.o : krylov/bcknd/device/pc_identity_device.f90 config/num_types.o krylov/precon.o math/bcknd/device/device_math.o device/device.o -time_schemes/time_scheme.o : time_schemes/time_scheme.f90 common/utils.o config/num_types.o config/neko_config.o +time_schemes/time_scheme.o : time_schemes/time_scheme.f90 config/num_types.o config/neko_config.o time_schemes/bdf_time_scheme.o : time_schemes/bdf_time_scheme.f90 common/utils.o math/math.o time_schemes/time_scheme.o config/num_types.o config/neko_config.o time_schemes/ext_time_scheme.o : time_schemes/ext_time_scheme.f90 common/utils.o math/math.o time_schemes/time_scheme.o config/num_types.o config/neko_config.o time_schemes/ab_time_scheme.o : time_schemes/ab_time_scheme.f90 common/utils.o math/math.o time_schemes/time_scheme.o config/num_types.o config/neko_config.o @@ -158,20 +170,21 @@ common/stats_quant.o : common/stats_quant.f90 config/num_types.o common/statistics.o : common/statistics.f90 comm/comm.o common/log.o common/stats_quant.o config/num_types.o common/rhs_maker.o : common/rhs_maker.f90 field/field.o field/field_series.o config/num_types.o common/rhs_maker_fctry.o : common/rhs_maker_fctry.f90 config/neko_config.o common/bcknd/device/rhs_maker_device.o common/bcknd/sx/rhs_maker_sx.o common/bcknd/cpu/rhs_maker_cpu.o common/rhs_maker.o -simulation_components/probes.o : simulation_components/probes.F90 case.o io/csv_file.o io/file.o device/device.o comm/comm.o math/tensor.o math/math.o common/global_interpolation.o common/json_utils.o sem/dofmap.o field/field_registry.o simulation_components/simulation_component.o field/field_list.o field/field.o common/utils.o common/log.o math/matrix.o config/num_types.o +simulation_components/probes.o : simulation_components/probes.F90 case.o io/csv_file.o io/file.o device/device.o comm/comm.o math/tensor.o common/global_interpolation.o common/json_utils.o sem/dofmap.o field/field_registry.o simulation_components/simulation_component.o field/field_list.o common/utils.o common/log.o math/matrix.o config/num_types.o +simulation_components/field_writer.o : simulation_components/field_writer.f90 common/json_utils.o io/fld_file_output.o case.o math/operators.o field/field.o field/field_registry.o simulation_components/simulation_component.o config/num_types.o common/bcknd/cpu/rhs_maker_cpu.o : common/bcknd/cpu/rhs_maker_cpu.f90 field/scratch_registry.o config/num_types.o field/field.o field/field_series.o common/rhs_maker.o common/bcknd/sx/rhs_maker_sx.o : common/bcknd/sx/rhs_maker_sx.f90 field/scratch_registry.o config/num_types.o field/field.o field/field_series.o common/rhs_maker.o common/bcknd/device/rhs_maker_device.o : common/bcknd/device/rhs_maker_device.F90 config/num_types.o field/field.o field/field_series.o common/utils.o device/device.o common/rhs_maker.o common/material_properties.o : common/material_properties.f90 comm/comm.o common/utils.o common/user_intf.o common/log.o common/json_utils.o config/num_types.o config/neko_config.o : config/neko_config.f90 -case.o : case.f90 common/material_properties.o mesh/point_zone_registry.o field/scratch_registry.o common/json_utils.o scalar/scalar_pnpn.o common/user_intf.o common/jobctrl.o common/log.o time_schemes/time_scheme_controller.o comm/comm.o mesh/mesh.o common/utils.o io/file.o common/statistics.o fluid/flow_ic.o common/sampler.o comm/redist.o comm/parmetis.o field/mesh_field.o io/fluid_stats_output.o io/mean_flow_output.o io/mean_sqr_flow_output.o io/chkp_output.o io/fluid_output.o fluid/fluid_fctry.o config/num_types.o -common/user_intf.o : common/user_intf.f90 config/num_types.o bc/usr_scalar.o bc/usr_inflow.o mesh/mesh.o sem/coef.o scalar/source_scalar.o fluid/fluid_user_source_term.o field/field.o -fluid/fluid_scheme.o : fluid/fluid_scheme.f90 common/material_properties.o common/utils.o common/user_intf.o source_terms/const_source_term.o source_terms/source_term_factory.o source_terms/source_term.o field/scratch_registry.o common/json_utils.o field/field_registry.o common/log.o math/operators.o math/mathops.o time_schemes/time_scheme_controller.o math/math.o mesh/mesh.o bc/bc.o fluid/fluid_stats.o krylov/precon_fctry.o krylov/krylov_fctry.o bc/non_normal.o bc/symmetry.o bc/dong_outflow.o bc/dirichlet.o bc/blasius.o bc/usr_inflow.o bc/inflow.o bc/wall.o sem/coef.o krylov/krylov.o sem/dofmap.o sem/space.o field/field.o field/field_list.o fluid/fluid_source_term.o fluid/fluid_user_source_term.o config/num_types.o fluid/mean_flow.o common/checkpoint.o config/neko_config.o fluid/mean_sqr_flow.o gs/gather_scatter.o +case.o : case.f90 common/material_properties.o mesh/point_zone_registry.o field/scratch_registry.o common/json_utils.o scalar/scalar_pnpn.o common/user_intf.o common/jobctrl.o common/log.o time_schemes/time_scheme_controller.o comm/comm.o mesh/mesh.o common/utils.o io/file.o common/statistics.o scalar/scalar_ic.o fluid/flow_ic.o common/sampler.o comm/redist.o comm/parmetis.o field/mesh_field.o io/fluid_stats_output.o io/mean_flow_output.o io/mean_sqr_flow_output.o io/chkp_output.o io/fluid_output.o fluid/fluid_fctry.o config/num_types.o +common/user_intf.o : common/user_intf.f90 common/utils.o config/num_types.o bc/field_dirichlet.o bc/usr_scalar.o bc/usr_inflow.o mesh/mesh.o bc/bc.o sem/coef.o scalar/scalar_user_source_term.o fluid/fluid_user_source_term.o field/field_list.o field/field.o +fluid/fluid_scheme.o : fluid/fluid_scheme.f90 common/time_step_controller.o field/field_series.o common/material_properties.o common/utils.o common/user_intf.o field/scratch_registry.o common/json_utils.o field/field_registry.o common/log.o math/operators.o math/mathops.o time_schemes/time_scheme_controller.o math/math.o mesh/mesh.o bc/bc.o fluid/fluid_stats.o krylov/precon_fctry.o krylov/krylov_fctry.o bc/field_dirichlet_vector.o bc/field_dirichlet.o bc/non_normal.o bc/symmetry.o bc/dong_outflow.o bc/dirichlet.o bc/blasius.o bc/usr_inflow.o bc/inflow.o bc/wall.o sem/coef.o krylov/krylov.o sem/dofmap.o sem/space.o field/field.o field/field_list.o fluid/fluid_source_term.o fluid/fluid_user_source_term.o comm/comm.o config/num_types.o fluid/mean_flow.o common/checkpoint.o config/neko_config.o fluid/mean_sqr_flow.o gs/gather_scatter.o fluid/fluid_aux.o : fluid/fluid_aux.f90 krylov/krylov.o config/num_types.o common/log.o -fluid/fluid_pnpn.o : fluid/fluid_pnpn.f90 common/material_properties.o common/json_utils.o common/profiler.o fluid/advection.o common/log.o common/projection.o time_schemes/time_scheme_controller.o fluid/fluid_aux.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o bc/facet_normal.o field/field_series.o fluid/fluid_scheme.o fluid/fluid_volflow.o common/rhs_maker_fctry.o math/ax_helm_fctry.o fluid/pnpn_res_fctry.o +fluid/fluid_pnpn.o : fluid/fluid_pnpn.f90 bc/bc.o math/mathops.o math/math.o config/neko_config.o gs/gather_scatter.o common/time_step_controller.o sem/coef.o common/user_intf.o mesh/mesh.o bc/non_normal.o bc/facet_normal.o bc/dirichlet.o field/field.o math/ax.o fluid/advection_fctry.o common/material_properties.o common/json_utils.o common/profiler.o fluid/advection.o common/log.o device/device.o common/projection.o time_schemes/time_scheme_controller.o fluid/fluid_aux.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o field/field_series.o fluid/fluid_scheme.o fluid/fluid_volflow.o common/rhs_maker.o common/rhs_maker_fctry.o math/ax_helm_fctry.o fluid/pnpn_res.o fluid/pnpn_res_fctry.o krylov/krylov.o config/num_types.o fluid/fluid_fctry.o : fluid/fluid_fctry.f90 config/neko_config.o common/utils.o fluid/fluid_pnpn.o fluid/fluid_scheme.o fluid/fluid_volflow.o : fluid/fluid_volflow.f90 math/ax.o bc/bc.o field/scratch_registry.o common/json_utils.o gs/gather_scatter.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o config/neko_config.o comm/comm.o math/math.o time_schemes/time_scheme_controller.o sem/coef.o field/field.o sem/dofmap.o krylov/precon.o krylov/krylov.o math/mathops.o config/num_types.o math/operators.o -fluid/pnpn_res.o : fluid/pnpn_res.f90 field/scratch_registry.o config/num_types.o mesh/mesh.o sem/space.o bc/facet_normal.o sem/coef.o field/field.o math/ax.o gs/gather_scatter.o +fluid/pnpn_res.o : fluid/pnpn_res.f90 config/num_types.o mesh/mesh.o sem/space.o bc/facet_normal.o sem/coef.o field/field.o math/ax.o gs/gather_scatter.o fluid/pnpn_res_fctry.o : fluid/pnpn_res_fctry.f90 fluid/bcknd/sx/pnpn_res_sx.o fluid/bcknd/cpu/pnpn_res_cpu.o fluid/bcknd/device/pnpn_res_device.o fluid/pnpn_res.o common/utils.o config/neko_config.o fluid/mean_flow.o : fluid/mean_flow.f90 field/field.o field/mean_field.o fluid/fluid_stats.o : fluid/fluid_stats.f90 common/utils.o config/neko_config.o device/device.o common/stats_quant.o gs/gather_scatter.o field/field_list.o field/field_registry.o field/field.o sem/coef.o math/operators.o math/math.o math/mathops.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o field/mean_field.o @@ -179,18 +192,20 @@ fluid/mean_sqr_flow.o : fluid/mean_sqr_flow.f90 field/field.o field/mean_sqr_fie fluid/flow_profile.o : fluid/flow_profile.f90 config/num_types.o fluid/flow_ic.o : fluid/flow_ic.f90 common/json_utils.o common/user_intf.o math/math.o sem/coef.o common/utils.o field/field.o device/device.o math/bcknd/device/device_math.o fluid/flow_profile.o config/neko_config.o gs/gather_scatter.o fluid/advection.o : fluid/advection.f90 device/device.o sem/interpolation.o math/operators.o config/neko_config.o math/bcknd/device/device_math.o sem/coef.o field/field.o sem/space.o common/utils.o math/math.o config/num_types.o +fluid/advection_fctry.o : fluid/advection_fctry.f90 common/json_utils.o device/device.o sem/coef.o fluid/advection.o config/num_types.o fluid/bcknd/cpu/pnpn_res_cpu.o : fluid/bcknd/cpu/pnpn_res_cpu.f90 sem/space.o config/num_types.o mesh/mesh.o field/scratch_registry.o fluid/pnpn_res.o bc/facet_normal.o sem/coef.o math/ax.o field/field.o math/operators.o gs/gather_scatter.o fluid/bcknd/sx/pnpn_res_sx.o : fluid/bcknd/sx/pnpn_res_sx.f90 sem/space.o config/num_types.o mesh/mesh.o field/scratch_registry.o fluid/pnpn_res.o bc/facet_normal.o sem/coef.o math/ax.o field/field.o math/operators.o gs/gather_scatter.o fluid/bcknd/device/pnpn_res_device.o : fluid/bcknd/device/pnpn_res_device.F90 field/scratch_registry.o fluid/pnpn_res.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o sem/space.o config/num_types.o mesh/mesh.o bc/facet_normal.o sem/coef.o math/ax.o field/field.o math/operators.o gs/gather_scatter.o -fluid/fluid_user_source_term.o : fluid/fluid_user_source_term.f90 sem/dofmap.o math/math.o math/bcknd/device/device_math.o device/device.o sem/coef.o field/field_list.o field/field.o source_terms/source_term.o common/utils.o config/num_types.o config/neko_config.o -fluid/fluid_source_term.o : fluid/fluid_source_term.f90 common/user_intf.o sem/coef.o common/json_utils.o field/field_list.o field/field.o source_terms/source_term_factory.o source_terms/source_term.o fluid/fluid_user_source_term.o config/num_types.o config/neko_config.o -simulation.o : simulation.f90 common/json_utils.o simulation_components/simulation_component_global.o common/profiler.o common/jobctrl.o common/log.o math/math.o io/file.o time_schemes/time_scheme_controller.o gs/gather_scatter.o case.o +fluid/fluid_user_source_term.o : fluid/fluid_user_source_term.f90 sem/dofmap.o math/math.o math/bcknd/device/device_math.o device/device.o sem/coef.o field/field_list.o source_terms/source_term.o common/utils.o config/num_types.o config/neko_config.o +fluid/fluid_source_term.o : fluid/fluid_source_term.f90 common/utils.o common/user_intf.o sem/coef.o common/json_utils.o field/field_list.o field/field.o source_terms/source_term_fctry.o source_terms/source_term.o fluid/fluid_user_source_term.o config/num_types.o config/neko_config.o +common/time_step_controller.o : common/time_step_controller.f90 common/json_utils.o common/log.o config/num_types.o +simulation.o : simulation.f90 common/time_step_controller.o common/json_utils.o simulation_components/simcomp_executor.o common/profiler.o field/field.o common/jobctrl.o math/bcknd/device/device_math.o device/device.o common/log.o math/math.o io/file.o time_schemes/time_scheme_controller.o gs/gather_scatter.o case.o math/ax_helm_fctry.o : math/ax_helm_fctry.f90 math/bcknd/cpu/ax_helm.o math/bcknd/sx/ax_helm_sx.o math/bcknd/xsmm/ax_helm_xsmm.o math/bcknd/device/ax_helm_device.o math/ax.o config/neko_config.o -math/bcknd/cpu/ax_helm.o : math/bcknd/cpu/ax_helm.f90 math/math.o mesh/mesh.o field/field.o sem/space.o sem/coef.o config/num_types.o math/ax.o -math/bcknd/sx/ax_helm_sx.o : math/bcknd/sx/ax_helm_sx.f90 math/math.o mesh/mesh.o field/field.o sem/space.o sem/coef.o config/num_types.o math/ax.o -math/bcknd/xsmm/ax_helm_xsmm.o : math/bcknd/xsmm/ax_helm_xsmm.F90 math/mxm_wrapper.o mesh/mesh.o field/field.o sem/space.o sem/coef.o config/num_types.o math/ax.o -math/bcknd/device/ax_helm_device.o : math/bcknd/device/ax_helm_device.F90 device/device.o math/bcknd/device/device_math.o mesh/mesh.o field/field.o sem/space.o sem/coef.o config/num_types.o math/ax.o -common/projection.o : common/projection.f90 common/log.o common/profiler.o common/bcknd/device/device_projection.o math/bcknd/device/device_math.o device/device.o config/neko_config.o gs/gather_scatter.o comm/comm.o bc/bc.o math/ax.o sem/coef.o math/math.o config/num_types.o +math/bcknd/cpu/ax_helm.o : math/bcknd/cpu/ax_helm.f90 math/math.o mesh/mesh.o sem/space.o sem/coef.o config/num_types.o math/ax.o +math/bcknd/sx/ax_helm_sx.o : math/bcknd/sx/ax_helm_sx.f90 math/math.o mesh/mesh.o sem/space.o sem/coef.o config/num_types.o math/ax.o +math/bcknd/xsmm/ax_helm_xsmm.o : math/bcknd/xsmm/ax_helm_xsmm.F90 math/mxm_wrapper.o mesh/mesh.o sem/space.o sem/coef.o config/num_types.o math/ax.o +math/bcknd/device/ax_helm_device.o : math/bcknd/device/ax_helm_device.F90 device/device.o math/bcknd/device/device_math.o mesh/mesh.o sem/space.o sem/coef.o config/num_types.o math/ax.o +common/projection.o : common/projection.f90 common/time_step_controller.o common/log.o common/profiler.o common/bcknd/device/device_projection.o math/bcknd/device/device_math.o device/device.o config/neko_config.o gs/gather_scatter.o comm/comm.o bc/bc.o math/ax.o sem/coef.o math/math.o config/num_types.o common/bcknd/device/device_projection.o : common/bcknd/device/device_projection.F90 common/utils.o config/num_types.o comm/parmetis.o : comm/parmetis.F90 mesh/mesh.o field/mesh_field.o config/num_types.o common/utils.o mesh/point.o comm/comm.o comm/redist.o : comm/redist.f90 mesh/element.o mesh/facet_zone.o io/format/nmsh.o mesh/mesh.o comm/comm.o mesh/curve.o adt/stack.o mesh/point.o adt/htable.o comm/mpi_types.o field/mesh_field.o @@ -218,23 +233,39 @@ bc/bcknd/device/device_facet_normal.o : bc/bcknd/device/device_facet_normal.F90 bc/bcknd/device/device_inhom_dirichlet.o : bc/bcknd/device/device_inhom_dirichlet.F90 common/utils.o config/num_types.o bc/bcknd/device/device_dong_outflow.o : bc/bcknd/device/device_dong_outflow.F90 common/utils.o config/num_types.o scalar/bcknd/device/scalar_residual_device.o : scalar/bcknd/device/scalar_residual_device.F90 math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o math/operators.o gs/gather_scatter.o scalar/scalar_residual.o -scalar/scalar_scheme.o : scalar/scalar_scheme.f90 common/material_properties.o common/user_intf.o common/json_utils.o bc/usr_scalar.o field/field_registry.o common/log.o time_schemes/time_scheme_controller.o mesh/facet_zone.o mesh/mesh.o bc/bc.o krylov/precon_fctry.o krylov/krylov_fctry.o bc/dirichlet.o sem/coef.o krylov/krylov.o sem/dofmap.o sem/space.o field/field.o scalar/source_scalar.o config/num_types.o common/checkpoint.o config/neko_config.o gs/gather_scatter.o -scalar/scalar_pnpn.o : scalar/scalar_pnpn.f90 common/material_properties.o common/user_intf.o common/json_utils.o common/profiler.o fluid/advection.o common/log.o math/math.o common/projection.o time_schemes/time_scheme_controller.o scalar/scalar_aux.o math/bcknd/device/device_mathops.o math/bcknd/device/device_math.o bc/facet_normal.o field/field_series.o math/ax.o scalar/scalar_residual.o gs/gather_scatter.o sem/coef.o mesh/mesh.o bc/bc.o field/field.o bc/dirichlet.o scalar/scalar_scheme.o common/rhs_maker_fctry.o math/ax_helm_fctry.o scalar/scalar_residual_fctry.o +scalar/scalar_scheme.o : scalar/scalar_scheme.f90 common/time_step_controller.o field/field_series.o scalar/scalar_source_term.o comm/comm.o common/utils.o common/material_properties.o common/user_intf.o common/json_utils.o bc/usr_scalar.o field/field_registry.o common/log.o time_schemes/time_scheme_controller.o mesh/facet_zone.o mesh/mesh.o bc/field_dirichlet.o bc/bc.o krylov/precon_fctry.o krylov/pc_hsmg.o krylov/bcknd/sx/pc_jacobi_sx.o krylov/bcknd/device/pc_jacobi_device.o krylov/bcknd/cpu/pc_jacobi.o krylov/krylov_fctry.o bc/neumann.o bc/dirichlet.o sem/coef.o krylov/krylov.o sem/dofmap.o sem/space.o field/field_list.o field/field.o config/num_types.o common/checkpoint.o gs/gather_scatter.o +scalar/scalar_pnpn.o : scalar/scalar_pnpn.f90 common/time_step_controller.o config/neko_config.o common/material_properties.o common/user_intf.o common/json_utils.o common/profiler.o fluid/advection_fctry.o fluid/advection.o common/log.o math/math.o common/projection.o time_schemes/time_scheme_controller.o scalar/scalar_aux.o math/bcknd/device/device_math.o krylov/krylov.o bc/facet_normal.o field/field_series.o math/ax.o scalar/scalar_residual.o gs/gather_scatter.o device/device.o sem/coef.o common/checkpoint.o mesh/mesh.o bc/bc.o field/field.o bc/neumann.o bc/dirichlet.o scalar/scalar_scheme.o common/rhs_maker.o common/rhs_maker_fctry.o math/ax_helm_fctry.o scalar/scalar_residual_fctry.o config/num_types.o scalar/scalar_aux.o : scalar/scalar_aux.f90 krylov/krylov.o config/num_types.o common/log.o scalar/scalar_residual.o : scalar/scalar_residual.f90 config/num_types.o mesh/mesh.o sem/space.o bc/facet_normal.o scalar/source_scalar.o sem/coef.o field/field.o math/ax.o gs/gather_scatter.o scalar/scalar_residual_fctry.o : scalar/scalar_residual_fctry.f90 scalar/bcknd/sx/scalar_residual_sx.o scalar/bcknd/cpu/scalar_residual_cpu.o scalar/bcknd/device/scalar_residual_device.o scalar/scalar_residual.o config/neko_config.o +scalar/scalar_ic.o : scalar/scalar_ic.f90 common/json_utils.o common/user_intf.o math/math.o sem/coef.o common/utils.o field/field.o device/device.o math/bcknd/device/device_math.o config/num_types.o config/neko_config.o gs/gather_scatter.o +scalar/scalar_source_term.o : scalar/scalar_source_term.f90 common/utils.o common/user_intf.o sem/coef.o common/json_utils.o field/field_list.o field/field.o source_terms/source_term_fctry.o source_terms/source_term.o scalar/scalar_user_source_term.o config/num_types.o config/neko_config.o +scalar/scalar_user_source_term.o : scalar/scalar_user_source_term.f90 sem/dofmap.o math/math.o math/bcknd/device/device_math.o device/device.o sem/coef.o field/field_list.o source_terms/source_term.o common/utils.o config/num_types.o config/neko_config.o scalar/bcknd/cpu/scalar_residual_cpu.o : scalar/bcknd/cpu/scalar_residual_cpu.f90 math/operators.o scalar/scalar_residual.o gs/gather_scatter.o scalar/bcknd/sx/scalar_residual_sx.o : scalar/bcknd/sx/scalar_residual_sx.f90 math/operators.o scalar/scalar_residual.o gs/gather_scatter.o scalar/source_scalar.o : scalar/source_scalar.f90 math/bcknd/device/device_math.o device/device.o common/utils.o sem/dofmap.o config/num_types.o config/neko_config.o simulation_components/simulation_component.o : simulation_components/simulation_component.f90 common/json_utils.o common/time_based_controller.o case.o config/num_types.o -simulation_components/simulation_component_global.o : simulation_components/simulation_component_global.f90 case.o simulation_components/simulation_component_factory.o simulation_components/simulation_component.o -simulation_components/vorticity.o : simulation_components/vorticity.f90 case.o math/operators.o field/field.o field/field_registry.o simulation_components/simulation_component.o config/num_types.o -simulation_components/lambda2.o : simulation_components/lambda2.f90 device/device.o case.o math/operators.o field/field_list.o field/field.o field/field_registry.o simulation_components/simulation_component.o config/num_types.o -simulation_components/simulation_component_factory.o : simulation_components/simulation_component_factory.f90 common/json_utils.o case.o simulation_components/probes.o simulation_components/lambda2.o simulation_components/vorticity.o simulation_components/simulation_component.o +simulation_components/simcomp_executor.o : simulation_components/simcomp_executor.f90 case.o common/json_utils.o simulation_components/simulation_component_fctry.o simulation_components/simulation_component.o config/num_types.o +simulation_components/vorticity.o : simulation_components/vorticity.f90 simulation_components/field_writer.o common/json_utils.o io/fld_file_output.o case.o math/operators.o field/field.o field/field_registry.o simulation_components/simulation_component.o config/num_types.o +simulation_components/lambda2.o : simulation_components/lambda2.f90 device/device.o simulation_components/field_writer.o case.o math/operators.o field/field.o field/field_registry.o simulation_components/simulation_component.o config/num_types.o +simulation_components/les_simcomp.o : simulation_components/les_simcomp.f90 common/json_utils.o les/les_model_fctry.o les/les_model.o case.o math/operators.o field/field.o field/field_registry.o simulation_components/simulation_component.o config/num_types.o +simulation_components/simulation_component_fctry.o : simulation_components/simulation_component_fctry.f90 simulation_components/field_writer.o common/log.o common/json_utils.o case.o simulation_components/les_simcomp.o simulation_components/probes.o simulation_components/lambda2.o simulation_components/vorticity.o simulation_components/simulation_component.o source_terms/source_term.o : source_terms/source_term.f90 field/field_list.o sem/coef.o config/num_types.o config/neko_config.o source_terms/const_source_term.o : source_terms/const_source_term.f90 source_terms/bcknd/device/const_source_term_device.o source_terms/bcknd/cpu/const_source_term_cpu.o common/utils.o config/neko_config.o sem/coef.o source_terms/source_term.o common/json_utils.o field/field_list.o config/num_types.o source_terms/bcknd/cpu/const_source_term_cpu.o : source_terms/bcknd/cpu/const_source_term_cpu.f90 math/math.o field/field_list.o config/num_types.o source_terms/bcknd/device/const_source_term_device.o : source_terms/bcknd/device/const_source_term_device.f90 math/bcknd/device/device_math.o field/field_list.o config/num_types.o -source_terms/source_term_factory.o : source_terms/source_term_factory.f90 sem/coef.o common/utils.o field/field_list.o common/json_utils.o source_terms/const_source_term.o source_terms/source_term.o -neko.o : neko.f90 mesh/point_zone_registry.o mesh/point_zones/sphere_point_zone.o mesh/point_zones/box_point_zone.o mesh/point_zone.o sem/point_interpolator.o common/time_interpolator.o io/data_streamer.o simulation_components/simulation_component_global.o field/scratch_registry.o field/field_registry.o common/system.o sem/spectral_error_indicator.o simulation_components/probes.o simulation_components/simulation_component.o math/tensor.o math/vector.o fluid/fluid_user_source_term.o field/field_list.o fluid/fluid_stats.o sem/cpr.o math/bcknd/device/device_math.o device/device.o common/jobctrl.o common/signal.o comm/parmetis.o common/user_intf.o common/projection.o math/mathops.o math/operators.o simulation.o io/output.o common/sampler.o case.o config/neko_config.o math/ax.o math/ax_helm_fctry.o krylov/precon_fctry.o krylov/krylov_fctry.o bc/dirichlet.o bc/wall.o bc/bc.o sem/coef.o gs/gather_scatter.o comm/mpi_types.o field/field.o io/file.o common/global_interpolation.o math/mxm_wrapper.o io/format/map.o field/mesh_field.o mesh/point.o mesh/mesh.o adt/tuple.o adt/stack.o adt/uset.o adt/htable.o sem/space.o sem/dofmap.o sem/speclib.o math/math.o common/log.o common/utils.o comm/comm.o config/num_types.o +source_terms/boussinesq_source_term.o : source_terms/boussinesq_source_term.f90 field/field_registry.o source_terms/bcknd/device/boussinesq_source_term_device.o source_terms/bcknd/cpu/boussinesq_source_term_cpu.o common/utils.o config/neko_config.o sem/coef.o source_terms/source_term.o common/json_utils.o field/field.o field/field_list.o config/num_types.o +source_terms/bcknd/cpu/boussinesq_source_term_cpu.o : source_terms/bcknd/cpu/boussinesq_source_term_cpu.f90 math/math.o field/field.o field/field_list.o config/num_types.o +source_terms/bcknd/device/boussinesq_source_term_device.o : source_terms/bcknd/device/boussinesq_source_term_device.f90 math/bcknd/device/device_math.o field/field.o field/field_list.o config/num_types.o +source_terms/source_term_fctry.o : source_terms/source_term_fctry.f90 sem/coef.o common/utils.o field/field_list.o common/json_utils.o source_terms/brinkman_source_term.o source_terms/boussinesq_source_term.o source_terms/const_source_term.o source_terms/source_term.o +source_terms/brinkman_source_term.o : source_terms/brinkman_source_term.f90 mesh/point_zone_registry.o mesh/point_zone.o mesh/search_tree/aabb.o common/profiler.o math/signed_distance.o source_terms/brinkman/filters.o device/device.o mesh/tri_mesh.o io/file.o source_terms/bcknd/device/brinkman_source_term_device.o source_terms/bcknd/cpu/brinkman_source_term_cpu.o common/utils.o config/neko_config.o sem/coef.o source_terms/source_term.o field/field_registry.o common/json_utils.o field/field_list.o field/field.o config/num_types.o +source_terms/bcknd/cpu/brinkman_source_term_cpu.o : source_terms/bcknd/cpu/brinkman_source_term_cpu.f90 field/field_registry.o math/math.o field/field_list.o field/field.o config/num_types.o +source_terms/bcknd/device/brinkman_source_term_device.o : source_terms/bcknd/device/brinkman_source_term_device.f90 field/field_registry.o math/bcknd/device/device_math.o field/field_list.o field/field.o config/num_types.o +source_terms/brinkman/filters.o : source_terms/brinkman/filters.f90 source_terms/bcknd/cpu/filters_cpu.o config/num_types.o field/field.o +source_terms/bcknd/cpu/filters_cpu.o : source_terms/bcknd/cpu/filters_cpu.f90 config/num_types.o +les/les_model.o : les/les_model.f90 device/device.o config/neko_config.o gs/gs_ops.o sem/coef.o sem/dofmap.o field/field_registry.o field/field.o config/num_types.o +les/les_model_fctry.o : les/les_model_fctry.f90 sem/coef.o sem/dofmap.o les/vreman.o les/les_model.o +les/vreman.o : les/vreman.f90 sem/coef.o les/bcknd/cpu/vreman_cpu.o config/neko_config.o common/utils.o common/json_utils.o sem/dofmap.o les/les_model.o field/field.o config/num_types.o +les/bcknd/cpu/vreman_cpu.o : les/bcknd/cpu/vreman_cpu.f90 sem/coef.o math/operators.o field/field.o field/field_registry.o field/scratch_registry.o math/math.o field/field_list.o config/num_types.o +neko.o : neko.f90 mesh/point_zone_registry.o mesh/point_zones/sphere_point_zone.o mesh/point_zones/box_point_zone.o mesh/point_zone.o sem/point_interpolator.o common/time_interpolator.o io/data_streamer.o simulation_components/simcomp_executor.o field/scratch_registry.o field/field_registry.o qoi/drag_torque.o common/system.o sem/spectral_error_indicator.o simulation_components/probes.o simulation_components/simulation_component.o math/tensor.o math/matrix.o math/vector.o scalar/scalar_user_source_term.o fluid/fluid_user_source_term.o field/field_list.o fluid/fluid_stats.o sem/cpr.o sem/map_1d.o math/bcknd/device/device_math.o device/device.o common/jobctrl.o common/signal.o comm/parmetis.o common/user_intf.o common/projection.o math/mathops.o math/operators.o simulation.o io/output.o common/sampler.o case.o config/neko_config.o math/ax.o math/ax_helm_fctry.o krylov/precon_fctry.o krylov/krylov_fctry.o bc/dirichlet.o bc/wall.o bc/bc.o sem/coef.o gs/gather_scatter.o comm/mpi_types.o field/field.o io/file.o common/global_interpolation.o math/mxm_wrapper.o io/format/map.o field/mesh_field.o mesh/point.o mesh/mesh.o adt/tuple.o adt/stack.o adt/uset.o adt/htable.o sem/space.o sem/dofmap.o sem/speclib.o math/math.o common/log.o common/utils.o comm/comm.o config/num_types.o driver.o : driver.f90 neko.o diff --git a/src/.depends_device b/src/.depends_device index ae467eb0f44..537362f2c0c 100644 --- a/src/.depends_device +++ b/src/.depends_device @@ -13,6 +13,7 @@ math/bcknd/device/hip/tensor.o : math/bcknd/device/hip/tensor.hip math/bcknd/dev math/bcknd/device/hip/fdm.o : math/bcknd/device/hip/fdm.hip math/bcknd/device/hip/fdm_kernel.h krylov/bcknd/device/hip/pc_jacobi.o : krylov/bcknd/device/hip/pc_jacobi.hip krylov/bcknd/device/hip/pipecg_aux.o : krylov/bcknd/device/hip/pipecg_aux.hip krylov/bcknd/device/hip/pipecg_kernel.h +krylov/bcknd/device/hip/fusedcg_aux.o : krylov/bcknd/device/hip/fusedcg_aux.hip krylov/bcknd/device/hip/fusedcg_kernel.h krylov/bcknd/device/hip/gmres_aux.o : krylov/bcknd/device/hip/gmres_aux.hip krylov/bcknd/device/hip/gmres_kernel.h gs/bcknd/device/hip/gs.o : gs/bcknd/device/hip/gs.hip gs/bcknd/device/hip/gs_kernels.h bc/bcknd/device/hip/dirichlet.o : bc/bcknd/device/hip/dirichlet.hip bc/bcknd/device/hip/dirichlet_kernel.h @@ -42,6 +43,7 @@ math/bcknd/device/cuda/tensor.o : math/bcknd/device/cuda/tensor.cu math/bcknd/de math/bcknd/device/cuda/fdm.o : math/bcknd/device/cuda/fdm.cu math/bcknd/device/cuda/fdm_kernel.h krylov/bcknd/device/cuda/pc_jacobi.o : krylov/bcknd/device/cuda/pc_jacobi.cu krylov/bcknd/device/cuda/pipecg_aux.o : krylov/bcknd/device/cuda/pipecg_aux.cu krylov/bcknd/device/cuda/pipecg_kernel.h +krylov/bcknd/device/cuda/fusedcg_aux.o : krylov/bcknd/device/cuda/fusedcg_aux.cu krylov/bcknd/device/cuda/fusedcg_kernel.h krylov/bcknd/device/cuda/gmres_aux.o : krylov/bcknd/device/cuda/gmres_aux.cu krylov/bcknd/device/cuda/gmres_kernel.h gs/bcknd/device/cuda/gs.o : gs/bcknd/device/cuda/gs.cu gs/bcknd/device/cuda/gs_kernels.h bc/bcknd/device/cuda/dirichlet.o : bc/bcknd/device/cuda/dirichlet.cu bc/bcknd/device/cuda/dirichlet_kernel.h diff --git a/src/Makefile.am b/src/Makefile.am index 4d0019389ae..1176764478e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -18,11 +18,13 @@ neko_fortran_SOURCES = \ common/system.f90\ math/mxm_wrapper.F90\ sem/speclib.f90 \ + qoi/drag_torque.f90\ sem/local_interpolation.f90 \ math/math.f90\ math/mathops.f90\ math/fast3d.f90\ sem/space.f90\ + sem/map_1d.f90\ sem/dofmap.f90\ sem/coef.f90\ sem/cpr.f90\ @@ -56,10 +58,13 @@ neko_fortran_SOURCES = \ mesh/point_zone.f90\ mesh/point_zones/sphere_point_zone.f90\ mesh/point_zones/box_point_zone.f90\ - mesh/point_zone_factory.f90\ + mesh/point_zones/cylinder_point_zone.f90\ + mesh/point_zone_fctry.f90\ mesh/point_zone_registry.f90\ mesh/mesh.f90\ mesh/octree.f90\ + mesh/search_tree/aabb.f90\ + mesh/aabb_tree.f90\ mesh/tet_mesh.f90\ mesh/tri_mesh.f90\ field/field_registry.f90\ @@ -80,6 +85,7 @@ neko_fortran_SOURCES = \ math/bcknd/sx/sx_dudxyz.f90\ math/bcknd/sx/sx_opgrad.f90\ math/bcknd/sx/sx_cfl.f90\ + math/bcknd/sx/sx_lambda2.f90\ math/operators.f90\ math/bcknd/cpu/opr_cpu.f90\ math/bcknd/sx/opr_sx.f90\ @@ -96,6 +102,7 @@ neko_fortran_SOURCES = \ math/schwarz.f90\ math/vector.f90\ math/matrix.f90\ + math/signed_distance.f90\ common/checkpoint.f90\ io/generic_file.f90\ io/map_file.f90\ @@ -111,6 +118,7 @@ neko_fortran_SOURCES = \ io/file.f90\ io/output.f90\ io/fluid_output.f90\ + io/fld_file_output.f90\ io/chkp_output.f90\ io/mean_flow_output.f90\ io/fluid_stats_output.f90\ @@ -122,9 +130,12 @@ neko_fortran_SOURCES = \ common/craypat.F90\ bc/bc.f90\ bc/dirichlet.f90\ + bc/neumann.f90\ bc/dong_outflow.f90\ bc/wall.f90\ bc/inflow.f90\ + bc/field_dirichlet.f90\ + bc/field_dirichlet_vector.f90\ bc/usr_inflow.f90\ bc/usr_scalar.f90\ bc/facet_normal.f90\ @@ -148,6 +159,7 @@ neko_fortran_SOURCES = \ krylov/bcknd/sx/pc_jacobi_sx.f90\ krylov/bcknd/device/cg_device.f90\ krylov/bcknd/device/pipecg_device.F90\ + krylov/bcknd/device/fusedcg_device.F90\ krylov/bcknd/device/gmres_device.F90\ krylov/bcknd/device/pc_jacobi_device.F90\ krylov/bcknd/device/pc_identity_device.f90\ @@ -162,6 +174,7 @@ neko_fortran_SOURCES = \ common/rhs_maker.f90\ common/rhs_maker_fctry.f90\ simulation_components/probes.F90\ + simulation_components/field_writer.f90\ common/bcknd/cpu/rhs_maker_cpu.f90\ common/bcknd/sx/rhs_maker_sx.f90\ common/bcknd/device/rhs_maker_device.F90\ @@ -182,11 +195,13 @@ neko_fortran_SOURCES = \ fluid/flow_profile.f90\ fluid/flow_ic.f90\ fluid/advection.f90\ + fluid/advection_fctry.f90\ fluid/bcknd/cpu/pnpn_res_cpu.f90\ fluid/bcknd/sx/pnpn_res_sx.f90\ fluid/bcknd/device/pnpn_res_device.F90\ fluid/fluid_user_source_term.f90\ fluid/fluid_source_term.f90\ + common/time_step_controller.f90\ simulation.f90\ math/ax_helm_fctry.f90\ math/bcknd/cpu/ax_helm.f90\ @@ -226,19 +241,35 @@ neko_fortran_SOURCES = \ scalar/scalar_aux.f90\ scalar/scalar_residual.f90\ scalar/scalar_residual_fctry.f90\ + scalar/scalar_ic.f90\ + scalar/scalar_source_term.f90\ + scalar/scalar_user_source_term.f90\ scalar/bcknd/cpu/scalar_residual_cpu.f90\ scalar/bcknd/sx/scalar_residual_sx.f90\ scalar/source_scalar.f90\ simulation_components/simulation_component.f90\ - simulation_components/simulation_component_global.f90\ + simulation_components/simcomp_executor.f90\ simulation_components/vorticity.f90\ simulation_components/lambda2.f90\ - simulation_components/simulation_component_factory.f90\ + simulation_components/les_simcomp.f90\ + simulation_components/simulation_component_fctry.f90\ source_terms/source_term.f90\ source_terms/const_source_term.f90\ source_terms/bcknd/cpu/const_source_term_cpu.f90\ source_terms/bcknd/device/const_source_term_device.f90\ - source_terms/source_term_factory.f90\ + source_terms/boussinesq_source_term.f90\ + source_terms/bcknd/cpu/boussinesq_source_term_cpu.f90\ + source_terms/bcknd/device/boussinesq_source_term_device.f90\ + source_terms/source_term_fctry.f90\ + source_terms/brinkman_source_term.f90\ + source_terms/bcknd/cpu/brinkman_source_term_cpu.f90\ + source_terms/bcknd/device/brinkman_source_term_device.f90\ + source_terms/brinkman/filters.f90\ + source_terms/bcknd/cpu/filters_cpu.f90\ + les/les_model.f90\ + les/les_model_fctry.f90\ + les/vreman.f90\ + les/bcknd/cpu/vreman_cpu.f90\ neko.f90 neko_c_SOURCES = comm/comm_wrapper.c\ @@ -271,6 +302,7 @@ libneko_a_SOURCES += \ math/bcknd/device/hip/ax_helm.hip\ krylov/bcknd/device/hip/pc_jacobi.hip\ krylov/bcknd/device/hip/pipecg_aux.hip\ + krylov/bcknd/device/hip/fusedcg_aux.hip\ krylov/bcknd/device/hip/gmres_aux.hip\ gs/bcknd/device/hip/gs.hip\ bc/bcknd/device/hip/dirichlet.hip\ @@ -304,6 +336,7 @@ libneko_a_SOURCES += \ math/bcknd/device/cuda/ax_helm.cu\ krylov/bcknd/device/cuda/pc_jacobi.cu\ krylov/bcknd/device/cuda/pipecg_aux.cu\ + krylov/bcknd/device/cuda/fusedcg_aux.cu\ krylov/bcknd/device/cuda/gmres_aux.cu\ gs/bcknd/device/cuda/gs.cu\ bc/bcknd/device/cuda/dirichlet.cu\ @@ -481,6 +514,8 @@ EXTRA_DIST = \ scalar/bcknd/device/hip/scalar_residual_update_kernel.h\ krylov/bcknd/device/cuda/pipecg_kernel.h\ krylov/bcknd/device/hip/pipecg_kernel.h\ + krylov/bcknd/device/cuda/fusedcg_kernel.h\ + krylov/bcknd/device/hip/fusedcg_kernel.h\ krylov/bcknd/device/cuda/gmres_kernel.h\ krylov/bcknd/device/hip/gmres_kernel.h\ sem/bcknd/device/hip/coef_kernel.h\ diff --git a/src/adt/htable.f90 b/src/adt/htable.f90 index 48b025a99dc..f59ba812faf 100644 --- a/src/adt/htable.f90 +++ b/src/adt/htable.f90 @@ -47,7 +47,7 @@ module htable type :: h_tuple_t logical :: valid = .false. logical :: skip = .false. - class(*), allocatable :: key + class(*), allocatable :: key class(*), allocatable :: data end type h_tuple_t @@ -135,7 +135,7 @@ end function htable_hash procedure, pass(this) :: set => htable_i4t4_set procedure, pass(this) :: get => htable_i4t4_get procedure, pass(this) :: hash => htable_i4t4_hash - procedure, pass(this) :: remove => htable_i4t4_remove + procedure, pass(this) :: remove => htable_i4t4_remove end type htable_i4t4_t !> C pointer based hash table @@ -145,7 +145,7 @@ end function htable_hash procedure, pass(this) :: set => htable_cptr_set procedure, pass(this) :: get => htable_cptr_get procedure, pass(this) :: hash => htable_cptr_hash - procedure, pass(this) :: remove => htable_cptr_remove + procedure, pass(this) :: remove => htable_cptr_remove end type htable_cptr_t ! @@ -168,7 +168,7 @@ end function htable_hash procedure, pass(this) :: init => htable_iter_i4_init procedure, pass(this) :: value => htable_iter_i4_value procedure, pass(this) :: key => htable_iter_i4_key - final :: htable_iter_i4_free + final :: htable_iter_i4_free end type htable_iter_i4_t !> Iterator for an integer*8 based hash table @@ -242,10 +242,10 @@ subroutine htable_init(this, size, key, data) class(*), target, intent(in), optional :: data !< Type of data class(*), pointer :: dp integer :: i - + call htable_free(this) - + if (size .lt. 4) then size = 4 end if @@ -283,7 +283,7 @@ subroutine htable_free(this) this%size = 0 this%entries = 0 - + end subroutine htable_free !> Clear all entries in a hash table @@ -294,9 +294,9 @@ subroutine htable_clear(this) this%t(:)%valid = .false. this%entries = 0 else - call neko_error("Hash table not allocated") + call neko_error("Hash table not allocated") end if - + end subroutine htable_clear !> Return number of entries in the table @@ -315,7 +315,7 @@ end function htable_size !> Insert tuple @a (key, value) into the hash table - recursive subroutine htable_set(this, key, data) + recursive subroutine htable_set(this, key, data) class(htable_t), intent(inout) :: this class(*), intent(inout) :: key !< Table key class(*), intent(inout) :: data !< Data associated with @a key @@ -326,7 +326,7 @@ recursive subroutine htable_set(this, key, data) i = log(1.0/this%size)/log(0.6) !i = (this%size-1)/10 index = 0 - + do while (i .ge. 0) index = this%hash(key, c**2) if (index .lt. 0) then @@ -391,7 +391,7 @@ function htable_get(this, key, data) result(rcode) c = 0 i = this%size - 1 - + do while (i .ge. 0) index = this%hash(key, c**2) if (index .lt. 0) then @@ -401,7 +401,7 @@ function htable_get(this, key, data) result(rcode) if (.not. this%t(index)%valid .and. & .not. this%t(index)%skip) then rcode = 1 - return + return else if ((this%t(index)%valid) .and. & htable_eq_key(this, index, key)) then call htable_get_data(this, index, data) @@ -422,7 +422,7 @@ subroutine htable_remove(this, key) c = 0 i = this%size - 1 - + do while (i .ge. 0) index = this%hash(key, c**2) if (index .lt. 0) then @@ -576,12 +576,12 @@ pure function htable_eq_key(this, idx, key) result(res) end function htable_eq_key !> Set key at @a idx to @a key - subroutine htable_set_key(this, idx, key) + subroutine htable_set_key(this, idx, key) class(htable_t), target, intent(inout) :: this integer, intent(in) :: idx !< Table index class(*), intent(in) :: key !< Key to set at @a idx class(*), pointer :: kp - + kp => this%t(idx)%key select type(key) type is (integer) @@ -633,7 +633,7 @@ function htable_iter_next(this) result(valid) valid = (this%n .lt. this%t%size) if (.not. valid) this%n = -1 - + end function htable_iter_next !> Reset an iterator @@ -688,9 +688,9 @@ subroutine htable_iter_data(this, data) class default call neko_error('Invalid htable data (iter)') end select - + end subroutine htable_iter_data - + ! ! Integer based implementation ! @@ -706,11 +706,11 @@ subroutine htable_i4_init(this, size, data) else call htable_init(this, size, key) end if - + end subroutine htable_i4_init !> Insert an integer into the hash table - subroutine htable_i4_set(this, key, data) + subroutine htable_i4_set(this, key, data) class(htable_i4_t), intent(inout) :: this integer, intent(inout) :: key !< Table key class(*), intent(inout) :: data !< Data associated with @a key @@ -761,7 +761,7 @@ pure function htable_i4_hash(this, k, c) result(hash) end function htable_i4_hash !> Remove an integer with key @a key from the hash table - subroutine htable_i4_remove(this, key) + subroutine htable_i4_remove(this, key) class(htable_i4_t), intent(inout) :: this integer, intent(inout) :: key !< Table key @@ -775,7 +775,7 @@ subroutine htable_iter_i4_init(this, t) type(htable_i4_t), target, intent(inout) :: t this%t => t - this%n = -1 + this%n = -1 end subroutine htable_iter_i4_init @@ -796,7 +796,7 @@ function htable_iter_i4_value(this) result(value) class default call neko_error('Key and data of different kind (i4)') end select - + end function htable_iter_i4_value !> Return the current key of the integer based hash table iterator @@ -810,7 +810,7 @@ function htable_iter_i4_key(this) result(key) class default call neko_error('Invalid key (i4)') end select - + end function htable_iter_i4_key ! @@ -828,11 +828,11 @@ subroutine htable_i8_init(this, size, data) else call htable_init(this, size, key) end if - + end subroutine htable_i8_init !> Insert an integer*8 into the hash table - subroutine htable_i8_set(this, key, data) + subroutine htable_i8_set(this, key, data) class(htable_i8_t), intent(inout) :: this integer(kind=i8), intent(inout) :: key !< Table key class(*), intent(inout) :: data !< Data associated with @a key @@ -884,7 +884,7 @@ pure function htable_i8_hash(this, k, c) result(hash) end function htable_i8_hash !> Remove an integer*8 with key @a key from the hash table - subroutine htable_i8_remove(this, key) + subroutine htable_i8_remove(this, key) class(htable_i8_t), intent(inout) :: this integer(kind=i8), intent(inout) :: key !< Table key @@ -898,7 +898,7 @@ subroutine htable_iter_i8_init(this, t) type(htable_i8_t), target, intent(inout) :: t this%t => t - this%n = -1 + this%n = -1 end subroutine htable_iter_i8_init @@ -920,7 +920,7 @@ function htable_iter_i8_value(this) result(value) class default call neko_error('Key and data of different kind (i8)') end select - + end function htable_iter_i8_value !> Return the current key of the integer*8 based hash table iterator @@ -942,9 +942,9 @@ function htable_iter_i8_key(this) result(key) class default call neko_error('Corrupt htable iter. (i8)') end select - + end function htable_iter_i8_key - + ! ! Double precision based implementation @@ -961,11 +961,11 @@ subroutine htable_r8_init(this, size, data) else call htable_init(this, size, key) end if - + end subroutine htable_r8_init !> Insert a double precision @a key (with @a data) into the hash table - subroutine htable_r8_set(this, key, data) + subroutine htable_r8_set(this, key, data) class(htable_r8_t), intent(inout) :: this real(kind=dp), intent(inout) :: key !< Table key class(*), intent(inout) :: data !< Data associated with @a key @@ -1000,14 +1000,14 @@ pure function htable_r8_hash(this, k, c) result(hash) end function htable_r8_hash !> Remove a double precision key @a key from the hash table - subroutine htable_r8_remove(this, key) + subroutine htable_r8_remove(this, key) class(htable_r8_t), intent(inout) :: this real(kind=dp), intent(inout) :: key !< Table key call htable_remove(this, key) end subroutine htable_r8_remove - + !> Initialize a double precision based hash table iterator subroutine htable_iter_r8_init(this, t) @@ -1015,7 +1015,7 @@ subroutine htable_iter_r8_init(this, t) type(htable_r8_t), target, intent(inout) :: t this%t => t - this%n = -1 + this%n = -1 end subroutine htable_iter_r8_init @@ -1036,7 +1036,7 @@ function htable_iter_r8_value(this) result(value) class default call neko_error('Key and data of different kind (r8)') end select - + end function htable_iter_r8_value !> Return the current key of the double precision based hash table iterator @@ -1050,9 +1050,9 @@ function htable_iter_r8_key(this) result(key) class default call neko_error('Invalid key (r8)') end select - + end function htable_iter_r8_key - + ! ! Point based implementation ! @@ -1068,11 +1068,11 @@ subroutine htable_pt_init(this, size, data) else call htable_init(this, size, key) end if - + end subroutine htable_pt_init !> Insert a point @a key (with @a data) into the hash table - subroutine htable_pt_set(this, key, data) + subroutine htable_pt_set(this, key, data) class(htable_pt_t), intent(inout) :: this type(point_t), intent(inout) :: key !< Table key class(*), intent(inout) :: data !< Data associated with @a key @@ -1097,7 +1097,7 @@ pure function htable_pt_hash(this, k, c) result(hash) class(htable_pt_t), intent(in) :: this class(*), intent(in) :: k integer, value :: c - integer :: hash, i + integer :: hash, i integer(kind=i8) :: hash2, tmp, mult integer(kind=i8), parameter :: M1 = int(Z'7ed55d15', i8) integer(kind=i8), parameter :: M2 = int(Z'c761c23c', i8) @@ -1131,7 +1131,7 @@ pure function htable_pt_hash(this, k, c) result(hash) end function htable_pt_hash !> Remove a point with key @a key from the hash table - subroutine htable_pt_remove(this, key) + subroutine htable_pt_remove(this, key) class(htable_pt_t), intent(inout) :: this type(point_t), intent(inout) :: key !< Table key @@ -1146,7 +1146,7 @@ subroutine htable_iter_pt_init(this, t) type(htable_pt_t), target, intent(inout) :: t this%t => t - this%n = -1 + this%n = -1 end subroutine htable_iter_pt_init @@ -1167,7 +1167,7 @@ function htable_iter_pt_value(this) result(value) class default call neko_error('Key and data of different kind (pt)') end select - + end function htable_iter_pt_value !> Return the current key of the point based hash table iterator @@ -1181,7 +1181,7 @@ function htable_iter_pt_key(this) result(key) class default call neko_error('Invalid key (pt)') end select - + end function htable_iter_pt_key ! @@ -1199,11 +1199,11 @@ subroutine htable_i4t2_init(this, size, data) else call htable_init(this, size, key) end if - + end subroutine htable_i4t2_init !> Insert an integer 2-tuple into the hash table - subroutine htable_i4t2_set(this, key, data) + subroutine htable_i4t2_set(this, key, data) class(htable_i4t2_t), intent(inout) :: this type(tuple_i4_t), intent(inout) :: key !< Table key class(*), intent(inout) :: data !< Data associated with @a key @@ -1261,24 +1261,24 @@ pure function htable_i4t2_hash(this, k, c) result(hash) end function htable_i4t2_hash !> Remove an integer 2-tuple with key @a key from the hash table - subroutine htable_i4t2_remove(this, key) + subroutine htable_i4t2_remove(this, key) class(htable_i4t2_t), intent(inout) :: this type(tuple_i4_t), intent(inout) :: key !< Table key call htable_remove(this, key) end subroutine htable_i4t2_remove - + !> Initialize an integer 2-tuple based hash table iterator subroutine htable_iter_i4t2_init(this, t) class(htable_iter_i4t2_t), intent(inout) :: this type(htable_i4t2_t), target, intent(inout) :: t this%t => t - this%n = -1 + this%n = -1 end subroutine htable_iter_i4t2_init - + !> Destroy an integer 2-tuple based hash table iterator subroutine htable_iter_i4t2_free(this) type(htable_iter_i4t2_t), intent(inout) :: this @@ -1296,7 +1296,7 @@ function htable_iter_i4t2_value(this) result(value) class default call neko_error('Key and data of different kind (i4t2)') end select - + end function htable_iter_i4t2_value !> Return the current key of integer based 2-tuple hash table iterator @@ -1310,7 +1310,7 @@ function htable_iter_i4t2_key(this) result(key) class default call neko_error('Invalid key (i4t2)') end select - + end function htable_iter_i4t2_key ! @@ -1328,11 +1328,11 @@ subroutine htable_i4t4_init(this, size, data) else call htable_init(this, size, key) end if - + end subroutine htable_i4t4_init !> Insert an integer 4-tuple into the hash table - subroutine htable_i4t4_set(this, key, data) + subroutine htable_i4t4_set(this, key, data) class(htable_i4t4_t), intent(inout) :: this type(tuple4_i4_t), intent(inout) :: key !< Table key class(*), intent(inout) :: data !< Data associated with @a key @@ -1365,7 +1365,7 @@ pure function htable_i4t4_hash(this, k, c) result(hash) integer(kind=i8), parameter :: M4 = int(Z'd3a2646c', i8) integer(kind=i8), parameter :: M5 = int(Z'fd7046c5', i8) integer(kind=i8), parameter :: M6 = int(Z'b55a4f09', i8) - + select type(k) type is (tuple4_i4_t) mult = int(1000003, i8) @@ -1390,7 +1390,7 @@ pure function htable_i4t4_hash(this, k, c) result(hash) end function htable_i4t4_hash !> Remove an integer 4-tuple with key @a key from the hash table - subroutine htable_i4t4_remove(this, key) + subroutine htable_i4t4_remove(this, key) class(htable_i4t4_t), intent(inout) :: this type(tuple4_i4_t), intent(inout) :: key !< Table key @@ -1404,7 +1404,7 @@ subroutine htable_iter_i4t4_init(this, t) type(htable_i4t4_t), target, intent(inout) :: t this%t => t - this%n = -1 + this%n = -1 end subroutine htable_iter_i4t4_init @@ -1425,7 +1425,7 @@ function htable_iter_i4t4_value(this) result(value) class default call neko_error('Key and data of different kind (i4t4)') end select - + end function htable_iter_i4t4_value !> Return the current key of integer based 4-tuple hash table iterator @@ -1447,7 +1447,7 @@ function htable_iter_i4t4_key(this) result(key) class default call neko_error('Corrupt htable iter. (i4t4)') end select - + end function htable_iter_i4t4_key ! @@ -1459,17 +1459,17 @@ subroutine htable_cptr_init(this, size, data) integer, value :: size !< Initial size of the table class(*), intent(inout), optional :: data !< Data to associate with @a key type(h_cptr_t) :: key - + if (present(data)) then call htable_init(this, size, key, data) else call htable_init(this, size, key) end if - + end subroutine htable_cptr_init !> Insert a C pointer into the hash table - subroutine htable_cptr_set(this, key, data) + subroutine htable_cptr_set(this, key, data) class(htable_cptr_t), target, intent(inout) :: this type(h_cptr_t), intent(inout) :: key !< Table key class(*), intent(inout) :: data !< Data associated with @a key @@ -1508,7 +1508,7 @@ pure function htable_cptr_hash(this, k, c) result(hash) end function htable_cptr_hash !> Remove a C pointer with key @a key from the hash table - subroutine htable_cptr_remove(this, key) + subroutine htable_cptr_remove(this, key) class(htable_cptr_t), target, intent(inout) :: this type(h_cptr_t), intent(inout) :: key !< Table key @@ -1522,7 +1522,7 @@ subroutine htable_iter_cptr_init(this, t) type(htable_cptr_t), target, intent(inout) :: t this%t => t - this%n = -1 + this%n = -1 end subroutine htable_iter_cptr_init @@ -1545,7 +1545,7 @@ function htable_iter_cptr_value(this) result(value) class default call neko_error('Key and data of different kind (cptr)') end select - + end function htable_iter_cptr_value !> Return the current key of a C pointer based hash table iterator @@ -1561,7 +1561,7 @@ function htable_iter_cptr_key(this) result(key) class default call neko_error('Invalid key (cptr)') end select - + end function htable_iter_cptr_key end module htable diff --git a/src/adt/stack.f90 b/src/adt/stack.f90 index 975bbaa1332..24922ee0640 100644 --- a/src/adt/stack.f90 +++ b/src/adt/stack.f90 @@ -39,10 +39,10 @@ module stack use point, only : point_t use structs, only : struct_curve_t use math, only : NEKO_M_LN2 - use tuple, only : tuple_t, tuple_i4_t, tuple4_i4_t, tuple_i4r8_t, tuple_2i4r8_t + use tuple, only : tuple_i4_t, tuple4_i4_t, tuple_i4r8_t, tuple_2i4r8_t implicit none private - + integer, parameter :: NEKO_STACK_SIZE_T = 32 !> Base type for a stack @@ -55,6 +55,7 @@ module stack procedure, non_overridable, pass(this) :: free => stack_free procedure, non_overridable, pass(this) :: clear => stack_clear procedure, non_overridable, pass(this) :: size => stack_size + procedure, non_overridable, pass(this) :: is_empty => stack_is_empty procedure, non_overridable, pass(this) :: push => stack_push end type stack_t @@ -106,7 +107,7 @@ module stack procedure, public, pass(this) :: pop => stack_2i4r8t3_pop procedure, public, pass(this) :: array => stack_2i4r8t3_data end type stack_2i4r8t3_t - + !> Curved element stack type, public, extends(stack_t) :: stack_curve_t contains @@ -151,9 +152,9 @@ module stack contains - !> Initialize a stack of arbitrary type + !> Initialize a stack of arbitrary type subroutine stack_init(this, size) - class(stack_t), intent(inout) :: this + class(stack_t), intent(inout) :: this integer, optional :: size !< Initial size of the stack integer :: size_t @@ -202,16 +203,16 @@ subroutine stack_init(this, size) end select end subroutine stack_init - + !> Destroy a stack subroutine stack_free(this) class(stack_t), intent(inout) :: this - + if (allocated(this%data)) then deallocate(this%data) - this%size_ = 0 + this%size_ = 0 this%top_ = 0 - end if + end if end subroutine stack_free @@ -228,6 +229,13 @@ pure function stack_size(this) result(size) size = this%top_ end function stack_size + !> Return true if the stack is empty + pure function stack_is_empty(this) result(is_empty) + class(stack_t), intent(in) :: this + logical :: is_empty + is_empty = this%top_ .eq. 0 + end function stack_is_empty + !> Push data onto the stack subroutine stack_push(this, data) class(stack_t), target, intent(inout) :: this @@ -242,7 +250,7 @@ subroutine stack_push(this, data) allocate(integer::tmp(this%size_)) type is(integer(i8)) allocate(integer(i8)::tmp(this%size_)) - type is(double precision) + type is(double precision) allocate(double precision::tmp(this%size_)) type is(tuple_i4_t) allocate(tuple_i4_t::tmp(this%size_)) @@ -267,7 +275,7 @@ subroutine stack_push(this, data) class default call neko_error('Invalid data type (stack_push)') end select - + select type(tmp) type is (integer) select type(sdp=>this%data) @@ -347,7 +355,7 @@ subroutine stack_push(this, data) end select call move_alloc(tmp, this%data) end if - + this%top_ = this%top_ + 1 select type(sdp=>this%data) @@ -427,7 +435,7 @@ function stack_i4_pop(this) result(data) integer :: data select type (sdp=>this%data) - type is (integer) + type is (integer) data = sdp(this%top_) class default call neko_error('Invalid data type (i4 pop)') @@ -438,10 +446,10 @@ end function stack_i4_pop !> Return a pointer to the internal integer array function stack_i4_data(this) result(data) class(stack_i4_t), target, intent(inout) :: this - integer, pointer :: data(:) + integer, contiguous, pointer :: data(:) select type (sdp=>this%data) - type is (integer) + type is (integer) data => sdp class default call neko_error('Invalid data type (i4 array)') @@ -454,7 +462,7 @@ function stack_i8_pop(this) result(data) integer(kind=i8) :: data select type (sdp=>this%data) - type is (integer(i8)) + type is (integer(i8)) data = sdp(this%top_) class default call neko_error('Invalid data type (i8 pop)') @@ -465,10 +473,10 @@ end function stack_i8_pop !> Return a pointer to the internal integer*8 array function stack_i8_data(this) result(data) class(stack_i8_t), target, intent(inout) :: this - integer(kind=i8), pointer :: data(:) + integer(kind=i8), contiguous, pointer :: data(:) select type (sdp=>this%data) - type is (integer(i8)) + type is (integer(i8)) data => sdp class default call neko_error('Invalid data type (i8 array)') @@ -479,9 +487,9 @@ end function stack_i8_data function stack_r8_pop(this) result(data) class(stack_r8_t), target, intent(inout) :: this real(kind=dp) :: data - + select type (sdp=>this%data) - type is (double precision) + type is (double precision) data = sdp(this%top_) class default call neko_error('Invalid data type (r8 pop)') @@ -489,13 +497,13 @@ function stack_r8_pop(this) result(data) this%top_ = this%top_ -1 end function stack_r8_pop - !> Return a pointer to the internal double precision array + !> Return a pointer to the internal double precision array function stack_r8_data(this) result(data) class(stack_r8_t), target, intent(inout) :: this - real(kind=dp), pointer :: data(:) + real(kind=dp), contiguous, pointer :: data(:) select type (sdp=>this%data) - type is (double precision) + type is (double precision) data => sdp class default call neko_error('Invalid data type (r8 array)') @@ -506,9 +514,9 @@ end function stack_r8_data function stack_i4t2_pop(this) result(data) class(stack_i4t2_t), target, intent(inout) :: this type(tuple_i4_t) :: data - + select type (sdp=>this%data) - type is (tuple_i4_t) + type is (tuple_i4_t) data = sdp(this%top_) class default call neko_error('Invalid data type (i4t2 pop)') @@ -519,10 +527,10 @@ end function stack_i4t2_pop !> Return a pointer to the interal 2-tuple array function stack_i4t2_data(this) result(data) class(stack_i4t2_t), target, intent(inout) :: this - type(tuple_i4_t), pointer :: data(:) + type(tuple_i4_t), contiguous, pointer :: data(:) select type (sdp=>this%data) - type is (tuple_i4_t) + type is (tuple_i4_t) data => sdp class default call neko_error('Invalid data type (i4t2 array)') @@ -533,9 +541,9 @@ end function stack_i4t2_data function stack_i4t4_pop(this) result(data) class(stack_i4t4_t), target, intent(inout) :: this type(tuple4_i4_t) :: data - + select type (sdp=>this%data) - type is (tuple4_i4_t) + type is (tuple4_i4_t) data = sdp(this%top_) class default call neko_error('Invalid data type (i4t4 pop)') @@ -546,10 +554,10 @@ end function stack_i4t4_pop !> Return a pointer to the internal 4-tuple array function stack_i4t4_data(this) result(data) class(stack_i4t4_t), target, intent(inout) :: this - type(tuple4_i4_t), pointer :: data(:) + type(tuple4_i4_t), contiguous, pointer :: data(:) select type (sdp=>this%data) - type is (tuple4_i4_t) + type is (tuple4_i4_t) data => sdp class default call neko_error('Invalid data type (i4t4 array)') @@ -560,9 +568,9 @@ end function stack_i4t4_data function stack_i4r8t2_pop(this) result(data) class(stack_i4r8t2_t), target, intent(inout) :: this type(tuple_i4r8_t) :: data - + select type (sdp=>this%data) - type is (tuple_i4r8_t) + type is (tuple_i4r8_t) data = sdp(this%top_) class default call neko_error('Invalid data type (i4r8t2 pop)') @@ -573,10 +581,10 @@ end function stack_i4r8t2_pop !> Return a pointer to the internal 2-tuple array function stack_i4r8t2_data(this) result(data) class(stack_i4r8t2_t), target, intent(inout) :: this - type(tuple_i4r8_t), pointer :: data(:) + type(tuple_i4r8_t), contiguous, pointer :: data(:) select type (sdp=>this%data) - type is (tuple_i4r8_t) + type is (tuple_i4r8_t) data => sdp class default call neko_error('Invalid data type (i4r8t2 array)') @@ -587,9 +595,9 @@ end function stack_i4r8t2_data function stack_2i4r8t3_pop(this) result(data) class(stack_2i4r8t3_t), target, intent(inout) :: this type(tuple_2i4r8_t) :: data - + select type (sdp=>this%data) - type is (tuple_2i4r8_t) + type is (tuple_2i4r8_t) data = sdp(this%top_) class default call neko_error('Invalid data type (i4r8t2 pop)') @@ -600,23 +608,23 @@ end function stack_2i4r8t3_pop !> Return a pointer to the internal 2-tuple array function stack_2i4r8t3_data(this) result(data) class(stack_2i4r8t3_t), target, intent(inout) :: this - type(tuple_2i4r8_t), pointer :: data(:) + type(tuple_2i4r8_t), contiguous, pointer :: data(:) select type (sdp=>this%data) - type is (tuple_2i4r8_t) + type is (tuple_2i4r8_t) data => sdp class default call neko_error('Invalid data type (i4r8t2 array)') end select end function stack_2i4r8t3_data - + !> Pop a curve element of the stack function stack_curve_element_pop(this) result(data) class(stack_curve_t), target, intent(inout) :: this type(struct_curve_t) :: data - + select type (sdp=>this%data) - type is (struct_curve_t) + type is (struct_curve_t) data = sdp(this%top_) class default call neko_error('Invalid data type (curve pop)') @@ -627,10 +635,10 @@ end function stack_curve_element_pop !> Return a pointer to the internal curve element array function stack_curve_element_data(this) result(data) class(stack_curve_t), target, intent(inout) :: this - type(struct_curve_t), pointer :: data(:) + type(struct_curve_t), contiguous, pointer :: data(:) select type (sdp=>this%data) - type is (struct_curve_t) + type is (struct_curve_t) data => sdp class default call neko_error('Invalid data type (curve array)') @@ -643,7 +651,7 @@ function stack_nq_pop(this) result(data) type(nmsh_quad_t) :: data select type (sdp=>this%data) - type is (nmsh_quad_t) + type is (nmsh_quad_t) data = sdp(this%top_) class default call neko_error('Invalid data type (nq pop)') @@ -654,10 +662,10 @@ end function stack_nq_pop !> Return a pointer to the internal Neko quad array function stack_nq_data(this) result(data) class(stack_nq_t), target, intent(inout) :: this - type(nmsh_quad_t), pointer :: data(:) + type(nmsh_quad_t), contiguous, pointer :: data(:) select type (sdp=>this%data) - type is (nmsh_quad_t) + type is (nmsh_quad_t) data => sdp class default call neko_error('Invalid data type (nq array)') @@ -670,7 +678,7 @@ function stack_nh_pop(this) result(data) type(nmsh_hex_t) :: data select type (sdp=>this%data) - type is (nmsh_hex_t) + type is (nmsh_hex_t) data = sdp(this%top_) class default call neko_error('Invalid data type (nh pop)') @@ -681,10 +689,10 @@ end function stack_nh_pop !> Return a pointer to the internal Neko quad array function stack_nh_data(this) result(data) class(stack_nh_t), target, intent(inout) :: this - type(nmsh_hex_t), pointer :: data(:) + type(nmsh_hex_t), contiguous, pointer :: data(:) select type (sdp => this%data) - type is (nmsh_hex_t) + type is (nmsh_hex_t) data => sdp class default call neko_error('Invalid data type (nh array)') @@ -697,7 +705,7 @@ function stack_nz_pop(this) result(data) type(nmsh_zone_t) :: data select type (sdp=>this%data) - type is (nmsh_zone_t) + type is (nmsh_zone_t) data = sdp(this%top_) class default call neko_error('Invalid data type (nz pop)') @@ -708,10 +716,10 @@ end function stack_nz_pop !> Return a pointer to the internal Neko zone array function stack_nz_data(this) result(data) class(stack_nz_t), target, intent(inout) :: this - type(nmsh_zone_t), pointer :: data(:) + type(nmsh_zone_t), contiguous, pointer :: data(:) select type (sdp=>this%data) - type is (nmsh_zone_t) + type is (nmsh_zone_t) data => sdp class default call neko_error('Invalid data type (nz array)') @@ -724,7 +732,7 @@ function stack_nc_pop(this) result(data) type(nmsh_curve_el_t) :: data select type (sdp=>this%data) - type is (nmsh_curve_el_t) + type is (nmsh_curve_el_t) data = sdp(this%top_) class default call neko_error('Invalid data type (nc pop)') @@ -738,7 +746,7 @@ function stack_nc_data(this) result(data) type(nmsh_curve_el_t), pointer :: data(:) select type (sdp=>this%data) - type is (nmsh_curve_el_t) + type is (nmsh_curve_el_t) data => sdp class default call neko_error('Invalid data type (nc array)') @@ -751,7 +759,7 @@ function stack_pt_pop(this) result(data) type(point_t) :: data select type (sdp=>this%data) - type is (point_t) + type is (point_t) data = sdp(this%top_) class default call neko_error('Invalid data type (point pop)') @@ -762,14 +770,14 @@ end function stack_pt_pop !> Return a pointer to the internal point array function stack_pt_data(this) result(data) class(stack_pt_t), target, intent(inout) :: this - type(point_t), pointer :: data(:) + type(point_t), contiguous, pointer :: data(:) select type (sdp=>this%data) - type is (point_t) + type is (point_t) data => sdp class default call neko_error('Invalid data type (point array)') end select end function stack_pt_data - + end module stack diff --git a/src/adt/tuple.f90 b/src/adt/tuple.f90 index 8c43dcf9ad9..5c9771a5ec4 100644 --- a/src/adt/tuple.f90 +++ b/src/adt/tuple.f90 @@ -32,12 +32,12 @@ ! !> Implements a n-tuple module tuple - use math, only : abscmp + use math, only : abscmp use num_types, only : dp implicit none private - !> Base type for an n-tuple + !> Base type for an n-tuple type, public, abstract :: tuple_t contains procedure(tuple_assign_tuple), pass(this), deferred :: assign_tuple @@ -47,7 +47,7 @@ module tuple generic :: assignment(=) => assign_tuple, assign_vector end type tuple_t - !> Integer based 2-tuple + !> Integer based 2-tuple type, extends(tuple_t), public :: tuple_i4_t integer :: x(2) = (/0, 0/) contains @@ -56,7 +56,7 @@ module tuple procedure, pass(this) :: equal => tuple_i4_equal end type tuple_i4_t - !> Integer based 3-tuple + !> Integer based 3-tuple type, extends(tuple_t), public :: tuple3_i4_t integer :: x(3) = (/0, 0, 0/) contains @@ -65,7 +65,7 @@ module tuple procedure, pass(this) :: equal => tuple3_i4_equal end type tuple3_i4_t - !> Integer based 4-tuple + !> Integer based 4-tuple type, extends(tuple_t), public :: tuple4_i4_t integer :: x(4) = (/0, 0, 0, 0/) contains @@ -74,7 +74,7 @@ module tuple procedure, pass(this) :: equal => tuple4_i4_equal end type tuple4_i4_t - !> Double precision based 2-tuple + !> Double precision based 2-tuple type, extends(tuple_t), public :: tuple_r8_t real(kind=dp) :: x(2) = (/0d0, 0d0/) contains @@ -93,9 +93,9 @@ module tuple procedure, pass(this) :: equal => tuple_i4r8_equal end type tuple_i4r8_t - !> Mixed integer (\f$ x, y \f$) double precision (\f$ z \f$) 3-tuple + !> Mixed integer (\f$ x, y \f$) double precision (\f$ z \f$) 3-tuple type, extends(tuple_t), public :: tuple_2i4r8_t - integer :: x, y + integer :: x, y real(kind=dp) :: z contains procedure, pass(this) :: assign_tuple => tuple_2i4r8_assign_tuple @@ -120,7 +120,7 @@ subroutine tuple_assign_vector(this, x) class(*), dimension(:), intent(in) :: x end subroutine tuple_assign_vector end interface - + !> Abstract intf. for tuple comparison abstract interface pure function tuple_equal(this, other) result(res) @@ -132,7 +132,7 @@ end function tuple_equal end interface contains - + !> Assign an integer 2-tuple to a tuple subroutine tuple_i4_assign_tuple(this, other) class(tuple_i4_t), intent(inout) :: this @@ -152,7 +152,7 @@ subroutine tuple_i4_assign_vector(this, x) select type(x) type is (integer) this%x = x - end select + end select end subroutine tuple_i4_assign_vector !> Check if two integer based tuples are equal @@ -187,14 +187,14 @@ subroutine tuple3_i4_assign_vector(this, x) select type(x) type is (integer) this%x = x - end select + end select end subroutine tuple3_i4_assign_vector !> Check if two integer based tuples are equal pure function tuple3_i4_equal(this, other) result(res) class(tuple3_i4_t), intent(in) :: this class(tuple_t), intent(in) :: other - logical :: res + logical :: res res = .false. select type(other) @@ -202,7 +202,7 @@ pure function tuple3_i4_equal(this, other) result(res) res = all(this%x .eq. other%x) end select end function tuple3_i4_equal - + !> Assign an integer 4-tuple to a tuple subroutine tuple4_i4_assign_tuple(this, other) class(tuple4_i4_t), intent(inout) :: this @@ -222,14 +222,14 @@ subroutine tuple4_i4_assign_vector(this, x) select type(x) type is (integer) this%x = x - end select + end select end subroutine tuple4_i4_assign_vector !> Check if two integer based tuples are equal pure function tuple4_i4_equal(this, other) result(res) class(tuple4_i4_t), intent(in) :: this class(tuple_t), intent(in) :: other - logical :: res + logical :: res res = .false. select type(other) @@ -244,7 +244,7 @@ subroutine tuple_r8_assign_tuple(this, other) class(tuple_t), intent(in) :: other select type(other) - type is(tuple_r8_t) + type is(tuple_r8_t) this%x = other%x end select end subroutine tuple_r8_assign_tuple @@ -257,7 +257,7 @@ subroutine tuple_r8_assign_vector(this, x) select type(x) type is (double precision) this%x = x - end select + end select end subroutine tuple_r8_assign_vector !> Check if two double precision tuples are equal @@ -271,7 +271,7 @@ pure function tuple_r8_equal(this, other) result(res) type is(tuple_r8_t) if (abscmp(this%x(1), other%x(1)) .and. & abscmp(this%x(2), other%x(2))) then - res = .true. + res = .true. end if end select end function tuple_r8_equal @@ -282,7 +282,7 @@ subroutine tuple_i4r8_assign_tuple(this, other) class(tuple_t), intent(in) :: other select type(other) - type is(tuple_i4r8_t) + type is(tuple_i4r8_t) this%x = other%x this%y = other%y end select @@ -315,7 +315,7 @@ pure function tuple_i4r8_equal(this, other) result(res) type is(tuple_i4r8_t) if ((this%x .eq. other%x) .and. & abscmp(this%y, other%y)) then - res = .true. + res = .true. end if end select end function tuple_i4r8_equal @@ -326,7 +326,7 @@ subroutine tuple_2i4r8_assign_tuple(this, other) class(tuple_t), intent(in) :: other select type(other) - type is(tuple_2i4r8_t) + type is(tuple_2i4r8_t) this%x = other%x this%y = other%y this%z = other%z @@ -363,9 +363,9 @@ pure function tuple_2i4r8_equal(this, other) result(res) if ((this%x .eq. other%x) .and. & (this%y .eq. other%y) .and. & abscmp(this%z, other%z)) then - res = .true. + res = .true. end if end select end function tuple_2i4r8_equal - + end module tuple diff --git a/src/adt/uset.f90 b/src/adt/uset.f90 index 5da3fb60883..f3e2c3237f6 100644 --- a/src/adt/uset.f90 +++ b/src/adt/uset.f90 @@ -31,7 +31,7 @@ ! POSSIBILITY OF SUCH DAMAGE. ! !> Implements an unordered set ADT -!! @details A unordered set storing a fixed data-type @a data +!! @details A unordered set storing a fixed data-type @a data module uset use utils, only : neko_error use num_types, only : i8, dp @@ -65,7 +65,7 @@ module uset procedure, pass(this) :: element => uset_i4_element procedure, pass(this) :: add => uset_i4_add procedure, pass(this) :: remove => uset_i4_remove - procedure, pass(this) :: iter_init => uset_i4_iter_init + procedure, pass(this) :: iter_init => uset_i4_iter_init procedure, pass(this) :: iter_next => uset_i4_iter_next procedure, pass(this) :: iter_value => uset_i4_iter_value end type uset_i4_t @@ -82,7 +82,7 @@ module uset procedure, pass(this) :: element => uset_i8_element procedure, pass(this) :: add => uset_i8_add procedure, pass(this) :: remove => uset_i8_remove - procedure, pass(this) :: iter_init => uset_i8_iter_init + procedure, pass(this) :: iter_init => uset_i8_iter_init procedure, pass(this) :: iter_next => uset_i8_iter_next procedure, pass(this) :: iter_value => uset_i8_iter_value end type uset_i8_t @@ -99,14 +99,14 @@ module uset procedure, pass(this) :: element => uset_r8_element procedure, pass(this) :: add => uset_r8_add procedure, pass(this) :: remove => uset_r8_remove - procedure, pass(this) :: iter_init => uset_r8_iter_init + procedure, pass(this) :: iter_init => uset_r8_iter_init procedure, pass(this) :: iter_next => uset_r8_iter_next procedure, pass(this) :: iter_value => uset_r8_iter_value end type uset_r8_t !> Interface for initializing an unordered set abstract interface - subroutine uset_init(this, n) + subroutine uset_init(this, n) import uset_t class(uset_t), intent(inout) :: this integer, optional :: n @@ -115,7 +115,7 @@ end subroutine uset_init !> Interface for destroying an unordered set abstract interface - subroutine uset_free(this) + subroutine uset_free(this) import uset_t class(uset_t), intent(inout) :: this end subroutine uset_free @@ -132,7 +132,7 @@ end function uset_size !> Interface for clearing an unordered set abstract interface - subroutine uset_clear(this) + subroutine uset_clear(this) import uset_t class(uset_t), intent(inout) :: this end subroutine uset_clear @@ -149,7 +149,7 @@ end function uset_element end interface !> Inteface for adding @a key to an unorderd set - abstract interface + abstract interface subroutine uset_add(this, key) import uset_t class(uset_t), intent(inout) :: this @@ -158,7 +158,7 @@ end subroutine uset_add end interface !> Inteface for removing @a key in an unorderd set - abstract interface + abstract interface subroutine uset_remove(this, key) import uset_t class(uset_t), intent(inout) :: this @@ -177,16 +177,16 @@ subroutine uset_i4_init(this, n) call this%t%init(n) else call this%t%init(64) - end if + end if end subroutine uset_i4_init - + !> Destroy an integer based unordered set subroutine uset_i4_free(this) class(uset_i4_t), intent(inout) :: this nullify(this%it%t) call this%t%free() - + end subroutine uset_i4_free !> Return the cardinality of an integer based unordered set @@ -195,7 +195,7 @@ pure function uset_i4_size(this) result(entries) integer :: entries entries = this%t%num_entries() - + end function uset_i4_size !> Clear an integer based unordered set @@ -217,9 +217,9 @@ function uset_i4_element(this, key) result(res) res = (this%t%get(key, data) .eq. 0) class default res = .false. - end select + end select end function uset_i4_element - + !> Add an integer @a key to the set subroutine uset_i4_add(this, key) class(uset_i4_t), intent(inout) :: this @@ -228,7 +228,7 @@ subroutine uset_i4_add(this, key) select type(key) type is (integer) - data = key + data = key call this%t%set(key, data) class default call neko_error("Invalid key") @@ -251,7 +251,7 @@ end subroutine uset_i4_remove !> Initialise an integer based set iterator subroutine uset_i4_iter_init(this) class(uset_i4_t), target, intent(inout) :: this - call this%it%init(this%t) + call this%it%init(this%t) end subroutine uset_i4_iter_init !> Advance an integer based set iterator @@ -265,7 +265,7 @@ end function uset_i4_iter_next function uset_i4_iter_value(this) result(value) class(uset_i4_t), target, intent(inout) :: this integer, pointer :: value - value => this%it%value() + value => this%it%value() end function uset_i4_iter_value !> Initialize an empty integer*8 based unordered set @@ -277,16 +277,16 @@ subroutine uset_i8_init(this, n) call this%t%init(n) else call this%t%init(64) - end if + end if end subroutine uset_i8_init - + !> Destroy an integer*8 based unordered set subroutine uset_i8_free(this) class(uset_i8_t), intent(inout) :: this nullify(this%it%t) call this%t%free() - + end subroutine uset_i8_free !> Return the cardinality of an integer*8 based unordered set @@ -295,7 +295,7 @@ pure function uset_i8_size(this) result(entries) integer :: entries entries = this%t%num_entries() - + end function uset_i8_size !> Clear an integer*8 based unordered set @@ -317,9 +317,9 @@ function uset_i8_element(this, key) result(res) res = (this%t%get(key, data) .eq. 0) class default res = .false. - end select + end select end function uset_i8_element - + !> Add an integer*8 @a key to the set subroutine uset_i8_add(this, key) class(uset_i8_t), intent(inout) :: this @@ -328,7 +328,7 @@ subroutine uset_i8_add(this, key) select type(key) type is (integer(i8)) - data = key + data = key call this%t%set(key, data) class default call neko_error("Invalid key") @@ -351,7 +351,7 @@ end subroutine uset_i8_remove !> Initialise an integer based set iterator*8 subroutine uset_i8_iter_init(this) class(uset_i8_t), target, intent(inout) :: this - call this%it%init(this%t) + call this%it%init(this%t) end subroutine uset_i8_iter_init !> Advance an integer*8 based set iterator @@ -373,10 +373,10 @@ function uset_i8_iter_value(this) result(value) type is (uset_i8_t) value => hp%it%value() class default - call neko_error('Invalid uset htable iter (i8)') + call neko_error('Invalid uset htable iter (i8)') end select end function uset_i8_iter_value - + !> Initialize an empty double precision based unordered set subroutine uset_r8_init(this, n) class(uset_r8_t), intent(inout) :: this @@ -388,13 +388,13 @@ subroutine uset_r8_init(this, n) call this%t%init(64) end if end subroutine uset_r8_init - + !> Destroy a double precision based unordered set subroutine uset_r8_free(this) class(uset_r8_t), intent(inout) :: this - + call this%t%free() - + end subroutine uset_r8_free !> Return the cardinality of a double precision based unordered set @@ -412,7 +412,7 @@ subroutine uset_r8_clear(this) call this%t%clear() end subroutine uset_r8_clear - + !> Check if a double precision @a key is an element of the set function uset_r8_element(this, key) result(res) class(uset_r8_t), intent(inout) :: this @@ -426,7 +426,7 @@ function uset_r8_element(this, key) result(res) class default res = .false. end select - + end function uset_r8_element !> Add a double precision @a key to the set @@ -460,7 +460,7 @@ end subroutine uset_r8_remove !> Initialise a double precision based set iterator subroutine uset_r8_iter_init(this) class(uset_r8_t), target, intent(inout) :: this - call this%it%init(this%t) + call this%it%init(this%t) end subroutine uset_r8_iter_init !> Advance a double precision based set iterator @@ -474,7 +474,7 @@ end function uset_r8_iter_next function uset_r8_iter_value(this) result(value) class(uset_r8_t), target, intent(inout) :: this real(kind=dp), pointer :: value - value => this%it%value() + value => this%it%value() end function uset_r8_iter_value diff --git a/src/bc/bc.f90 b/src/bc/bc.f90 index 6413cf1b249..a58209d1a7b 100644 --- a/src/bc/bc.f90 +++ b/src/bc/bc.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2020-2021, The Neko Authors +! Copyright (c) 2020-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -36,16 +36,17 @@ module bc use num_types use device use dofmap, only : dofmap_t + use coefs, only : coef_t use space, only : space_t - use mesh, only : mesh_t, NEKO_MSH_MAX_ZLBLS + use mesh, only : mesh_t, NEKO_MSH_MAX_ZLBLS, NEKO_MSH_MAX_ZLBL_LEN use facet_zone, only : facet_zone_t use stack, only : stack_i4t2_t use tuple, only : tuple_i4_t - use utils, only : neko_error, linear_index + use utils, only : neko_error, linear_index, split_string use, intrinsic :: iso_c_binding, only : c_ptr, C_NULL_PTR implicit none private - + !> Base type for a boundary condition type, public, abstract :: bc_t !> The linear index of each node in each boundary facet @@ -54,6 +55,8 @@ module bc integer, allocatable :: facet(:) !> Map of degrees of freedom type(dofmap_t), pointer :: dof + !> SEM coefficients + type(coef_t), pointer :: coef !> The mesh type(mesh_t), pointer :: msh !> The function space @@ -64,7 +67,7 @@ module bc type(c_ptr) :: msk_d = C_NULL_PTR !> Device pointer for facet type(c_ptr) :: facet_d = C_NULL_PTR - contains + contains !> Constructor procedure, pass(this) :: init => bc_init !> Destructor @@ -94,14 +97,16 @@ module bc type, private :: bcp_t class(bc_t), pointer :: bcp end type bcp_t - + !> A list of boundary conditions type, public :: bc_list_t type(bcp_t), allocatable :: bc(:) + !> Number of items. integer :: n + !> Capacity. integer :: size end type bc_list_t - + abstract interface !> Apply the boundary condition to a scalar field !! @param x The field for which to apply the boundary condition. @@ -139,12 +144,12 @@ subroutine bc_apply_vector(this, x, y, z, n, t, tstep) integer, intent(in), optional :: tstep end subroutine bc_apply_vector end interface - + abstract interface !> Apply the boundary condition to a scalar field on the device !! @param x_d Device pointer to the field. subroutine bc_apply_scalar_dev(this, x_d, t, tstep) - import :: c_ptr + import :: c_ptr import :: bc_t import :: rp class(bc_t), intent(inout), target :: this @@ -175,23 +180,24 @@ end subroutine bc_apply_vector_dev interface bc_list_apply module procedure bc_list_apply_scalar, bc_list_apply_vector end interface bc_list_apply - + public :: bc_list_init, bc_list_free, bc_list_add, & bc_list_apply_scalar, bc_list_apply_vector, bc_list_apply - + contains !> Constructor !! @param dof Map of degrees of freedom. - subroutine bc_init(this, dof) + subroutine bc_init(this, coef) class(bc_t), intent(inout) :: this - type(dofmap_t), target, intent(in) :: dof + type(coef_t), target, intent(in) :: coef call bc_free(this) - this%dof => dof - this%Xh => dof%Xh - this%msh => dof%msh + this%dof => coef%dof + this%coef => coef + this%Xh => this%dof%Xh + this%msh => this%dof%msh call this%marked_facet%init() @@ -202,9 +208,9 @@ subroutine bc_free(this) class(bc_t), intent(inout) :: this call this%marked_facet%free() - + nullify(this%Xh) - nullify(this%msh) + nullify(this%msh) nullify(this%dof) if (allocated(this%msk)) then @@ -224,7 +230,7 @@ subroutine bc_free(this) call device_free(this%facet_d) this%facet_d = C_NULL_PTR end if - + end subroutine bc_free !> Mark @a facet on element @a el as part of the boundary condition @@ -238,7 +244,7 @@ subroutine bc_mark_facet(this, facet, el) t%x = (/facet, el/) call this%marked_facet%push(t) - + end subroutine bc_mark_facet !> Mark all facets from a (facet, el) tuple list @@ -253,7 +259,7 @@ subroutine bc_mark_facets(this, facet_list) do i = 1, facet_list%size() call this%marked_facet%push(fp(i)) end do - + end subroutine bc_mark_facets !> Mark all facets from a zone @@ -277,31 +283,59 @@ subroutine bc_mark_zones_from_list(this, bc_zones, bc_key, bc_labels) class(bc_t), intent(inout) :: this class(facet_zone_t), intent(inout) :: bc_zones(:) character(len=*) :: bc_key - character(len=20) :: bc_labels(NEKO_MSH_MAX_ZLBLS) - integer :: i, j, k, msh_bc_type - + character(len=100), allocatable :: split_key(:) + character(len=NEKO_MSH_MAX_ZLBL_LEN) :: bc_labels(NEKO_MSH_MAX_ZLBLS) + integer :: i, j, k, l, msh_bc_type + msh_bc_type = 0 if(trim(bc_key) .eq. 'o' .or. trim(bc_key) .eq. 'on' & .or. trim(bc_key) .eq. 'o+dong' .or. trim(bc_key) .eq. 'on+dong') then msh_bc_type = 1 + else if(trim(bc_key) .eq. 'd_pres') then + msh_bc_type = 1 else if(trim(bc_key) .eq. 'w') then msh_bc_type = 2 else if(trim(bc_key) .eq. 'v') then msh_bc_type = 2 + else if(trim(bc_key) .eq. 'd_vel_u') then + msh_bc_type = 2 + else if(trim(bc_key) .eq. 'd_vel_v') then + msh_bc_type = 2 + else if(trim(bc_key) .eq. 'd_vel_w') then + msh_bc_type = 2 else if(trim(bc_key) .eq. 'sym') then msh_bc_type = 2 end if do i = 1, NEKO_MSH_MAX_ZLBLS - if (trim(bc_key) .eq. trim(bc_labels(i))) then - call bc_mark_zone(this, bc_zones(i)) - ! Loop across all faces in the mesh - do j = 1,this%msh%nelv - do k = 1, 2 * this%msh%gdim - if (this%msh%facet_type(k,j) .eq. -i) then - this%msh%facet_type(k,j) = msh_bc_type - end if + !Check if several bcs are defined for this zone + !bcs are seperated by /, but we could use something else + if (index(trim(bc_labels(i)), '/') .eq. 0) then + if (trim(bc_key) .eq. trim(bc_labels(i))) then + call bc_mark_zone(this, bc_zones(i)) + ! Loop across all faces in the mesh + do j = 1,this%msh%nelv + do k = 1, 2 * this%msh%gdim + if (this%msh%facet_type(k,j) .eq. -i) then + this%msh%facet_type(k,j) = msh_bc_type + end if + end do end do + end if + else + split_key = split_string(trim(bc_labels(i)),'/') + do l = 1, size(split_key) + if (trim(split_key(l)) .eq. trim(bc_key)) then + call bc_mark_zone(this, bc_zones(i)) + ! Loop across all faces in the mesh + do j = 1,this%msh%nelv + do k = 1, 2 * this%msh%gdim + if (this%msh%facet_type(k,j) .eq. -i) then + this%msh%facet_type(k,j) = msh_bc_type + end if + end do + end do + end if end do end if end do @@ -324,7 +358,7 @@ subroutine bc_finalize(this) lz = this%Xh%lz !>@todo add 2D case - + ! Note we assume that lx = ly = lz facet_size = lx**2 allocate(this%msk(0:facet_size * this%marked_facet%size())) @@ -332,10 +366,10 @@ subroutine bc_finalize(this) msk_c = 0 bfp => this%marked_facet%array() - + ! Loop through each (facet, element) id tuple ! Then loop over all the nodes of the face and compute their linear index - ! This index goes into This%msk, whereas the corresponding face id goes into + ! This index goes into this%msk, whereas the corresponding face id goes into ! this%facet do i = 1, this%marked_facet%size() bc_facet = bfp(i) @@ -395,14 +429,16 @@ subroutine bc_finalize(this) this%msk(0) = msk_c this%facet(0) = msk_c - + if (NEKO_BCKND_DEVICE .eq. 1) then n = facet_size * this%marked_facet%size() + 1 call device_map(this%msk, this%msk_d, n) call device_map(this%facet, this%facet_d, n) - call device_memcpy(this%msk, this%msk_d, n, HOST_TO_DEVICE) - call device_memcpy(this%facet, this%facet_d, n, HOST_TO_DEVICE) + call device_memcpy(this%msk, this%msk_d, n, & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%facet, this%facet_d, n, & + HOST_TO_DEVICE, sync=.false.) end if end subroutine bc_finalize @@ -430,7 +466,7 @@ subroutine bc_list_init(bclst, size) bclst%n = 0 bclst%size = n - + end subroutine bc_list_init !> Destructor for a list of boundary conditions @@ -445,7 +481,7 @@ subroutine bc_list_free(bclst) bclst%n = 0 bclst%size = 0 - + end subroutine bc_list_free !> Add a condition to a list of boundary conditions @@ -467,7 +503,7 @@ subroutine bc_list_add(bclst, bc) bclst%n = bclst%n + 1 bclst%bc(bclst%n)%bcp => bc - + end subroutine bc_list_add !> Apply a list of boundary conditions to a scalar field @@ -587,6 +623,6 @@ subroutine bc_list_apply_vector(bclst, x, y, z, n, t, tstep) end if end subroutine bc_list_apply_vector - - + + end module bc diff --git a/src/bc/bcknd/device/cuda/dirichlet_kernel.h b/src/bc/bcknd/device/cuda/dirichlet_kernel.h index 0daf3e0fd28..d11a9198b2b 100644 --- a/src/bc/bcknd/device/cuda/dirichlet_kernel.h +++ b/src/bc/bcknd/device/cuda/dirichlet_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_DIRICHLET_KERNEL__ +#define __BC_DIRICHLET_KERNEL__ + /** * Device kernel for scalar apply for a Dirichlet condition */ @@ -72,3 +75,4 @@ __global__ void dirichlet_apply_vector_kernel(const int * __restrict__ msk, } } +#endif // __BC_DIRICHLET_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/cuda/dong_outflow_kernel.h b/src/bc/bcknd/device/cuda/dong_outflow_kernel.h index a293fddffe5..99dd02bd90d 100644 --- a/src/bc/bcknd/device/cuda/dong_outflow_kernel.h +++ b/src/bc/bcknd/device/cuda/dong_outflow_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_DONG_OUTFLOW_KERNEL__ +#define __BC_DONG_OUTFLOW_KERNEL__ + /** * Device kernel for vector apply for a dong outflow */ @@ -62,3 +65,5 @@ void dong_outflow_apply_scalar_kernel(const int * __restrict__ msk, x[k] = -0.5*(uk*uk+vk*vk+wk*wk)*S0; } } + +#endif // __BC_DONG_OUTFLOW_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/cuda/facet_normal_kernel.h b/src/bc/bcknd/device/cuda/facet_normal_kernel.h index 03b72762bd3..1f8d8200cf4 100644 --- a/src/bc/bcknd/device/cuda/facet_normal_kernel.h +++ b/src/bc/bcknd/device/cuda/facet_normal_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_FACET_NORMAL_KERNEL__ +#define __BC_FACET_NORMAL_KERNEL__ + /** * Computes the linear index for area and normal arrays * @note Fortran indexing input, C indexing output @@ -122,3 +125,4 @@ void facet_normal_apply_surfvec_kernel(const int * __restrict__ msk, } } +#endif // __BC_FACET_NORMAL_KERNEL__ diff --git a/src/bc/bcknd/device/cuda/inflow_kernel.h b/src/bc/bcknd/device/cuda/inflow_kernel.h index 5da1babbe1c..8b707344b93 100644 --- a/src/bc/bcknd/device/cuda/inflow_kernel.h +++ b/src/bc/bcknd/device/cuda/inflow_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_INFLOW_KERNEL__ +#define __BC_INFLOW_KERNEL__ + /** * Device kernel for vector apply for a Dirichlet condition */ @@ -56,3 +59,4 @@ __global__ void inflow_apply_vector_kernel(const int * __restrict__ msk, } } +#endif // __BC_INFLOW_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/cuda/inhom_dirichlet_kernel.h b/src/bc/bcknd/device/cuda/inhom_dirichlet_kernel.h index 4d0dd05074f..afc8ec38cbb 100644 --- a/src/bc/bcknd/device/cuda/inhom_dirichlet_kernel.h +++ b/src/bc/bcknd/device/cuda/inhom_dirichlet_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_INHOM_DIRICHLET_KERNEL__ +#define __BC_INHOM_DIRICHLET_KERNEL__ + /** * Device kernel for vector apply for an inhomogeneous Dirichlet condition */ @@ -73,3 +76,5 @@ __global__ void inhom_dirichlet_apply_scalar_kernel(const int * __restrict__ msk x[k] = bla_x[i]; } } + +#endif // __BC_INHOM_DIRICHLET_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/cuda/no_slip_wall_kernel.h b/src/bc/bcknd/device/cuda/no_slip_wall_kernel.h index d54f92b7149..25f56e7280d 100644 --- a/src/bc/bcknd/device/cuda/no_slip_wall_kernel.h +++ b/src/bc/bcknd/device/cuda/no_slip_wall_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_NO_SLIP_WALL_KERNEL__ +#define __BC_NO_SLIP_WALL_KERNEL__ + /** * Device kernel for scalar apply for a no-slip wall conditon */ @@ -70,3 +73,4 @@ __global__ void no_slip_wall_apply_vector_kernel(const int * __restrict__ msk, } } +#endif // __BC_NO_SLIP_WALL_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/cuda/symmetry_kernel.h b/src/bc/bcknd/device/cuda/symmetry_kernel.h index ed9878db2b5..6ba8ad1ff56 100644 --- a/src/bc/bcknd/device/cuda/symmetry_kernel.h +++ b/src/bc/bcknd/device/cuda/symmetry_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_SYMMETRY_KERNEL__ +#define __BC_SYMMETRY_KERNEL__ + /** * Device kernel for vector apply for a symmetry condition */ @@ -66,4 +69,4 @@ __global__ void symmetry_apply_vector_kernel(const int * __restrict__ xmsk, } - +#endif // __BC_SYMMETRY_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/device_dirichlet.F90 b/src/bc/bcknd/device/device_dirichlet.F90 index 76da8ddf964..e1da42d3935 100644 --- a/src/bc/bcknd/device/device_dirichlet.F90 +++ b/src/bc/bcknd/device/device_dirichlet.F90 @@ -49,7 +49,7 @@ subroutine hip_dirichlet_apply_scalar(msk, x, g, m) & type(c_ptr), value :: msk, x end subroutine hip_dirichlet_apply_scalar end interface - + interface subroutine hip_dirichlet_apply_vector(msk, x, y, z, g, m) & bind(c, name='hip_dirichlet_apply_vector') @@ -73,7 +73,7 @@ subroutine cuda_dirichlet_apply_scalar(msk, x, g, m) & type(c_ptr), value :: msk, x end subroutine cuda_dirichlet_apply_scalar end interface - + interface subroutine cuda_dirichlet_apply_vector(msk, x, y, z, g, m) & bind(c, name='cuda_dirichlet_apply_vector') @@ -97,7 +97,7 @@ subroutine opencl_dirichlet_apply_scalar(msk, x, g, m) & type(c_ptr), value :: msk, x end subroutine opencl_dirichlet_apply_scalar end interface - + interface subroutine opencl_dirichlet_apply_vector(msk, x, y, z, g, m) & bind(c, name='opencl_dirichlet_apply_vector') @@ -112,7 +112,7 @@ end subroutine opencl_dirichlet_apply_vector #endif public :: device_dirichlet_apply_scalar, device_dirichlet_apply_vector - + contains subroutine device_dirichlet_apply_scalar(msk, x, g, m) @@ -129,7 +129,7 @@ subroutine device_dirichlet_apply_scalar(msk, x, g, m) #else call neko_error('No device backend configured') #endif - + end subroutine device_dirichlet_apply_scalar subroutine device_dirichlet_apply_vector(msk, x, y, z, g, m) @@ -146,7 +146,7 @@ subroutine device_dirichlet_apply_vector(msk, x, y, z, g, m) #else call neko_error('No device backend configured') #endif - + end subroutine device_dirichlet_apply_vector - + end module device_dirichlet diff --git a/src/bc/bcknd/device/device_dong_outflow.F90 b/src/bc/bcknd/device/device_dong_outflow.F90 index bf7e19c6eac..5393080f48a 100644 --- a/src/bc/bcknd/device/device_dong_outflow.F90 +++ b/src/bc/bcknd/device/device_dong_outflow.F90 @@ -46,7 +46,7 @@ subroutine hip_dong_outflow_apply_scalar(msk, x, normal_x, normal_y,& import c_rp implicit none integer(c_int) :: m - real(kind=c_rp) :: uinf, delta + real(kind=c_rp) :: uinf, delta type(c_ptr), value :: msk, x, u, v, w, normal_x, normal_y, normal_z end subroutine hip_dong_outflow_apply_scalar end interface @@ -59,7 +59,7 @@ subroutine cuda_dong_outflow_apply_scalar(msk, x, normal_x, normal_y,& import c_rp implicit none integer(c_int) :: m - real(kind=c_rp) :: uinf, delta + real(kind=c_rp) :: uinf, delta type(c_ptr), value :: msk, x, u, v, w, normal_x, normal_y, normal_z end subroutine cuda_dong_outflow_apply_scalar end interface @@ -72,7 +72,7 @@ subroutine opencl_dong_outflow_apply_scalar(msk, x, normal_x, normal_y,& import c_rp implicit none integer(c_int) :: m - real(kind=c_rp) :: uinf, delta + real(kind=c_rp) :: uinf, delta type(c_ptr), value :: msk, x, u, v, w, normal_x, normal_y, normal_z end subroutine opencl_dong_outflow_apply_scalar end interface @@ -85,7 +85,7 @@ end subroutine opencl_dong_outflow_apply_scalar subroutine device_dong_outflow_apply_scalar(msk, x, normal_x, normal_y,& normal_z, u, v, w, uinf, delta, m) integer(c_int) :: m - real(kind=c_rp) :: uinf, delta + real(kind=c_rp) :: uinf, delta type(c_ptr) :: msk, x, u, v, w, normal_x, normal_y, normal_z #ifdef HAVE_HIP @@ -101,7 +101,7 @@ subroutine device_dong_outflow_apply_scalar(msk, x, normal_x, normal_y,& #else call neko_error('No device backend configured') #endif - + end subroutine device_dong_outflow_apply_scalar - + end module device_dong_outflow diff --git a/src/bc/bcknd/device/device_facet_normal.F90 b/src/bc/bcknd/device/device_facet_normal.F90 index 75f0f2083c6..c50c49ffe2d 100644 --- a/src/bc/bcknd/device/device_facet_normal.F90 +++ b/src/bc/bcknd/device/device_facet_normal.F90 @@ -72,7 +72,7 @@ end subroutine opencl_facet_normal_apply_surfvec #endif public :: device_facet_normal_apply_surfvec - + contains subroutine device_facet_normal_apply_surfvec(msk, facet, x, y, z, u, v, w, & @@ -92,7 +92,7 @@ subroutine device_facet_normal_apply_surfvec(msk, facet, x, y, z, u, v, w, & #else call neko_error('No device backend configured') #endif - + end subroutine device_facet_normal_apply_surfvec - + end module device_facet_normal diff --git a/src/bc/bcknd/device/device_inflow.F90 b/src/bc/bcknd/device/device_inflow.F90 index b8bdc776a12..14721ae8fe8 100644 --- a/src/bc/bcknd/device/device_inflow.F90 +++ b/src/bc/bcknd/device/device_inflow.F90 @@ -35,7 +35,7 @@ module device_inflow use utils use, intrinsic :: iso_c_binding, only : c_ptr private - + #ifdef HAVE_HIP interface @@ -78,7 +78,7 @@ end subroutine opencl_inflow_apply_vector contains - subroutine device_inflow_apply_vector(msk, x, y, z, g, m) + subroutine device_inflow_apply_vector(msk, x, y, z, g, m) integer, intent(in) :: m type(c_ptr) :: msk, x, y, z, g @@ -91,8 +91,8 @@ subroutine device_inflow_apply_vector(msk, x, y, z, g, m) #else call neko_error('No device backend configured') #endif - + end subroutine device_inflow_apply_vector - + end module device_inflow diff --git a/src/bc/bcknd/device/device_inhom_dirichlet.F90 b/src/bc/bcknd/device/device_inhom_dirichlet.F90 index e68f21abade..efdf81af1ed 100644 --- a/src/bc/bcknd/device/device_inhom_dirichlet.F90 +++ b/src/bc/bcknd/device/device_inhom_dirichlet.F90 @@ -105,7 +105,7 @@ subroutine opencl_inhom_dirichlet_apply_scalar(msk, x, bla_x, m) & type(c_ptr), value :: msk, x, bla_x end subroutine opencl_inhom_dirichlet_apply_scalar end interface -#endif +#endif contains @@ -122,9 +122,9 @@ subroutine device_inhom_dirichlet_apply_vector(msk, x, y, z, bla_x, bla_y, bla_z #else call neko_error('No device backend configured') #endif - + end subroutine device_inhom_dirichlet_apply_vector - + subroutine device_inhom_dirichlet_apply_scalar(msk, x, bla_x, m) integer, intent(in) :: m type(c_ptr) :: msk, x, bla_x @@ -138,7 +138,7 @@ subroutine device_inhom_dirichlet_apply_scalar(msk, x, bla_x, m) #else call neko_error('No device backend configured') #endif - + end subroutine device_inhom_dirichlet_apply_scalar - + end module device_inhom_dirichlet diff --git a/src/bc/bcknd/device/device_symmetry.F90 b/src/bc/bcknd/device/device_symmetry.F90 index a414965ab80..d0fc058c75e 100644 --- a/src/bc/bcknd/device/device_symmetry.F90 +++ b/src/bc/bcknd/device/device_symmetry.F90 @@ -67,7 +67,7 @@ subroutine opencl_symmetry_apply_vector(xmsk, ymsk, zmsk, x, y, z, m, n, l) & end subroutine opencl_symmetry_apply_vector end interface #endif - + contains subroutine device_symmetry_apply_vector(xmsk, ymsk, zmsk, x, y, z, m, n, l) @@ -83,7 +83,7 @@ subroutine device_symmetry_apply_vector(xmsk, ymsk, zmsk, x, y, z, m, n, l) #else call neko_error('No device backend configured') #endif - + end subroutine device_symmetry_apply_vector - + end module device_symmetry diff --git a/src/bc/bcknd/device/device_wall.F90 b/src/bc/bcknd/device/device_wall.F90 index dbe2e283c5b..d5d7ed78a67 100644 --- a/src/bc/bcknd/device/device_wall.F90 +++ b/src/bc/bcknd/device/device_wall.F90 @@ -36,7 +36,7 @@ module device_wall private #ifdef HAVE_HIP - interface + interface subroutine hip_no_slip_wall_apply_scalar(msk, x, m) & bind(c, name='hip_no_slip_wall_apply_scalar') use, intrinsic :: iso_c_binding @@ -45,7 +45,7 @@ subroutine hip_no_slip_wall_apply_scalar(msk, x, m) & type(c_ptr), value :: msk, x end subroutine hip_no_slip_wall_apply_scalar end interface - + interface subroutine hip_no_slip_wall_apply_vector(msk, x, y, z, m) & bind(c, name='hip_no_slip_wall_apply_vector') @@ -56,7 +56,7 @@ subroutine hip_no_slip_wall_apply_vector(msk, x, y, z, m) & end subroutine hip_no_slip_wall_apply_vector end interface #elif HAVE_CUDA - interface + interface subroutine cuda_no_slip_wall_apply_scalar(msk, x, m) & bind(c, name='cuda_no_slip_wall_apply_scalar') use, intrinsic :: iso_c_binding @@ -65,7 +65,7 @@ subroutine cuda_no_slip_wall_apply_scalar(msk, x, m) & type(c_ptr), value :: msk, x end subroutine cuda_no_slip_wall_apply_scalar end interface - + interface subroutine cuda_no_slip_wall_apply_vector(msk, x, y, z, m) & bind(c, name='cuda_no_slip_wall_apply_vector') @@ -76,7 +76,7 @@ subroutine cuda_no_slip_wall_apply_vector(msk, x, y, z, m) & end subroutine cuda_no_slip_wall_apply_vector end interface #elif HAVE_OPENCL - interface + interface subroutine opencl_no_slip_wall_apply_scalar(msk, x, m) & bind(c, name='opencl_no_slip_wall_apply_scalar') use, intrinsic :: iso_c_binding @@ -85,7 +85,7 @@ subroutine opencl_no_slip_wall_apply_scalar(msk, x, m) & type(c_ptr), value :: msk, x end subroutine opencl_no_slip_wall_apply_scalar end interface - + interface subroutine opencl_no_slip_wall_apply_vector(msk, x, y, z, m) & bind(c, name='opencl_no_slip_wall_apply_vector') @@ -98,7 +98,7 @@ end subroutine opencl_no_slip_wall_apply_vector #endif public :: device_no_slip_wall_apply_scalar, device_no_slip_wall_apply_vector - + contains subroutine device_no_slip_wall_apply_scalar(msk, x, m) @@ -114,7 +114,7 @@ subroutine device_no_slip_wall_apply_scalar(msk, x, m) #else call neko_error('No device backend configured') #endif - + end subroutine device_no_slip_wall_apply_scalar subroutine device_no_slip_wall_apply_vector(msk, x, y, z, m) @@ -130,7 +130,7 @@ subroutine device_no_slip_wall_apply_vector(msk, x, y, z, m) #else call neko_error('No device backend configured') #endif - + end subroutine device_no_slip_wall_apply_vector - + end module device_wall diff --git a/src/bc/bcknd/device/hip/dirichlet_kernel.h b/src/bc/bcknd/device/hip/dirichlet_kernel.h index 13e66153373..8172e6c782e 100644 --- a/src/bc/bcknd/device/hip/dirichlet_kernel.h +++ b/src/bc/bcknd/device/hip/dirichlet_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_DIRICHLET_KERNEL__ +#define __BC_DIRICHLET_KERNEL__ + /** * Device kernel for scalar apply for a Dirichlet condition */ @@ -72,3 +75,4 @@ __global__ void dirichlet_apply_vector_kernel(const int * __restrict__ msk, } } +#endif // __BC_HIP_DIRICHLET_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/hip/dong_outflow_kernel.h b/src/bc/bcknd/device/hip/dong_outflow_kernel.h index b6dd44917ca..7f8c59e5a6b 100644 --- a/src/bc/bcknd/device/hip/dong_outflow_kernel.h +++ b/src/bc/bcknd/device/hip/dong_outflow_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_DONG_OUTFLOW_KERNEL__ +#define __BC_DONG_OUTFLOW_KERNEL__ + /** * Device kernel for vector apply for a dong outflow condition */ @@ -48,7 +51,7 @@ void dong_outflow_apply_scalar_kernel(const int * __restrict__ msk, const T uinf, const T delta, const int m) { - + const int idx = blockIdx.x * blockDim.x + threadIdx.x; const int str = blockDim.x * gridDim.x; @@ -62,3 +65,5 @@ void dong_outflow_apply_scalar_kernel(const int * __restrict__ msk, x[k] = -0.5*(uk*uk+vk*vk+wk*wk)*S0; } } + +#endif // __BC_DONG_OUTFLOW_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/hip/facet_normal_kernel.h b/src/bc/bcknd/device/hip/facet_normal_kernel.h index b787bd628b9..c02ddddb02d 100644 --- a/src/bc/bcknd/device/hip/facet_normal_kernel.h +++ b/src/bc/bcknd/device/hip/facet_normal_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_FACET_NORMAL_KERNEL__ +#define __BC_FACET_NORMAL_KERNEL__ + /** * Computes the linear index for area and normal arrays * @note Fortran indexing input, C indexing output @@ -122,3 +125,4 @@ void facet_normal_apply_surfvec_kernel(const int * __restrict__ msk, } } +#endif // __BC_FACET_NORMAL_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/hip/inflow_kernel.h b/src/bc/bcknd/device/hip/inflow_kernel.h index b4945a9388d..4c35e055471 100644 --- a/src/bc/bcknd/device/hip/inflow_kernel.h +++ b/src/bc/bcknd/device/hip/inflow_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_INFLOW_KERNEL__ +#define __BC_INFLOW_KERNEL__ + /** * Device kernel for vector apply for a Dirichlet condition */ @@ -56,3 +59,4 @@ __global__ void inflow_apply_vector_kernel(const int * __restrict__ msk, } } +#endif // __BC_INFLOW_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/hip/inhom_dirichlet_kernel.h b/src/bc/bcknd/device/hip/inhom_dirichlet_kernel.h index 3a84872abee..a71ef472cb4 100644 --- a/src/bc/bcknd/device/hip/inhom_dirichlet_kernel.h +++ b/src/bc/bcknd/device/hip/inhom_dirichlet_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_INHOM_DIRICHLET_KERNEL__ +#define __BC_INHOM_DIRICHLET_KERNEL__ + /** * Device kernel for vector apply for an inhomogeneous Dirichlet condition */ @@ -73,3 +76,5 @@ __global__ void inhom_dirichlet_apply_scalar_kernel(const int * __restrict__ msk x[k] = bla_x[i]; } } + +#endif // __BC_INHOM_DIRICHLET_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/hip/no_slip_wall_kernel.h b/src/bc/bcknd/device/hip/no_slip_wall_kernel.h index 4e18acef4e7..b83e811ce45 100644 --- a/src/bc/bcknd/device/hip/no_slip_wall_kernel.h +++ b/src/bc/bcknd/device/hip/no_slip_wall_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_NO_SLIP_WALL_KERNEL__ +#define __BC_NO_SLIP_WALL_KERNEL__ + #include /** @@ -72,3 +75,4 @@ __global__ void no_slip_wall_apply_vector_kernel(const int * __restrict__ msk, } } +#endif // __BC_NO_SLIP_WALL_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/hip/symmetry_kernel.h b/src/bc/bcknd/device/hip/symmetry_kernel.h index f96c33d5e3f..1aea0584621 100644 --- a/src/bc/bcknd/device/hip/symmetry_kernel.h +++ b/src/bc/bcknd/device/hip/symmetry_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_SYMETRY_KERNEL__ +#define __BC_SYMETRY_KERNEL__ + /** * Device kernel for vector apply for a symmetry condition */ @@ -65,3 +68,4 @@ __global__ void symmetry_apply_vector_kernel(const int * __restrict__ xmsk, } } +#endif // __BC_SYMETRY_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/opencl/dirichlet_kernel.cl b/src/bc/bcknd/device/opencl/dirichlet_kernel.cl index 6e4a79169b8..cea772ade20 100644 --- a/src/bc/bcknd/device/opencl/dirichlet_kernel.cl +++ b/src/bc/bcknd/device/opencl/dirichlet_kernel.cl @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_DIRICHLET_KERNEL__ +#define __BC_DIRICHLET_KERNEL__ + /** * Device kernel for scalar apply for a Dirichlet condition */ @@ -69,3 +72,5 @@ __kernel void dirichlet_apply_vector_kernel(__global const int *msk, z[k] = g; } } + +#endif // __BC_DIRICHLET_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/opencl/dong_outflow_kernel.cl b/src/bc/bcknd/device/opencl/dong_outflow_kernel.cl index ec35dd991d0..6a7f9fed77e 100644 --- a/src/bc/bcknd/device/opencl/dong_outflow_kernel.cl +++ b/src/bc/bcknd/device/opencl/dong_outflow_kernel.cl @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_DONG_OUTFLOW_KERNEL__ +#define __BC_DONG_OUTFLOW_KERNEL__ + /** * Device kernel for scalar apply for a dong outflow condition */ @@ -61,3 +64,5 @@ void dong_outflow_apply_scalar_kernel(__global const int * __restrict__ msk, x[k] = -0.5*(uk*uk+vk*vk+wk*wk)*S0; } } + +#endif // __BC_DONG_OUTFLOW_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/opencl/facet_normal_kernel.cl b/src/bc/bcknd/device/opencl/facet_normal_kernel.cl index cff360147e2..53f7e73de5d 100644 --- a/src/bc/bcknd/device/opencl/facet_normal_kernel.cl +++ b/src/bc/bcknd/device/opencl/facet_normal_kernel.cl @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_FACET_NORMAL_KERNEL__ +#define __BC_FACET_NORMAL_KERNEL__ + /** * Computes the linear index for area and normal arrays * @note Fortran indexing input, C indexing output @@ -120,3 +123,4 @@ void facet_normal_apply_surfvec_kernel(__global const int *msk, } } +#endif // __BC_FACET_NORMAL_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/opencl/inflow_kernel.cl b/src/bc/bcknd/device/opencl/inflow_kernel.cl index e99b3be5e6f..1d7a6420de1 100644 --- a/src/bc/bcknd/device/opencl/inflow_kernel.cl +++ b/src/bc/bcknd/device/opencl/inflow_kernel.cl @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_INFLOW_KERNEL__ +#define __BC_INFLOW_KERNEL__ + /** * Device kernel for vector apply for a Dirichlet condition */ @@ -54,3 +57,5 @@ __kernel void inflow_apply_vector_kernel(__global const int *msk, z[k] = gz; } } + +#endif // __BC_INFLOW_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/opencl/inhom_dirichlet_kernel.cl b/src/bc/bcknd/device/opencl/inhom_dirichlet_kernel.cl index c6e7c1cf2c4..30ce1cb492f 100644 --- a/src/bc/bcknd/device/opencl/inhom_dirichlet_kernel.cl +++ b/src/bc/bcknd/device/opencl/inhom_dirichlet_kernel.cl @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_INFLOW_DIRICHLET_KERNEL__ +#define __BC_INFLOW_DIRICHLET_KERNEL__ + /** * Device kernel for vector apply for a inhomogeneous Dirichlet condition */ @@ -71,3 +74,5 @@ __kernel void inhom_dirichlet_apply_scalar_kernel(__global const int *msk, x[k] = bla_x[i]; } } + +#endif // __BC_INFLOW_DIRICHLET_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/opencl/no_slip_wall_kernel.cl b/src/bc/bcknd/device/opencl/no_slip_wall_kernel.cl index 16e8e8232eb..9241165b9cf 100644 --- a/src/bc/bcknd/device/opencl/no_slip_wall_kernel.cl +++ b/src/bc/bcknd/device/opencl/no_slip_wall_kernel.cl @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_NO_SLIP_WALL_KERNEL__ +#define __BC_NO_SLIP_WALL_KERNEL__ + /** * Device kernel for scalar apply for a no-slip wall conditon */ @@ -68,3 +71,4 @@ __kernel void no_slip_wall_apply_vector_kernel(__global const int *msk, } } +#endif // __BC_NO_SLIP_WALL_KERNEL__ \ No newline at end of file diff --git a/src/bc/bcknd/device/opencl/symmetry_kernel.cl b/src/bc/bcknd/device/opencl/symmetry_kernel.cl index 2379085e20e..678319acae6 100644 --- a/src/bc/bcknd/device/opencl/symmetry_kernel.cl +++ b/src/bc/bcknd/device/opencl/symmetry_kernel.cl @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __BC_SYMETRY_KERNEL__ +#define __BC_SYMETRY_KERNEL__ + /** * Device kernel for vector apply for a symmetry condition */ @@ -64,3 +67,5 @@ __kernel void symmetry_apply_vector_kernel(__global const int *xmsk, } } + +#endif // ___BC_SYMETRY_KERNEL___ \ No newline at end of file diff --git a/src/bc/blasius.f90 b/src/bc/blasius.f90 index 66c55f0f4f1..7a67f0e920b 100644 --- a/src/bc/blasius.f90 +++ b/src/bc/blasius.f90 @@ -79,9 +79,9 @@ subroutine blasius_free(this) if (c_associated(this%blaz_d)) then call device_free(this%blaz_d) end if - + end subroutine blasius_free - + !> No-op scalar apply subroutine blasius_apply_scalar(this, x, n, t, tstep) class(blasius_t), intent(inout) :: this @@ -98,7 +98,7 @@ subroutine blasius_apply_scalar_dev(this, x_d, t, tstep) real(kind=rp), intent(in), optional :: t integer, intent(in), optional :: tstep end subroutine blasius_apply_scalar_dev - + !> Apply blasius conditions (vector valued) subroutine blasius_apply_vector(this, x, y, z, n, t, tstep) class(blasius_t), intent(inout) :: this @@ -122,10 +122,10 @@ subroutine blasius_apply_vector(this, x, y, z, n, t, tstep) case(1,2) x(k) = this%bla(zc(idx(1), idx(2), idx(3), idx(4)), & this%delta, this%x(1)) - y(k) = 0.0_rp + y(k) = 0.0_rp z(k) = 0.0_rp case(3,4) - x(k) = 0.0_rp + x(k) = 0.0_rp y(k) = this%bla(xc(idx(1), idx(2), idx(3), idx(4)), & this%delta, this%x(2)) z(k) = 0.0_rp @@ -134,7 +134,7 @@ subroutine blasius_apply_vector(this, x, y, z, n, t, tstep) y(k) = 0.0_rp z(k) = this%bla(yc(idx(1), idx(2), idx(3), idx(4)), & this%delta, this%x(3)) - end select + end select end do end associate end subroutine blasius_apply_vector @@ -172,7 +172,7 @@ subroutine blasius_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) call device_alloc(blax_d, s) call device_alloc(blay_d, s) call device_alloc(blaz_d, s) - + do i = 1, m k = this%msk(i) facet = this%facet(i) @@ -181,10 +181,10 @@ subroutine blasius_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) case(1,2) bla_x(i) = this%bla(zc(idx(1), idx(2), idx(3), idx(4)), & this%delta, this%x(1)) - bla_y(i) = 0.0_rp + bla_y(i) = 0.0_rp bla_z(i) = 0.0_rp case(3,4) - bla_x(i) = 0.0_rp + bla_x(i) = 0.0_rp bla_y(i) = this%bla(xc(idx(1), idx(2), idx(3), idx(4)), & this%delta, this%x(2)) bla_z(i) = 0.0_rp @@ -196,8 +196,8 @@ subroutine blasius_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) end select end do - call device_memcpy(bla_x, blax_d, m, HOST_TO_DEVICE) - call device_memcpy(bla_y, blay_d, m, HOST_TO_DEVICE) + call device_memcpy(bla_x, blax_d, m, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(bla_y, blay_d, m, HOST_TO_DEVICE, sync=.false.) call device_memcpy(bla_z, blaz_d, m, HOST_TO_DEVICE, sync=.true.) deallocate(bla_x, bla_y, bla_z) @@ -205,7 +205,7 @@ subroutine blasius_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) call device_inhom_dirichlet_apply_vector(this%msk_d, x_d, y_d, z_d, & blax_d, blay_d, blaz_d, m) - + end associate end subroutine blasius_apply_vector_dev @@ -216,7 +216,7 @@ subroutine blasius_set_params(this, delta, type) real(kind=rp) :: delta character(len=*) :: type this%delta = delta - + select case(trim(type)) case('linear') this%bla => blasius_linear @@ -239,5 +239,5 @@ subroutine blasius_set_coef(this, c) type(coef_t), target, intent(inout) :: c this%c => c end subroutine blasius_set_coef - + end module blasius diff --git a/src/bc/dirichlet.f90 b/src/bc/dirichlet.f90 index b1dc3462491..0c16064c278 100644 --- a/src/bc/dirichlet.f90 +++ b/src/bc/dirichlet.f90 @@ -89,7 +89,7 @@ subroutine dirichlet_apply_vector(this, x, y, z, n, t, tstep) y(k) = this%g z(k) = this%g end do - + end subroutine dirichlet_apply_vector !> Boundary condition apply for a generic Dirichlet condition @@ -102,10 +102,10 @@ subroutine dirichlet_apply_scalar_dev(this, x_d, t, tstep) call device_dirichlet_apply_scalar(this%msk_d, x_d, & this%g, size(this%msk)) - + end subroutine dirichlet_apply_scalar_dev - - !> Boundary condition apply for a generic Dirichlet condition + + !> Boundary condition apply for a generic Dirichlet condition !! to vectors @a x, @a y and @a z (device version) subroutine dirichlet_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) class(dirichlet_t), intent(inout), target :: this @@ -117,7 +117,7 @@ subroutine dirichlet_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) call device_dirichlet_apply_vector(this%msk_d, x_d, y_d, z_d, & this%g, size(this%msk)) - + end subroutine dirichlet_apply_vector_dev !> Set value of \f$ g \f$ @@ -126,7 +126,7 @@ subroutine dirichlet_set_g(this, g) real(kind=rp), intent(in) :: g this%g = g - + end subroutine dirichlet_set_g - + end module dirichlet diff --git a/src/bc/dong_outflow.f90 b/src/bc/dong_outflow.f90 index c5fb35b3251..d07d2ce9817 100644 --- a/src/bc/dong_outflow.f90 +++ b/src/bc/dong_outflow.f90 @@ -42,12 +42,13 @@ module dong_outflow use coefs use utils use device_dong_outflow + use field_registry, only : neko_field_registry use, intrinsic :: iso_c_binding, only : c_ptr, c_sizeof implicit none private !> Dong outflow condition - !! Follows + !! Follows !! "A Convective-like Energy-Stable Open Boundary Condition for !! Simulations of Incompressible Flows" !! by S. Dong @@ -55,9 +56,8 @@ module dong_outflow type(field_t), pointer :: u type(field_t), pointer :: v type(field_t), pointer :: w - type(coef_t), pointer :: c_Xh - real(kind=rp) :: delta - real(kind=rp) :: uinf + real(kind=rp) :: delta + real(kind=rp) :: uinf type(c_ptr) :: normal_x_d type(c_ptr) :: normal_y_d type(c_ptr) :: normal_z_d @@ -70,55 +70,53 @@ module dong_outflow end type dong_outflow_t contains - subroutine dong_outflow_set_vars(this, c_Xh, u, v, w, uinf, delta) - class(dong_outflow_t), intent(inout) :: this - type(coef_t), target, intent(in) :: c_Xh - type(field_t), target, intent(in) :: u, v, w - real(kind=rp), intent(in) :: uinf - real(kind=rp), optional, intent(in) :: delta - real(kind=rp), allocatable :: temp_x(:) - real(kind=rp), allocatable :: temp_y(:) - real(kind=rp), allocatable :: temp_z(:) - real(c_rp) :: dummy - integer :: i, m, k, facet, idx(4) - real(kind=rp) :: normal_xyz(3) - - - if (present(delta)) then - this%delta = delta - else - this%delta = 0.01_rp - end if - this%uinf = uinf - this%u => u - this%v => v - this%c_Xh=> c_Xh - this%w => w - if ((NEKO_BCKND_DEVICE .eq. 1) .and. (this%msk(0) .gt. 0)) then - call device_alloc(this%normal_x_d,c_sizeof(dummy)*this%msk(0)) - call device_alloc(this%normal_y_d,c_sizeof(dummy)*this%msk(0)) - call device_alloc(this%normal_z_d,c_sizeof(dummy)*this%msk(0)) - m = this%msk(0) - allocate(temp_x(m)) - allocate(temp_y(m)) - allocate(temp_z(m)) - do i = 1, m - k = this%msk(i) - facet = this%facet(i) - idx = nonlinear_index(k,this%Xh%lx, this%Xh%lx,this%Xh%lx) - normal_xyz = & - this%c_Xh%get_normal(idx(1), idx(2), idx(3), idx(4),facet) + subroutine dong_outflow_set_vars(this, uinf, delta) + class(dong_outflow_t), intent(inout) :: this + real(kind=rp), intent(in) :: uinf + real(kind=rp), optional, intent(in) :: delta + real(kind=rp), allocatable :: temp_x(:) + real(kind=rp), allocatable :: temp_y(:) + real(kind=rp), allocatable :: temp_z(:) + real(c_rp) :: dummy + integer :: i, m, k, facet, idx(4) + real(kind=rp) :: normal_xyz(3) + + + if (present(delta)) then + this%delta = delta + else + this%delta = 0.01_rp + end if + this%uinf = uinf + this%u => neko_field_registry%get_field("u") + this%v => neko_field_registry%get_field("v") + this%w => neko_field_registry%get_field("w") + if ((NEKO_BCKND_DEVICE .eq. 1) .and. (this%msk(0) .gt. 0)) then + call device_alloc(this%normal_x_d,c_sizeof(dummy)*this%msk(0)) + call device_alloc(this%normal_y_d,c_sizeof(dummy)*this%msk(0)) + call device_alloc(this%normal_z_d,c_sizeof(dummy)*this%msk(0)) + m = this%msk(0) + allocate(temp_x(m)) + allocate(temp_y(m)) + allocate(temp_z(m)) + do i = 1, m + k = this%msk(i) + facet = this%facet(i) + idx = nonlinear_index(k,this%Xh%lx, this%Xh%lx,this%Xh%lx) + normal_xyz = & + this%coef%get_normal(idx(1), idx(2), idx(3), idx(4),facet) temp_x(i) = normal_xyz(1) temp_y(i) = normal_xyz(2) temp_z(i) = normal_xyz(3) end do - call device_memcpy(temp_x, this%normal_x_d, m, HOST_TO_DEVICE) - call device_memcpy(temp_y, this%normal_y_d, m, HOST_TO_DEVICE) + call device_memcpy(temp_x, this%normal_x_d, m, & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(temp_y, this%normal_y_d, m, & + HOST_TO_DEVICE, sync=.false.) call device_memcpy(temp_z, this%normal_z_d, m, & HOST_TO_DEVICE, sync=.true.) deallocate( temp_x, temp_y, temp_z) end if - end subroutine dong_outflow_set_vars !> Boundary condition apply for a generic Dirichlet condition @@ -140,13 +138,13 @@ subroutine dong_outflow_apply_scalar(this, x, n, t, tstep) uy = this%v%x(k,1,1,1) uz = this%w%x(k,1,1,1) idx = nonlinear_index(k,this%Xh%lx, this%Xh%lx,this%Xh%lx) - normal_xyz = this%c_Xh%get_normal(idx(1), idx(2), idx(3), idx(4),facet) - vn = ux*normal_xyz(1) + uy*normal_xyz(2) + uz*normal_xyz(3) + normal_xyz = this%coef%get_normal(idx(1), idx(2), idx(3), idx(4),facet) + vn = ux*normal_xyz(1) + uy*normal_xyz(2) + uz*normal_xyz(3) S0 = 0.5_rp*(1.0_rp - tanh(vn / (this%uinf * this%delta))) - + x(k)=-0.5*(ux*ux+uy*uy+uz*uz)*S0 end do -end subroutine dong_outflow_apply_scalar + end subroutine dong_outflow_apply_scalar !> Boundary condition apply for a generic Dirichlet condition !! to vectors @a x, @a y and @a z @@ -158,7 +156,7 @@ subroutine dong_outflow_apply_vector(this, x, y, z, n, t, tstep) real(kind=rp), intent(inout), dimension(n) :: z real(kind=rp), intent(in), optional :: t integer, intent(in), optional :: tstep - + end subroutine dong_outflow_apply_vector !> Boundary condition apply for a generic Dirichlet condition @@ -174,10 +172,10 @@ subroutine dong_outflow_apply_scalar_dev(this, x_d, t, tstep) this%u%x_d, this%v%x_d, this%w%x_d,& this%uinf, this%delta,& this%msk(0)) - + end subroutine dong_outflow_apply_scalar_dev - - !> Boundary condition apply for a generic Dirichlet condition + + !> Boundary condition apply for a generic Dirichlet condition !! to vectors @a x, @a y and @a z (device version) subroutine dong_outflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) class(dong_outflow_t), intent(inout), target :: this @@ -189,7 +187,7 @@ subroutine dong_outflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) !call device_dong_outflow_apply_vector(this%msk_d, x_d, y_d, z_d, & ! this%g, size(this%msk)) - + end subroutine dong_outflow_apply_vector_dev end module dong_outflow diff --git a/src/bc/facet_normal.f90 b/src/bc/facet_normal.f90 index 15856db900e..47fc23f7938 100644 --- a/src/bc/facet_normal.f90 +++ b/src/bc/facet_normal.f90 @@ -35,8 +35,8 @@ module facet_normal use device_facet_normal use num_types use math - use coefs, only : coef_t - use dirichlet, only : dirichlet_t + use coefs, only : coef_t + use dirichlet, only : dirichlet_t use utils use, intrinsic :: iso_c_binding, only : c_ptr implicit none @@ -50,7 +50,6 @@ module facet_normal procedure, pass(this) :: apply_vector => facet_normal_apply_vector procedure, pass(this) :: apply_surfvec => facet_normal_apply_surfvec procedure, pass(this) :: apply_surfvec_dev => facet_normal_apply_surfvec_dev - procedure, pass(this) :: set_coef => facet_normal_set_coef end type facet_normal_t contains @@ -88,18 +87,15 @@ subroutine facet_normal_apply_surfvec(this, x, y, z, u, v, w, n, t, tstep) real(kind=rp), intent(in), optional :: t integer, intent(in), optional :: tstep integer :: i, m, k, idx(4), facet - - if (.not. associated(this%c)) then - call neko_error('No coefficients assigned') - end if - associate(c => this%c) + + associate(c => this%coef) m = this%msk(0) do i = 1, m k = this%msk(i) facet = this%facet(i) idx = nonlinear_index(k, c%Xh%lx, c%Xh%lx, c%Xh%lx) select case(facet) - case(1,2) + case(1,2) x(k) = u(k) * c%nx(idx(2), idx(3), facet, idx(4)) & * c%area(idx(2), idx(3), facet, idx(4)) y(k) = v(k) * c%ny(idx(2), idx(3), facet, idx(4)) & @@ -112,7 +108,7 @@ subroutine facet_normal_apply_surfvec(this, x, y, z, u, v, w, n, t, tstep) y(k) = v(k) * c%ny(idx(1), idx(3), facet, idx(4)) & * c%area(idx(1), idx(3), facet, idx(4)) z(k) = w(k) * c%nz(idx(1), idx(3), facet, idx(4)) & - * c%area(idx(1), idx(3), facet, idx(4)) + * c%area(idx(1), idx(3), facet, idx(4)) case(5,6) x(k) = u(k) * c%nx(idx(1), idx(2), facet, idx(4)) & * c%area(idx(1), idx(2), facet, idx(4)) @@ -123,15 +119,8 @@ subroutine facet_normal_apply_surfvec(this, x, y, z, u, v, w, n, t, tstep) end select end do end associate - + end subroutine facet_normal_apply_surfvec - - !> Assign coefficients (facet normals etc) - subroutine facet_normal_set_coef(this, c) - class(facet_normal_t), intent(inout) :: this - type(coef_t), target, intent(inout) :: c - this%c => c - end subroutine facet_normal_set_coef !> Apply in facet normal direction (vector valued, device version) subroutine facet_normal_apply_surfvec_dev(this, x_d, y_d, z_d, & @@ -141,16 +130,13 @@ subroutine facet_normal_apply_surfvec_dev(this, x_d, y_d, z_d, & real(kind=rp), intent(in), optional :: t integer, intent(in), optional :: tstep - if (.not. associated(this%c)) then - call neko_error('No coefficients assigned') - end if - associate(c => this%c) + associate(c => this%coef) call device_facet_normal_apply_surfvec(this%msk_d, this%facet_d, & x_d, y_d, z_d, u_d, v_d, w_d, & c%nx_d, c%ny_d, c%nz_d, c%area_d, & c%Xh%lx, size(this%msk)) end associate - + end subroutine facet_normal_apply_surfvec_dev - + end module facet_normal diff --git a/src/bc/field_dirichlet.f90 b/src/bc/field_dirichlet.f90 new file mode 100644 index 00000000000..84b80c8d846 --- /dev/null +++ b/src/bc/field_dirichlet.f90 @@ -0,0 +1,189 @@ +! Copyright (c) 2020-2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Defines inflow dirichlet conditions +module field_dirichlet + use num_types, only: rp + use coefs, only: coef_t + use dirichlet, only: dirichlet_t + use bc, only: bc_list_t, bc_t + use device, only: c_ptr, c_size_t + use utils, only: split_string + use field, only : field_t + use field_list, only : field_list_t + use math, only: masked_copy + use device_math, only: device_masked_copy + use dofmap, only : dofmap_t + use utils, only: neko_error + implicit none + private + + !> User defined dirichlet condition, for which the user can work + !! with an entire field. + !! Would be neat to add another class that contains all three + !! dirichlet bcs for the velocity, this bc would then implement + !! apply_vector. + type, public, extends(bc_t) :: field_dirichlet_t + type(field_t) :: field_bc + contains + !> Constructor. + procedure, pass(this) :: init_field => field_dirichlet_init + !> Apply scalar by performing a masked copy. + procedure, pass(this) :: apply_scalar => field_dirichlet_apply_scalar + !> (No-op) Apply vector. + procedure, pass(this) :: apply_vector => field_dirichlet_apply_vector + !> (No-op) Apply vector (device). + procedure, pass(this) :: apply_vector_dev => field_dirichlet_apply_vector_dev + !> Apply scalar (device). + procedure, pass(this) :: apply_scalar_dev => field_dirichlet_apply_scalar_dev + end type field_dirichlet_t + + !> Abstract interface defining a dirichlet condition on a list of fields. + !! @param field_bc_list List of fields that are used to extract values for field_dirichlet. + !! @param dirichlet_bc_list List of BCs containing field_dirichlet_t BCs only. + !! @param coef Coef object. + !! @param t Current time. + !! @param tstep Current time step. + !! @param which_solver Indicates wether the fields provided come from "fluid" or "scalar". + abstract interface + subroutine field_dirichlet_update(dirichlet_field_list, dirichlet_bc_list, coef, t, tstep, which_solver) + import rp + import field_list_t + import bc_list_t + import coef_t + type(field_list_t), intent(inout) :: dirichlet_field_list + type(bc_list_t), intent(inout) :: dirichlet_bc_list + type(coef_t), intent(inout) :: coef + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + character(len=*), intent(in) :: which_solver + end subroutine field_dirichlet_update + end interface + + public :: field_dirichlet_update + +contains + + !> Initializes this%field_bc. + !! @param bc_name Name of this%field_bc + subroutine field_dirichlet_init(this, bc_name) + class(field_dirichlet_t), intent(inout) :: this + character(len=*), intent(in) :: bc_name + + call this%field_bc%init(this%dof, bc_name) + + end subroutine field_dirichlet_init + + !> Destructor. Currently this%field_bc is being freed in `fluid_scheme::free` + subroutine field_dirichlet_free(this) + type(field_dirichlet_t), intent(inout) :: this + + call this%field_bc%free() + + end subroutine field_dirichlet_free + + !> Apply scalar by performing a masked copy. + !! @param x Field onto which to copy the values (e.g. u,v,w,p or s). + !! @param n Size of the array `x`. + !! @param t Time. + !! @param tstep Time step. + subroutine field_dirichlet_apply_scalar(this, x, n, t, tstep) + class(field_dirichlet_t), intent(inout) :: this + integer, intent(in) :: n + real(kind=rp), intent(inout), dimension(n) :: x + real(kind=rp), intent(in), optional :: t + integer, intent(in), optional :: tstep + + if (this%msk(0) .gt. 0) then + call masked_copy(x, this%field_bc%x, this%msk, n, this%msk(0)) + end if + + end subroutine field_dirichlet_apply_scalar + + !> Apply scalar (device). + !! @param x_d Device pointer to the field onto which to copy the values. + !! @param t Time. + !! @param tstep Time step. + subroutine field_dirichlet_apply_scalar_dev(this, x_d, t, tstep) + class(field_dirichlet_t), intent(inout), target :: this + type(c_ptr) :: x_d + real(kind=rp), intent(in), optional :: t + integer, intent(in), optional :: tstep + + if (this%msk(0) .gt. 0) then + call device_masked_copy(x_d, this%field_bc%x_d, this%msk_d, & + this%field_bc%dof%size(), this%msk(0)) + end if + + end subroutine field_dirichlet_apply_scalar_dev + + !> (No-op) Apply vector. + !! @param x x-component of the field onto which to apply the values. + !! @param y y-component of the field onto which to apply the values. + !! @param z z-component of the field onto which to apply the values. + !! @param n Size of the `x`, `y` and `z` arrays. + !! @param t Time. + !! @param tstep Time step. + subroutine field_dirichlet_apply_vector(this, x, y, z, n, t, tstep) + class(field_dirichlet_t), intent(inout) :: this + integer, intent(in) :: n + real(kind=rp), intent(inout), dimension(n) :: x + real(kind=rp), intent(inout), dimension(n) :: y + real(kind=rp), intent(inout), dimension(n) :: z + real(kind=rp), intent(in), optional :: t + integer, intent(in), optional :: tstep + + call neko_error("field_dirichlet cannot apply vector BCs.& +&Use field_dirichlet_vector instead!") + + end subroutine field_dirichlet_apply_vector + + !> (No-op) Apply vector (device). + !! @param x x-component of the field onto which to apply the values. + !! @param y y-component of the field onto which to apply the values. + !! @param z z-component of the field onto which to apply the values. + !! @param t Time. + !! @param tstep Time step. + subroutine field_dirichlet_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) + class(field_dirichlet_t), intent(inout), target :: this + type(c_ptr) :: x_d + type(c_ptr) :: y_d + type(c_ptr) :: z_d + real(kind=rp), intent(in), optional :: t + integer, intent(in), optional :: tstep + + call neko_error("field_dirichlet cannot apply vector BCs.& +&Use field_dirichlet_vector instead!") + + end subroutine field_dirichlet_apply_vector_dev + +end module field_dirichlet diff --git a/src/bc/field_dirichlet_vector.f90 b/src/bc/field_dirichlet_vector.f90 new file mode 100644 index 00000000000..6c8ebd79f95 --- /dev/null +++ b/src/bc/field_dirichlet_vector.f90 @@ -0,0 +1,188 @@ +! Copyright (c) 2020-2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Defines inflow dirichlet conditions +module field_dirichlet_vector + use num_types, only: rp + use coefs, only: coef_t + use dirichlet, only: dirichlet_t + use bc, only: bc_list_t, bc_t + use device, only: c_ptr, c_size_t + use utils, only: split_string + use field, only : field_t + use field_list, only : field_list_t + use math, only: masked_copy + use device_math, only: device_masked_copy + use dofmap, only : dofmap_t + use field_dirichlet, only: field_dirichlet_t + use utils, only: neko_error + implicit none + private + + !> Extension of the user defined dirichlet condition `field_dirichlet` + ! for the application on a vector field. + type, public, extends(bc_t) :: field_dirichlet_vector_t + type(field_dirichlet_t) :: field_dirichlet_u + type(field_dirichlet_t) :: field_dirichlet_v + type(field_dirichlet_t) :: field_dirichlet_w + contains + !> Initializes this%field_bc. + procedure, pass(this) :: init_field => field_dirichlet_vector_init + !> Apply scalar by performing a masked copy. + procedure, pass(this) :: apply_scalar => field_dirichlet_vector_apply_scalar + !> (No-op) Apply vector. + procedure, pass(this) :: apply_vector => field_dirichlet_vector_apply_vector + !> (No-op) Apply vector (device). + procedure, pass(this) :: apply_vector_dev => & + field_dirichlet_vector_apply_vector_dev + !> Apply scalar (device). + procedure, pass(this) :: apply_scalar_dev => & + field_dirichlet_vector_apply_scalar_dev + end type field_dirichlet_vector_t + +contains + + !> Initializes this%field_bc. + subroutine field_dirichlet_vector_init(this, bc_name) + class(field_dirichlet_vector_t), intent(inout) :: this + character(len=*), intent(in) :: bc_name + + call neko_error("Fields must be initialized individually!") + + end subroutine field_dirichlet_vector_init + + !> Destructor. Currently unused as is, all field_dirichlet attributes + !! are freed in `fluid_scheme::free`. + subroutine field_dirichlet_vector_free(this) + type(field_dirichlet_vector_t), intent(inout) :: this + + call this%field_dirichlet_u%free() + call this%field_dirichlet_v%free() + call this%field_dirichlet_w%free() + + end subroutine field_dirichlet_vector_free + + !> Apply scalar by performing a masked copy. + !! @param x Field onto which to copy the values (e.g. u,v,w,p or s). + !! @param n Size of the array `x`. + !! @param t Time. + !! @param tstep Time step. + subroutine field_dirichlet_vector_apply_scalar(this, x, n, t, tstep) + class(field_dirichlet_vector_t), intent(inout) :: this + integer, intent(in) :: n + real(kind=rp), intent(inout), dimension(n) :: x + real(kind=rp), intent(in), optional :: t + integer, intent(in), optional :: tstep + + call neko_error("field_dirichlet_vector cannot apply scalar BCs.& +&Use field_dirichlet instead!") + + end subroutine field_dirichlet_vector_apply_scalar + + !> Apply scalar (device). + !! @param x_d Device pointer to the field onto which to copy the values. + !! @param t Time. + !! @param tstep Time step. + subroutine field_dirichlet_vector_apply_scalar_dev(this, x_d, t, tstep) + class(field_dirichlet_vector_t), intent(inout), target :: this + type(c_ptr) :: x_d + real(kind=rp), intent(in), optional :: t + integer, intent(in), optional :: tstep + + call neko_error("field_dirichlet_vector cannot apply scalar BCs.& +&Use field_dirichlet instead!") + + end subroutine field_dirichlet_vector_apply_scalar_dev + + !> (No-op) Apply vector. + !! @param x x-component of the field onto which to apply the values. + !! @param y y-component of the field onto which to apply the values. + !! @param z z-component of the field onto which to apply the values. + !! @param n Size of the `x`, `y` and `z` arrays. + !! @param t Time. + !! @param tstep Time step. + subroutine field_dirichlet_vector_apply_vector(this, x, y, z, n, t, tstep) + class(field_dirichlet_vector_t), intent(inout) :: this + integer, intent(in) :: n + real(kind=rp), intent(inout), dimension(n) :: x + real(kind=rp), intent(inout), dimension(n) :: y + real(kind=rp), intent(inout), dimension(n) :: z + real(kind=rp), intent(in), optional :: t + integer, intent(in), optional :: tstep + + if (present(t) .and. present(tstep)) then + call this%field_dirichlet_u%apply_scalar(x, n, t, tstep) + call this%field_dirichlet_v%apply_scalar(y, n, t, tstep) + call this%field_dirichlet_w%apply_scalar(z, n, t, tstep) + else if (present(t)) then + call this%field_dirichlet_u%apply_scalar(x, n, t=t) + call this%field_dirichlet_v%apply_scalar(y, n, t=t) + call this%field_dirichlet_w%apply_scalar(z, n, t=t) + else if (present(tstep)) then + call this%field_dirichlet_u%apply_scalar(x, n, tstep=tstep) + call this%field_dirichlet_v%apply_scalar(y, n, tstep=tstep) + call this%field_dirichlet_w%apply_scalar(z, n, tstep=tstep) + end if + + end subroutine field_dirichlet_vector_apply_vector + + !> (No-op) Apply vector (device). + !! @param x x-component of the field onto which to apply the values. + !! @param y y-component of the field onto which to apply the values. + !! @param z z-component of the field onto which to apply the values. + !! @param t Time. + !! @param tstep Time step. + subroutine field_dirichlet_vector_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) + class(field_dirichlet_vector_t), intent(inout), target :: this + type(c_ptr) :: x_d + type(c_ptr) :: y_d + type(c_ptr) :: z_d + real(kind=rp), intent(in), optional :: t + integer, intent(in), optional :: tstep + + if (present(t) .and. present(tstep)) then + call this%field_dirichlet_u%apply_scalar_dev(x_d, t, tstep) + call this%field_dirichlet_v%apply_scalar_dev(y_d, t, tstep) + call this%field_dirichlet_w%apply_scalar_dev(z_d, t, tstep) + else if (present(t)) then + call this%field_dirichlet_u%apply_scalar_dev(x_d, t=t) + call this%field_dirichlet_v%apply_scalar_dev(y_d, t=t) + call this%field_dirichlet_w%apply_scalar_dev(z_d, t=t) + else if (present(tstep)) then + call this%field_dirichlet_u%apply_scalar_dev(x_d, tstep=tstep) + call this%field_dirichlet_v%apply_scalar_dev(y_d, tstep=tstep) + call this%field_dirichlet_w%apply_scalar_dev(z_d, tstep=tstep) + end if + + end subroutine field_dirichlet_vector_apply_vector_dev + +end module field_dirichlet_vector diff --git a/src/bc/inflow.f90 b/src/bc/inflow.f90 index 15e65b75371..8b991db8492 100644 --- a/src/bc/inflow.f90 +++ b/src/bc/inflow.f90 @@ -38,7 +38,7 @@ module inflow use, intrinsic :: iso_c_binding, only : c_ptr, c_loc implicit none private - + !> Dirichlet condition for inlet (vector valued) type, public, extends(dirichlet_t) :: inflow_t real(kind=rp), dimension(3) :: x = (/0d0, 0d0, 0d0 /) @@ -49,7 +49,7 @@ module inflow procedure, pass(this) :: apply_vector_dev => inflow_apply_vector_dev procedure, pass(this) :: set_inflow => inflow_set_vector end type inflow_t - + contains !> No-op scalar apply @@ -68,7 +68,7 @@ subroutine inflow_apply_scalar_dev(this, x_d, t, tstep) real(kind=rp), intent(in), optional :: t integer, intent(in), optional :: tstep end subroutine inflow_apply_scalar_dev - + !> Apply inflow conditions (vector valued) subroutine inflow_apply_vector(this, x, y, z, n, t, tstep) class(inflow_t), intent(inout) :: this @@ -97,10 +97,10 @@ subroutine inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) type(c_ptr) :: z_d real(kind=rp), intent(in), optional :: t integer, intent(in), optional :: tstep - + call device_inflow_apply_vector(this%msk_d, x_d, y_d, z_d, & c_loc(this%x), this%msk(0)) - + end subroutine inflow_apply_vector_dev !> Set inflow vector @@ -109,6 +109,6 @@ subroutine inflow_set_vector(this, x) real(kind=rp), dimension(3), intent(inout) :: x this%x = x end subroutine inflow_set_vector - - + + end module inflow diff --git a/src/bc/neumann.f90 b/src/bc/neumann.f90 new file mode 100644 index 00000000000..1e4ab0aee63 --- /dev/null +++ b/src/bc/neumann.f90 @@ -0,0 +1,149 @@ +! Copyright (c) 2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Defines a Neumann boundary condition. +module neumann + use num_types + use bc, only : bc_t + use, intrinsic :: iso_c_binding, only : c_ptr + use utils, only : neko_error, nonlinear_index + use coefs, only : coef_t + implicit none + private + + !> Generic Neumann boundary condition. + !! This sets the flux of the field to the chosen value. + !! @note The condition is imposed weekly by adding an appropriate source term + !! to the right-hand-side. + type, public, extends(bc_t) :: neumann_t + real(kind=rp), private :: flux_ + contains + procedure, pass(this) :: apply_scalar => neumann_apply_scalar + procedure, pass(this) :: apply_vector => neumann_apply_vector + procedure, pass(this) :: apply_scalar_dev => neumann_apply_scalar_dev + procedure, pass(this) :: apply_vector_dev => neumann_apply_vector_dev + procedure, pass(this) :: init_neumann => neumann_init_neumann + procedure, pass(this) :: flux => neumann_flux + end type neumann_t + +contains + + !> Boundary condition apply for a generic Neumann condition + !! to a vector @a x + subroutine neumann_apply_scalar(this, x, n, t, tstep) + class(neumann_t), intent(inout) :: this + integer, intent(in) :: n + real(kind=rp), intent(inout), dimension(n) :: x + real(kind=rp), intent(in), optional :: t + integer, intent(in), optional :: tstep + integer :: i, m, k, facet + ! Store non-linear index + integer :: idx(4) + + m = this%msk(0) + do i = 1, m + k = this%msk(i) + facet = this%facet(i) + idx = nonlinear_index(k, this%coef%Xh%lx, this%coef%Xh%lx,& + this%coef%Xh%lx) + select case(facet) + case(1,2) + x(k) = x(k) + this%flux_*this%coef%area(idx(2), idx(3), facet, idx(4)) + case(3,4) + x(k) = x(k) + this%flux_*this%coef%area(idx(1), idx(3), facet, idx(4)) + case(5,6) + x(k) = x(k) + this%flux_*this%coef%area(idx(1), idx(2), facet, idx(4)) + end select + end do + end subroutine neumann_apply_scalar + + !> Boundary condition apply for a generic Neumann condition + !! to vectors @a x, @a y and @a z + subroutine neumann_apply_vector(this, x, y, z, n, t, tstep) + class(neumann_t), intent(inout) :: this + integer, intent(in) :: n + real(kind=rp), intent(inout), dimension(n) :: x + real(kind=rp), intent(inout), dimension(n) :: y + real(kind=rp), intent(inout), dimension(n) :: z + real(kind=rp), intent(in), optional :: t + integer, intent(in), optional :: tstep + integer :: i, m, k + + call neko_error("Neumann bc not implemented for vectors") + + end subroutine neumann_apply_vector + + !> Boundary condition apply for a generic Neumann condition + !! to a vector @a x (device version) + subroutine neumann_apply_scalar_dev(this, x_d, t, tstep) + class(neumann_t), intent(inout), target :: this + type(c_ptr) :: x_d + real(kind=rp), intent(in), optional :: t + integer, intent(in), optional :: tstep + + call neko_error("Neumann bc not implemented on the device") + + end subroutine neumann_apply_scalar_dev + + !> Boundary condition apply for a generic Neumann condition + !! to vectors @a x, @a y and @a z (device version) + subroutine neumann_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) + class(neumann_t), intent(inout), target :: this + type(c_ptr) :: x_d + type(c_ptr) :: y_d + type(c_ptr) :: z_d + real(kind=rp), intent(in), optional :: t + integer, intent(in), optional :: tstep + + call neko_error("Neumann bc not implemented on the device") + + end subroutine neumann_apply_vector_dev + + !> Constructor + !> @param flux The desired flux. + !> @param coef The SEM coefficients. + subroutine neumann_init_neumann(this, flux) + class(neumann_t), intent(inout) :: this + real(kind=rp), intent(in) :: flux + + this%flux_ = flux + end subroutine neumann_init_neumann + + !> Get the set flux. + pure function neumann_flux(this) result(flux) + class(neumann_t), intent(in) :: this + real(kind=rp) :: flux + + flux = this%flux_ + end function neumann_flux + +end module neumann diff --git a/src/bc/non_normal.f90 b/src/bc/non_normal.f90 index 74b0b84337f..17a3a1ddfe7 100644 --- a/src/bc/non_normal.f90 +++ b/src/bc/non_normal.f90 @@ -55,23 +55,23 @@ module non_normal contains !> Initialize symmetry mask for each axis - subroutine non_normal_init_msk(this, c) + subroutine non_normal_init_msk(this) class(non_normal_t), intent(inout) :: this - type(coef_t), intent(in) :: c - integer :: i, j, k, l + integer :: i, j, k, l type(tuple_i4_t), pointer :: bfp(:) real(kind=rp) :: sx,sy,sz real(kind=rp), parameter :: TOL = 1d-3 type(tuple_i4_t) :: bc_facet integer :: facet, el - + call non_normal_free(this) - call this%bc_x%init(c%dof) - call this%bc_y%init(c%dof) - call this%bc_z%init(c%dof) - - associate(nx => c%nx, ny => c%ny, nz => c%nz) + call this%bc_x%init(this%coef) + call this%bc_y%init(this%coef) + call this%bc_z%init(this%coef) + + associate(c=>this%coef, nx => this%coef%nx, ny => this%coef%ny, & + nz => this%coef%nz) bfp => this%marked_facet%array() do i = 1, this%marked_facet%size() bc_facet = bfp(i) @@ -80,7 +80,7 @@ subroutine non_normal_init_msk(this, c) sx = 0d0 sy = 0d0 sz = 0d0 - select case (facet) + select case (facet) case(1,2) do l = 2, c%Xh%lx - 1 do j = 2, c%Xh%lx -1 @@ -104,7 +104,7 @@ subroutine non_normal_init_msk(this, c) sy = sy + abs(abs(ny(l, j, facet, el)) - 1d0) sz = sz + abs(abs(nz(l, j, facet, el)) - 1d0) end do - end do + end do end select sx = sx / (c%Xh%lx - 2)**2 sy = sy / (c%Xh%lx - 2)**2 @@ -134,7 +134,7 @@ subroutine non_normal_init_msk(this, c) call this%bc_z%set_g(0.0_rp) end subroutine non_normal_init_msk - + subroutine non_normal_free(this) type(non_normal_t), intent(inout) :: this @@ -143,4 +143,4 @@ subroutine non_normal_free(this) call this%bc_z%free() end subroutine non_normal_free - end module non_normal +end module non_normal diff --git a/src/bc/symmetry.f90 b/src/bc/symmetry.f90 index 50d93d1d0d5..e2c259c02ef 100644 --- a/src/bc/symmetry.f90 +++ b/src/bc/symmetry.f90 @@ -37,11 +37,11 @@ module symmetry use num_types use dirichlet use bc - use coefs use math use utils use stack use tuple + use coefs, only : coef_t use, intrinsic :: iso_c_binding, only : c_ptr implicit none private @@ -62,23 +62,23 @@ module symmetry contains !> Initialize symmetry mask for each axis - subroutine symmetry_init_msk(this, c) + subroutine symmetry_init_msk(this) class(symmetry_t), intent(inout) :: this - type(coef_t), intent(in) :: c integer :: i, m, j, l type(tuple_i4_t), pointer :: bfp(:) real(kind=rp) :: sx,sy,sz real(kind=rp), parameter :: TOL = 1d-3 type(tuple_i4_t) :: bc_facet integer :: facet, el - + call symmetry_free(this) - call this%bc_x%init(c%dof) - call this%bc_y%init(c%dof) - call this%bc_z%init(c%dof) - - associate(nx => c%nx, ny => c%ny, nz => c%nz) + call this%bc_x%init(this%coef) + call this%bc_y%init(this%coef) + call this%bc_z%init(this%coef) + + associate(c=>this%coef, nx => this%coef%nx, ny => this%coef%ny, & + nz => this%coef%nz) bfp => this%marked_facet%array() do i = 1, this%marked_facet%size() bc_facet = bfp(i) @@ -87,7 +87,7 @@ subroutine symmetry_init_msk(this, c) sx = 0d0 sy = 0d0 sz = 0d0 - select case (facet) + select case (facet) case(1,2) do l = 2, c%Xh%lx - 1 do j = 2, c%Xh%lx -1 @@ -111,7 +111,7 @@ subroutine symmetry_init_msk(this, c) sy = sy + abs(abs(ny(l, j, facet, el)) - 1d0) sz = sz + abs(abs(nz(l, j, facet, el)) - 1d0) end do - end do + end do end select sx = sx / (c%Xh%lx - 2)**2 sy = sy / (c%Xh%lx - 2)**2 @@ -138,16 +138,16 @@ subroutine symmetry_init_msk(this, c) call this%bc_z%set_g(0.0_rp) end subroutine symmetry_init_msk - + subroutine symmetry_free(this) type(symmetry_t), intent(inout) :: this - + call this%bc_x%free() call this%bc_y%free() call this%bc_z%free() end subroutine symmetry_free - + !> No-op scalar apply subroutine symmetry_apply_scalar(this, x, n, t, tstep) class(symmetry_t), intent(inout) :: this @@ -171,7 +171,7 @@ subroutine symmetry_apply_vector(this, x, y, z, n, t, tstep) call this%bc_x%apply_scalar(x,n) call this%bc_y%apply_scalar(y,n) call this%bc_z%apply_scalar(z,n) - + end subroutine symmetry_apply_vector !> No-op scalar apply (device version) @@ -198,5 +198,5 @@ subroutine symmetry_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) this%bc_z%msk(0)) end subroutine symmetry_apply_vector_dev - + end module symmetry diff --git a/src/bc/usr_inflow.f90 b/src/bc/usr_inflow.f90 index 51c12a8fd7e..cf2b4be6edb 100644 --- a/src/bc/usr_inflow.f90 +++ b/src/bc/usr_inflow.f90 @@ -40,7 +40,7 @@ module usr_inflow use utils implicit none private - + !> User defined dirichlet condition for inlet (vector valued) type, public, extends(inflow_t) :: usr_inflow_t type(coef_t), pointer :: c => null() @@ -59,7 +59,7 @@ module usr_inflow end type usr_inflow_t abstract interface - + !> Abstract interface defining a user defined inflow condition (pointwise) !! @param u The x componenet of the velocity in this point !! @param v The y componenet of the velocity in this point @@ -100,14 +100,14 @@ end subroutine usr_inflow_eval public :: usr_inflow_eval contains - + subroutine usr_inflow_free(this) type(usr_inflow_t), intent(inout) :: this if (c_associated(this%usr_x_d)) then call device_free(this%usr_x_d) end if - + if (c_associated(this%usr_y_d)) then call device_free(this%usr_y_d) end if @@ -115,10 +115,10 @@ subroutine usr_inflow_free(this) if (c_associated(this%usr_z_d)) then call device_free(this%usr_z_d) end if - + end subroutine usr_inflow_free - - !> No-op scalar apply + + !> No-op scalar apply subroutine usr_inflow_apply_scalar(this, x, n, t, tstep) class(usr_inflow_t), intent(inout) :: this integer, intent(in) :: n @@ -126,7 +126,7 @@ subroutine usr_inflow_apply_scalar(this, x, n, t, tstep) real(kind=rp), intent(in), optional :: t integer, intent(in), optional :: tstep end subroutine usr_inflow_apply_scalar - + !> No-op scalar apply (device version) subroutine usr_inflow_apply_scalar_dev(this, x_d, t, tstep) class(usr_inflow_t), intent(inout), target :: this @@ -168,7 +168,7 @@ subroutine usr_inflow_apply_vector(this, x, y, z, n, t, tstep) facet = this%facet(i) idx = nonlinear_index(k, lx, lx, lx) select case(facet) - case(1,2) + case(1,2) call this%eval(x(k), y(k), z(k), & xc(idx(1), idx(2), idx(3), idx(4)), & yc(idx(1), idx(2), idx(3), idx(4)), & @@ -182,7 +182,7 @@ subroutine usr_inflow_apply_vector(this, x, y, z, n, t, tstep) call this%eval(x(k), y(k), z(k), & xc(idx(1), idx(2), idx(3), idx(4)), & yc(idx(1), idx(2), idx(3), idx(4)), & - zc(idx(1), idx(2), idx(3), idx(4)), & + zc(idx(1), idx(2), idx(3), idx(4)), & nx(idx(1), idx(3), facet, idx(4)), & ny(idx(1), idx(3), facet, idx(4)), & nz(idx(1), idx(3), facet, idx(4)), & @@ -192,7 +192,7 @@ subroutine usr_inflow_apply_vector(this, x, y, z, n, t, tstep) call this%eval(x(k), y(k), z(k), & xc(idx(1), idx(2), idx(3), idx(4)), & yc(idx(1), idx(2), idx(3), idx(4)), & - zc(idx(1), idx(2), idx(3), idx(4)), & + zc(idx(1), idx(2), idx(3), idx(4)), & nx(idx(1), idx(2), facet, idx(4)), & ny(idx(1), idx(2), facet, idx(4)), & nz(idx(1), idx(2), facet, idx(4)), & @@ -201,7 +201,7 @@ subroutine usr_inflow_apply_vector(this, x, y, z, n, t, tstep) end select end do end associate - + end subroutine usr_inflow_apply_vector subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) @@ -241,7 +241,7 @@ subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) ! Pretabulate values during first call to apply if (.not. c_associated(usr_x_d)) then allocate(x(m), y(m), z(m)) ! Temp arrays - + s = m*rp call device_alloc(usr_x_d, s) @@ -256,7 +256,7 @@ subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) facet = this%facet(i) idx = nonlinear_index(k, lx, lx, lx) select case(facet) - case(1,2) + case(1,2) call this%eval(x(i), y(i), z(i), & xc(idx(1), idx(2), idx(3), idx(4)), & yc(idx(1), idx(2), idx(3), idx(4)), & @@ -270,7 +270,7 @@ subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) call this%eval(x(i), y(i), z(i), & xc(idx(1), idx(2), idx(3), idx(4)), & yc(idx(1), idx(2), idx(3), idx(4)), & - zc(idx(1), idx(2), idx(3), idx(4)), & + zc(idx(1), idx(2), idx(3), idx(4)), & nx(idx(1), idx(3), facet, idx(4)), & ny(idx(1), idx(3), facet, idx(4)), & nz(idx(1), idx(3), facet, idx(4)), & @@ -280,7 +280,7 @@ subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) call this%eval(x(i), y(i), z(i), & xc(idx(1), idx(2), idx(3), idx(4)), & yc(idx(1), idx(2), idx(3), idx(4)), & - zc(idx(1), idx(2), idx(3), idx(4)), & + zc(idx(1), idx(2), idx(3), idx(4)), & nx(idx(1), idx(2), facet, idx(4)), & ny(idx(1), idx(2), facet, idx(4)), & nz(idx(1), idx(2), facet, idx(4)), & @@ -289,10 +289,9 @@ subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) end select end do end associate - - call device_memcpy(x, usr_x_d, m, HOST_TO_DEVICE) - call device_memcpy(y, usr_y_d, m, HOST_TO_DEVICE) + call device_memcpy(x, usr_x_d, m, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(y, usr_y_d, m, HOST_TO_DEVICE, sync=.false.) call device_memcpy(z, usr_z_d, m, HOST_TO_DEVICE, sync=.true.) deallocate(x, y, z) @@ -300,11 +299,11 @@ subroutine usr_inflow_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) call device_inhom_dirichlet_apply_vector(this%msk_d, x_d, y_d, z_d, & usr_x_d, usr_y_d, usr_z_d, m) - + end associate end subroutine usr_inflow_apply_vector_dev - + !> Assign coefficients (facet normals etc) subroutine usr_inflow_set_coef(this, c) class(usr_inflow_t), intent(inout) :: this @@ -325,10 +324,10 @@ subroutine usr_inflow_validate(this) class(usr_inflow_t), intent(inout) :: this logical :: valid - valid = .true. ! Assert it's going to be ok... + valid = .true. ! Assert it's going to be ok... if (.not. associated(this%c)) then call neko_warning('Missing coefficients') - valid = .false. + valid = .false. end if if (.not. associated(this%eval)) then @@ -339,7 +338,7 @@ subroutine usr_inflow_validate(this) if (.not. valid) then call neko_error('Invalid user defined inflow condition') end if - + end subroutine usr_inflow_validate - + end module usr_inflow diff --git a/src/bc/usr_scalar.f90 b/src/bc/usr_scalar.f90 index 4b4aa4e45ca..efb81be6727 100644 --- a/src/bc/usr_scalar.f90 +++ b/src/bc/usr_scalar.f90 @@ -40,7 +40,7 @@ module usr_scalar use utils implicit none private - + !> User defined dirichlet condition for scalars type, public, extends(dirichlet_t) :: usr_scalar_t type(coef_t), pointer :: c => null() @@ -97,16 +97,16 @@ end subroutine usr_scalar_bc_eval public :: usr_scalar_bc_eval contains - + subroutine usr_inflow_free(this) type(usr_scalar_t), intent(inout) :: this if (c_associated(this%usr_x_d)) then call device_free(this%usr_x_d) end if - + end subroutine usr_inflow_free - + !> Scalar apply !! Just imitating inflow for now, but we should look this over !! Applies boundary conditions in eval on x @@ -142,7 +142,7 @@ subroutine usr_scalar_apply_scalar(this, x, n, t, tstep) facet = this%facet(i) idx = nonlinear_index(k, lx, lx, lx) select case(facet) - case(1,2) + case(1,2) call this%eval(x(k), & xc(idx(1), idx(2), idx(3), idx(4)), & yc(idx(1), idx(2), idx(3), idx(4)), & @@ -156,7 +156,7 @@ subroutine usr_scalar_apply_scalar(this, x, n, t, tstep) call this%eval(x(k), & xc(idx(1), idx(2), idx(3), idx(4)), & yc(idx(1), idx(2), idx(3), idx(4)), & - zc(idx(1), idx(2), idx(3), idx(4)), & + zc(idx(1), idx(2), idx(3), idx(4)), & nx(idx(1), idx(3), facet, idx(4)), & ny(idx(1), idx(3), facet, idx(4)), & nz(idx(1), idx(3), facet, idx(4)), & @@ -166,7 +166,7 @@ subroutine usr_scalar_apply_scalar(this, x, n, t, tstep) call this%eval(x(k), & xc(idx(1), idx(2), idx(3), idx(4)), & yc(idx(1), idx(2), idx(3), idx(4)), & - zc(idx(1), idx(2), idx(3), idx(4)), & + zc(idx(1), idx(2), idx(3), idx(4)), & nx(idx(1), idx(2), facet, idx(4)), & ny(idx(1), idx(2), facet, idx(4)), & nz(idx(1), idx(2), facet, idx(4)), & @@ -222,7 +222,7 @@ subroutine usr_scalar_apply_scalar_dev(this, x_d, t, tstep) facet = this%facet(i) idx = nonlinear_index(k, lx, lx, lx) select case(facet) - case(1,2) + case(1,2) call this%eval(x(i), & xc(idx(1), idx(2), idx(3), idx(4)), & yc(idx(1), idx(2), idx(3), idx(4)), & @@ -236,7 +236,7 @@ subroutine usr_scalar_apply_scalar_dev(this, x_d, t, tstep) call this%eval(x(i), & xc(idx(1), idx(2), idx(3), idx(4)), & yc(idx(1), idx(2), idx(3), idx(4)), & - zc(idx(1), idx(2), idx(3), idx(4)), & + zc(idx(1), idx(2), idx(3), idx(4)), & nx(idx(1), idx(3), facet, idx(4)), & ny(idx(1), idx(3), facet, idx(4)), & nz(idx(1), idx(3), facet, idx(4)), & @@ -246,7 +246,7 @@ subroutine usr_scalar_apply_scalar_dev(this, x_d, t, tstep) call this%eval(x(i), & xc(idx(1), idx(2), idx(3), idx(4)), & yc(idx(1), idx(2), idx(3), idx(4)), & - zc(idx(1), idx(2), idx(3), idx(4)), & + zc(idx(1), idx(2), idx(3), idx(4)), & nx(idx(1), idx(2), facet, idx(4)), & ny(idx(1), idx(2), facet, idx(4)), & nz(idx(1), idx(2), facet, idx(4)), & @@ -254,7 +254,6 @@ subroutine usr_scalar_apply_scalar_dev(this, x_d, t, tstep) t_, tstep_) end select end do - call device_memcpy(x, this%usr_x_d, m, HOST_TO_DEVICE, sync=.true.) @@ -267,7 +266,7 @@ subroutine usr_scalar_apply_scalar_dev(this, x_d, t, tstep) end subroutine usr_scalar_apply_scalar_dev - + !> No-op vector apply subroutine usr_scalar_apply_vector(this, x, y, z, n, t, tstep) class(usr_scalar_t), intent(inout) :: this @@ -277,7 +276,7 @@ subroutine usr_scalar_apply_vector(this, x, y, z, n, t, tstep) real(kind=rp), intent(inout), dimension(n) :: z real(kind=rp), intent(in), optional :: t integer, intent(in), optional :: tstep - integer :: i, m, k, idx(4), facet + integer :: i, m, k, idx(4), facet end subroutine usr_scalar_apply_vector !> No-op vector apply (device version) @@ -315,10 +314,10 @@ subroutine usr_scalar_validate(this) class(usr_scalar_t), intent(inout) :: this logical :: valid - valid = .true. ! Assert it's going to be ok... + valid = .true. ! Assert it's going to be ok... if (.not. associated(this%c)) then call neko_warning('Missing coefficients') - valid = .false. + valid = .false. end if if (.not. associated(this%eval)) then @@ -329,7 +328,7 @@ subroutine usr_scalar_validate(this) if (.not. valid) then call neko_error('Invalid user defined scalar condition') end if - + end subroutine usr_scalar_validate - + end module usr_scalar diff --git a/src/bc/wall.f90 b/src/bc/wall.f90 index 469350010e1..910dd3102c3 100644 --- a/src/bc/wall.f90 +++ b/src/bc/wall.f90 @@ -65,16 +65,16 @@ subroutine no_slip_wall_apply_scalar(this, x, n, t, tstep) k = this%msk(i) x(k) = 0d0 end do - + end subroutine no_slip_wall_apply_scalar - + !> Boundary condition apply for a no-slip wall condition !! to vectors @a x, @a y and @a z subroutine no_slip_wall_apply_vector(this, x, y, z, n, t, tstep) class(no_slip_wall_t), intent(inout) :: this integer, intent(in) :: n real(kind=rp), intent(inout), dimension(n) :: x - real(kind=rp), intent(inout), dimension(n) :: y + real(kind=rp), intent(inout), dimension(n) :: y real(kind=rp), intent(inout), dimension(n) :: z real(kind=rp), intent(in), optional :: t integer, intent(in), optional :: tstep @@ -87,7 +87,7 @@ subroutine no_slip_wall_apply_vector(this, x, y, z, n, t, tstep) y(k) = 0d0 z(k) = 0d0 end do - + end subroutine no_slip_wall_apply_vector !> Boundary condition apply for a no-slip wall condition @@ -99,9 +99,9 @@ subroutine no_slip_wall_apply_scalar_dev(this, x_d, t, tstep) integer, intent(in), optional :: tstep call device_no_slip_wall_apply_scalar(this%msk_d, x_d, size(this%msk)) - + end subroutine no_slip_wall_apply_scalar_dev - + !> Boundary condition apply for a no-slip wall condition !! to vectors @a x, @a y and @a z (device version) subroutine no_slip_wall_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) @@ -114,7 +114,7 @@ subroutine no_slip_wall_apply_vector_dev(this, x_d, y_d, z_d, t, tstep) call device_no_slip_wall_apply_vector(this%msk_d, x_d, y_d, z_d, & size(this%msk)) - + end subroutine no_slip_wall_apply_vector_dev - + end module wall diff --git a/src/case.f90 b/src/case.f90 index 65219a51a7c..5aa8a470cb4 100644 --- a/src/case.f90 +++ b/src/case.f90 @@ -44,7 +44,8 @@ module case use parmetis use redist use sampler - use flow_ic + use flow_ic + use scalar_ic, only : set_scalar_ic use stats use file use utils @@ -53,7 +54,7 @@ module case use time_scheme_controller, only : time_scheme_controller_t use logger use jobctrl - use user_intf + use user_intf use scalar_pnpn ! todo directly load the pnpn? can we have other use json_module, only : json_file, json_core, json_value use json_utils, only : json_get, json_get_or_default @@ -61,7 +62,7 @@ module case use point_zone_registry, only: neko_point_zone_registry use material_properties, only : material_properties_t implicit none - + type :: case_t type(mesh_t) :: msh type(json_file) :: params @@ -76,7 +77,7 @@ module case type(chkp_output_t) :: f_chkp type(mean_flow_output_t) :: f_mf type(mean_sqr_flow_output_t) :: f_msqrf - type(stats_t) :: q + type(stats_t) :: q type(user_t) :: usr class(fluid_scheme_t), allocatable :: fluid type(scalar_pnpn_t), allocatable :: scalar @@ -84,7 +85,7 @@ module case end type case_t interface case_init - module procedure case_init_from_file + module procedure case_init_from_file, case_init_from_json end interface case_init private :: case_init_from_file, case_init_from_json, case_init_common @@ -97,14 +98,15 @@ subroutine case_init_from_file(C, case_file) character(len=*), intent(in) :: case_file integer :: ierr, integer_val character(len=:), allocatable :: json_buffer - + call neko_log%section('Case') - call neko_log%message('Reading case file ' // trim(case_file)) - + call neko_log%message('Reading case file ' // trim(case_file), & + NEKO_LOG_QUIET) + if (pe_rank .eq. 0) then - call C%params%load_file(filename=trim(case_file)) - call C%params%print_to_string(json_buffer) - integer_val = len(json_buffer) + call C%params%load_file(filename=trim(case_file)) + call C%params%print_to_string(json_buffer) + integer_val = len(json_buffer) end if call MPI_Bcast(integer_val, 1, MPI_INTEGER, 0, NEKO_COMM, ierr) @@ -115,7 +117,7 @@ subroutine case_init_from_file(C, case_file) deallocate(json_buffer) call case_init_common(C) - + end subroutine case_init_from_file !> Initialize a case from a JSON object describing a case @@ -124,12 +126,12 @@ subroutine case_init_from_json(C, case_json) type(json_file), intent(in) :: case_json call neko_log%section('Case') - call neko_log%message('Creating case from JSON object') + call neko_log%message('Creating case from JSON object', NEKO_LOG_QUIET) C%params = case_json call case_init_common(C) - + end subroutine case_init_from_json !> Initialize a case from its (loaded) params object @@ -145,21 +147,19 @@ subroutine case_init_common(C) real(kind=rp) :: real_val character(len=:), allocatable :: string_val real(kind=rp) :: stats_start_time, stats_output_val - integer :: stats_sampling_interval + integer :: stats_sampling_interval integer :: output_dir_len - integer :: n_simcomps, i - type(json_core) :: core - type(json_value), pointer :: json_val1, json_val2 - type(json_file) :: json_subdict + integer :: n_simcomps + integer :: precision ! ! Load mesh ! call json_get(C%params, 'case.mesh_file', string_val) msh_file = file_t(string_val) - + call msh_file%read(C%msh) - + ! ! Load Balancing ! @@ -170,19 +170,30 @@ subroutine case_init_common(C) call neko_log%section('Load Balancing') call parmetis_partmeshkway(C%msh, parts) call redist_mesh(C%msh, parts) - call neko_log%end_section() + call neko_log%end_section() end if ! ! Time step ! - call json_get(C%params, 'case.timestep', C%dt) + call C%params%get('case.variable_timestep', logical_val, found) + if (.not. logical_val) then + call json_get(C%params, 'case.timestep', C%dt) + else + ! randomly set an initial dt to get cfl when dt is variable + C%dt = 1.0_rp + end if ! ! End time ! call json_get(C%params, 'case.end_time', C%end_time) + ! + ! Initialize point_zones registry + ! + call neko_point_zone_registry%init(C%params, C%msh) + ! ! Setup user defined functions ! @@ -194,7 +205,7 @@ subroutine case_init_common(C) ! Material properties ! call C%material_properties%init(C%params, C%usr) - + ! ! Setup fluid scheme ! @@ -208,25 +219,20 @@ subroutine case_init_common(C) C%fluid%chkp%dtlag => C%dtlag select type(f => C%fluid) type is(fluid_pnpn_t) - f%chkp%abx1 => f%abx1 - f%chkp%abx2 => f%abx2 - f%chkp%aby1 => f%aby1 - f%chkp%aby2 => f%aby2 - f%chkp%abz1 => f%abz1 - f%chkp%abz2 => f%abz2 + f%chkp%abx1 => f%abx1 + f%chkp%abx2 => f%abx2 + f%chkp%aby1 => f%aby1 + f%chkp%aby2 => f%aby2 + f%chkp%abz1 => f%abz1 + f%chkp%abz2 => f%abz2 end select - + ! ! Setup scratch registry ! neko_scratch_registry = scratch_registry_t(C%fluid%dm_Xh, 10, 10) - ! - ! Initialize point_zones registry - ! - call neko_point_zone_registry%init(C%params, C%fluid%u%dof) - ! ! Setup scalar scheme ! @@ -256,29 +262,15 @@ subroutine case_init_common(C) call C%fluid%set_usr_inflow(C%usr%fluid_user_if) end if end if - - ! Setup source term for the scalar - ! @todo should be expanded for user sources etc. Now copies the fluid one - if (scalar) then - logical_val = C%params%valid_path('case.scalar.source_term') - call json_get_or_default(C%params, 'case.scalar.source_term.type',& - string_val, 'noforce') - if (trim(string_val) .eq. 'user') then - call C%scalar%set_source(trim(string_val), & - usr_f=C%usr%scalar_user_f) - else if (trim(string_val) .eq. 'user_vector') then - call C%scalar%set_source(trim(string_val), & - usr_f_vec=C%usr%scalar_user_f_vector) - else - call C%scalar%set_source(trim(string_val)) - end if + ! Setup user boundary conditions for the scalar. + if (scalar) then call C%scalar%set_user_bc(C%usr%scalar_user_bc) end if ! ! Setup initial conditions - ! + ! call json_get(C%params, 'case.fluid.initial_condition.type',& string_val) if (trim(string_val) .ne. 'user') then @@ -289,6 +281,17 @@ subroutine case_init_common(C) C%fluid%c_Xh, C%fluid%gs_Xh, C%usr%fluid_user_ic, C%params) end if + if (scalar) then + call json_get(C%params, 'case.scalar.initial_condition.type', string_val) + if (trim(string_val) .ne. 'user') then + call set_scalar_ic(C%scalar%s, & + C%scalar%c_Xh, C%scalar%gs_Xh, string_val, C%params) + else + call set_scalar_ic(C%scalar%s, & + C%scalar%c_Xh, C%scalar%gs_Xh, C%usr%scalar_user_ic, C%params) + end if + end if + ! Add initial conditions to BDF scheme (if present) select type(f => C%fluid) type is(fluid_pnpn_t) @@ -297,7 +300,6 @@ subroutine case_init_common(C) call f%wlag%set(f%w) end select - ! ! Validate that the case is properly setup for time-stepping ! @@ -329,10 +331,10 @@ subroutine case_init_common(C) end if end if end if - + ! ! Save boundary markings for fluid (if requested) - ! + ! call json_get_or_default(C%params, 'case.output_boundary',& logical_val, .false.) if (logical_val) then @@ -353,14 +355,28 @@ subroutine case_init_common(C) call mesh_field_free(msh_part) end if + ! + ! Setup output precision of the field files + ! + call json_get_or_default(C%params, 'case.output_precision', string_val,& + 'single') + + if (trim(string_val) .eq. 'double') then + precision = dp + else + precision = sp + end if + ! ! Setup sampler ! call C%s%init(C%end_time) if (scalar) then - C%f_out = fluid_output_t(C%fluid, C%scalar, path=trim(output_directory)) + C%f_out = fluid_output_t(precision, C%fluid, C%scalar, & + path=trim(output_directory)) else - C%f_out = fluid_output_t(C%fluid, path=trim(output_directory)) + C%f_out = fluid_output_t(precision, C%fluid, & + path=trim(output_directory)) end if call json_get_or_default(C%params, 'case.fluid.output_control',& @@ -372,11 +388,13 @@ subroutine case_init_common(C) call C%s%add(C%f_out, real_val, 'nsamples') else if (trim(string_val) .eq. 'never') then ! Fix a dummy 0.0 output_value + call json_get_or_default(C%params, 'case.fluid.output_value', real_val, & + 0.0_rp) call C%s%add(C%f_out, 0.0_rp, string_val) - else + else call json_get(C%params, 'case.fluid.output_value', real_val) call C%s%add(C%f_out, real_val, string_val) - end if + end if ! ! Save checkpoints (if nothing specified, default to saving at end of sim) @@ -385,15 +403,17 @@ subroutine case_init_common(C) logical_val, .true.) if (logical_val) then C%f_chkp = chkp_output_t(C%fluid%chkp, path=output_directory) - call json_get_or_default(C%params, 'case.checkpoint_control', string_val,"simulationtime") - call json_get_or_default(C%params, 'case.checkpoint_value', real_val,1e10_rp) + call json_get_or_default(C%params, 'case.checkpoint_control', & + string_val, "simulationtime") + call json_get_or_default(C%params, 'case.checkpoint_value', real_val,& + 1e10_rp) call C%s%add(C%f_chkp, real_val, string_val) end if ! ! Setup statistics ! - + ! Always init, so that we can call eval in simulation.f90 with no if. ! Note, don't use json_get_or_default here, because that will break the ! valid_path if statement below (the path will become valid always). @@ -424,7 +444,7 @@ subroutine case_init_common(C) string_val) call json_get(C%params, 'case.statistics.output_value', & stats_output_val) - + call C%s%add(C%f_mf, stats_output_val, string_val) call C%q%add(C%fluid%stats) @@ -458,10 +478,10 @@ subroutine case_init_common(C) end if call neko_log%end_section() - + end subroutine case_init_common - - !> Deallocate a case + + !> Deallocate a case subroutine case_free(C) type(case_t), intent(inout) :: C @@ -480,7 +500,7 @@ subroutine case_free(C) call C%s%free() call C%q%free() - + end subroutine case_free - + end module case diff --git a/src/comm/mpi_types.f90 b/src/comm/mpi_types.f90 index 2b95f57531c..102cba2c412 100644 --- a/src/comm/mpi_types.f90 +++ b/src/comm/mpi_types.f90 @@ -135,7 +135,7 @@ subroutine mpi_type_nmsh_hex_init call MPI_Get_address(nmsh_hex%v(7)%v_xyz, disp(15), ierr) call MPI_Get_address(nmsh_hex%v(8)%v_idx, disp(16), ierr) call MPI_Get_address(nmsh_hex%v(8)%v_xyz, disp(17), ierr) - + base = disp(1) do i = 1, 17 @@ -150,7 +150,7 @@ subroutine mpi_type_nmsh_hex_init type(2:16:2) = MPI_INTEGER type(3:17:2) = MPI_DOUBLE_PRECISION call MPI_Type_create_struct(17, len, disp, type, MPI_NMSH_HEX, ierr) - call MPI_Type_commit(MPI_NMSH_HEX, ierr) + call MPI_Type_commit(MPI_NMSH_HEX, ierr) end subroutine mpi_type_nmsh_hex_init !> Define a MPI derived type for a 2d nmsh quad @@ -169,7 +169,7 @@ subroutine mpi_type_nmsh_quad_init call MPI_Get_address(nmsh_quad%v(3)%v_xyz, disp(7), ierr) call MPI_Get_address(nmsh_quad%v(4)%v_idx, disp(8), ierr) call MPI_Get_address(nmsh_quad%v(4)%v_xyz, disp(9), ierr) - + base = disp(1) do i = 1, 9 @@ -184,7 +184,7 @@ subroutine mpi_type_nmsh_quad_init type(2:8:2) = MPI_INTEGER type(3:9:2) = MPI_DOUBLE_PRECISION call MPI_Type_create_struct(9, len, disp, type, MPI_NMSH_QUAD, ierr) - call MPI_Type_commit(MPI_NMSH_QUAD, ierr) + call MPI_Type_commit(MPI_NMSH_QUAD, ierr) end subroutine mpi_type_nmsh_quad_init !> Define a MPI derived type for a nmsh zone @@ -213,9 +213,9 @@ subroutine mpi_type_nmsh_zone_init call MPI_Type_create_struct(6, len, disp, type, MPI_NMSH_ZONE, ierr) call MPI_Type_commit(MPI_NMSH_ZONE, ierr) - + end subroutine mpi_type_nmsh_zone_init - + !> Define a MPI derived type for a nmsh curved element subroutine mpi_type_nmsh_curve_init type(nmsh_curve_el_t) :: nmsh_curve_el @@ -241,22 +241,22 @@ subroutine mpi_type_nmsh_curve_init call MPI_Type_create_struct(3, len, disp, type, MPI_NMSH_CURVE, ierr) call MPI_Type_commit(MPI_NMSH_CURVE, ierr) - + end subroutine mpi_type_nmsh_curve_init - - + + !> Define a MPI derived type for a 3d re2 data subroutine mpi_type_re2_xyz_init type(re2v1_xyz_t) :: re2v1_data type(re2v2_xyz_t) :: re2v2_data type(MPI_Datatype) :: type(4) - integer(kind=MPI_ADDRESS_KIND) :: disp(4), base + integer(kind=MPI_ADDRESS_KIND) :: disp(4), base integer :: len(4), ierr, i ! ! Setup version 1 ! - + call MPI_Get_address(re2v1_data%rgroup, disp(1), ierr) call MPI_Get_address(re2v1_data%x, disp(2), ierr) call MPI_Get_address(re2v1_data%y, disp(3), ierr) @@ -277,7 +277,7 @@ subroutine mpi_type_re2_xyz_init ! ! Setup version 2 ! - + call MPI_Get_address(re2v2_data%rgroup, disp(1), ierr) call MPI_Get_address(re2v2_data%x, disp(2), ierr) call MPI_Get_address(re2v2_data%y, disp(3), ierr) @@ -302,7 +302,7 @@ subroutine mpi_type_re2_xy_init type(re2v1_xy_t) :: re2v1_data type(re2v2_xy_t) :: re2v2_data type(MPI_Datatype) :: type(3) - integer(kind=MPI_ADDRESS_KIND) :: disp(3), base + integer(kind=MPI_ADDRESS_KIND) :: disp(3), base integer :: len(3), ierr, i ! @@ -324,7 +324,7 @@ subroutine mpi_type_re2_xy_init call MPI_Type_create_struct(3, len, disp, type, MPI_RE2V1_DATA_XY, ierr) call MPI_Type_commit(MPI_RE2V1_DATA_XY, ierr) - + ! ! Setup version 2 ! @@ -358,7 +358,7 @@ subroutine mpi_type_re2_cv_init ! ! Setup version 1 ! - + call MPI_Get_address(re2v1_data%elem, disp(1), ierr) call MPI_Get_address(re2v1_data%zone, disp(2), ierr) call MPI_Get_address(re2v1_data%point, disp(3), ierr) @@ -382,7 +382,7 @@ subroutine mpi_type_re2_cv_init ! ! Setup version 2 ! - + call MPI_Get_address(re2v2_data%elem, disp(1), ierr) call MPI_Get_address(re2v2_data%zone, disp(2), ierr) call MPI_Get_address(re2v2_data%point, disp(3), ierr) @@ -402,9 +402,9 @@ subroutine mpi_type_re2_cv_init call MPI_Type_create_struct(4, len, disp, type, MPI_RE2V2_DATA_CV, ierr) call MPI_Type_commit(MPI_RE2V2_DATA_CV, ierr) - + end subroutine mpi_type_re2_cv_init - + !> Define a MPI derived type for re2 bc data subroutine mpi_type_re2_bc_init type(re2v1_bc_t) :: re2v1_data @@ -460,7 +460,7 @@ subroutine mpi_type_re2_bc_init call MPI_Type_create_struct(4, len, disp, type, MPI_RE2V2_DATA_BC, ierr) call MPI_Type_commit(MPI_RE2V2_DATA_BC, ierr) - + end subroutine mpi_type_re2_bc_init !> Define a MPI dervied type for a STL header @@ -486,7 +486,7 @@ subroutine mpi_type_stl_header_init call MPI_Type_create_struct(2, len, disp, type, MPI_STL_HEADER, ierr) call MPI_Type_commit(MPI_STL_HEADER, ierr) - + end subroutine mpi_type_stl_header_init !> Define a MPI derived type for a STL triangle @@ -515,7 +515,7 @@ subroutine mpi_type_stl_triangle_init call MPI_Type_create_struct(5, len, disp, type, MPI_STL_TRIANGLE, ierr) call MPI_Type_commit(MPI_STL_TRIANGLE, ierr) - + end subroutine mpi_type_stl_triangle_init !> Deallocate all derived MPI types @@ -548,13 +548,13 @@ subroutine mpi_type_nmsh_zone_free integer ierr call MPI_Type_free(MPI_NMSH_ZONE, ierr) end subroutine mpi_type_nmsh_zone_free - + !> Deallocate nmsh curve derived MPI type subroutine mpi_type_nmsh_curve_free integer ierr call MPI_Type_free(MPI_NMSH_CURVE, ierr) end subroutine mpi_type_nmsh_curve_free - + !> Deallocate re2 xyz derived MPI type subroutine mpi_type_re2_xyz_free integer ierr @@ -575,7 +575,7 @@ subroutine mpi_type_re2_cv_free call MPI_Type_free(MPI_RE2V1_DATA_CV, ierr) call MPI_Type_free(MPI_RE2V2_DATA_CV, ierr) end subroutine mpi_type_re2_cv_free - + !> Deallocate re2 bc derived MPI type subroutine mpi_type_re2_bc_free integer ierr @@ -588,7 +588,7 @@ subroutine mpi_type_stl_header_free integer ierr call MPI_Type_free(MPI_STL_HEADER, ierr) end subroutine mpi_type_stl_header_free - + !> Deallocate STL triangle derived MPI type subroutine mpi_type_stl_triangle_free integer ierr diff --git a/src/comm/parmetis.F90 b/src/comm/parmetis.F90 index 96c5e745309..cf742dca5f1 100644 --- a/src/comm/parmetis.F90 +++ b/src/comm/parmetis.F90 @@ -36,7 +36,7 @@ module parmetis use point use utils use num_types - use mesh_field + use mesh_field use mesh use, intrinsic :: iso_c_binding implicit none @@ -60,7 +60,7 @@ integer (c_int) function parmetis_v3_partmeshkway & type(c_ptr), value :: tpwgts, ubvec end function parmetis_v3_partmeshkway end interface - + interface integer (c_int) function parmetis_v3_partgeom & (vtxdist, ndims, xyz, part) & @@ -101,12 +101,12 @@ end function parmetis_v3_partgeom #define neko_idx(i) (i) #endif #endif - + contains #ifdef HAVE_PARMETIS - !> Compute a k-way partitioning of a mesh @a msh + !> Compute a k-way partitioning of a mesh @a msh subroutine parmetis_partmeshkway(msh, parts, weights, nprts) type(mesh_t), intent(inout) :: msh !< Mesh type(mesh_fld_t), intent(inout) :: parts !< Partitions @@ -132,13 +132,13 @@ subroutine parmetis_partmeshkway(msh, parts, weights, nprts) if (present(nprts)) then nparts = nprts - else + else nparts = pe_size end if - + allocate(elmdist(0:pe_size), eptr(0:msh%nelv)) allocate(eind(0:(msh%nelv * msh%npts)), part(msh%nelv)) - allocate(elmwgt(msh%nelv), tpwgts(ncon * nparts), ubvec(ncon)) + allocate(elmwgt(msh%nelv), tpwgts(ncon * nparts), ubvec(ncon)) call parmetis_dist(elmdist, msh%nelv) @@ -171,11 +171,11 @@ subroutine parmetis_partmeshkway(msh, parts, weights, nprts) else call neko_error(rcode) end if - + deallocate(elmdist, eptr, eind, part, elmwgt, tpwgts, ubvec) end subroutine parmetis_partmeshkway - + !> Compute a k-way partitioning of a mesh @a msh using !! a coordinated-based space-filing curves method subroutine parmetis_partgeom(msh, parts) @@ -193,7 +193,7 @@ subroutine parmetis_partgeom(msh, parts) allocate(vtxdist(0:pe_size)) call parmetis_dist(vtxdist, msh%nelv) - + i = 1 do j = 1, msh%nelv c = msh%elements(j)%e%centroid() @@ -202,18 +202,18 @@ subroutine parmetis_partgeom(msh, parts) xyz(i + 2) = parmetis_real(c%x(3)) i = i + 3 end do - + rcode = parmetis_v3_partgeom(c_loc(vtxdist), c_loc(ndims), & c_loc(xyz), c_loc(part)) - + if (rcode .eq. METIS_OK) then call parmetis_mark_parts(parts, msh, part) else call neko_error(rcode) end if - deallocate(part, xyz, vtxdist) - + deallocate(part, xyz, vtxdist) + end subroutine parmetis_partgeom !> Fill mesh field according to new partitions @@ -228,7 +228,7 @@ subroutine parmetis_mark_parts(parts, msh, part) do i = 1, msh%nelv parts%data(i) = neko_idx(part(i)) end do - + end subroutine parmetis_mark_parts !> Setup weights and balance constraints for the dual graph @@ -240,7 +240,7 @@ subroutine parmetis_wgt(msh, wgt, tpwgts, ubvec, nparts, ncon, weight) integer, intent(in) :: nparts, ncon type(mesh_fld_t), intent(in), optional :: weight integer :: i - + if (present(weight)) then do i = 1, msh%nelv wgt(i) = parmetis_idx(weight%data(i)) @@ -248,7 +248,7 @@ subroutine parmetis_wgt(msh, wgt, tpwgts, ubvec, nparts, ncon, weight) else wgt = parmetis_idx(1) end if - + do i = 1, (ncon * nparts) tpwgts(i) = parmetis_real(1) / parmetis_real(nparts) end do @@ -258,7 +258,7 @@ subroutine parmetis_wgt(msh, wgt, tpwgts, ubvec, nparts, ncon, weight) end do end subroutine parmetis_wgt - + !> Compute the (parallel) vertex distribution of the dual graph subroutine parmetis_dist(dist, nelv) integer(kind=M_INT), intent(inout) :: dist(0:pe_size) @@ -283,7 +283,7 @@ end subroutine parmetis_dist #else - !> Compute a k-way partitioning of a mesh @a msh + !> Compute a k-way partitioning of a mesh @a msh subroutine parmetis_partmeshkway(msh, parts, weights, nprts) type(mesh_t), intent(inout) :: msh !< Mesh type(mesh_fld_t), intent(inout) :: parts !< Partitions @@ -299,7 +299,7 @@ subroutine parmetis_partgeom(msh, parts) type(mesh_fld_t), intent(inout) :: parts !< Partitions call neko_error('NEKO needs to be built with ParMETIS support') end subroutine parmetis_partgeom - + #endif - + end module parmetis diff --git a/src/comm/redist.f90 b/src/comm/redist.f90 index 6eecd11ea22..a9fa8d25a2b 100644 --- a/src/comm/redist.f90 +++ b/src/comm/redist.f90 @@ -34,7 +34,7 @@ module redist use mesh_field use neko_mpi_types - use mpi_f08 + use mpi_f08 use htable use point use stack @@ -48,7 +48,7 @@ module redist private public :: redist_mesh - + contains !> Redistribute a mesh @a msh according to new partitions @@ -80,10 +80,10 @@ subroutine redist_mesh(msh, parts) ! Reset possible periodic ids ! call msh%reset_periodic_ids() - + ! ! Extract new zone distributions - ! + ! allocate(new_zone_dist(0:pe_size - 1)) do i = 0, pe_size - 1 @@ -111,11 +111,11 @@ subroutine redist_mesh(msh, parts) end do call redist_curve(msh, msh%curve, parts, new_curve_dist) - + ! ! Extract new mesh distribution ! - + allocate(new_mesh_dist(0:(pe_size - 1))) do i = 0, pe_size - 1 call new_mesh_dist(i)%init() @@ -127,12 +127,12 @@ subroutine redist_mesh(msh, parts) do j = 1, 8 el%v(j)%v_idx = ep%pts(j)%p%id() el%v(j)%v_xyz = ep%pts(j)%p%x - end do + end do call new_mesh_dist(parts%data(i))%push(el) end do - - gdim = msh%gdim + + gdim = msh%gdim call msh%free() max_recv = 0 @@ -141,13 +141,13 @@ subroutine redist_mesh(msh, parts) max_recv(2) = max(max_recv(2), new_zone_dist(i)%size()) max_recv(3) = max(max_recv(3), new_curve_dist(i)%size()) end do - + call MPI_Allreduce(MPI_IN_PLACE, max_recv, 3, MPI_INTEGER, & MPI_MAX, NEKO_COMM, ierr) allocate(recv_buf_msh(max_recv(1))) allocate(recv_buf_zone(max_recv(2))) allocate(recv_buf_curve(max_recv(3))) - + do i = 1, pe_size - 1 src = modulo(pe_rank - i + pe_size, pe_size) dst = modulo(pe_rank + i, pe_size) @@ -189,9 +189,9 @@ subroutine redist_mesh(msh, parts) do j = 1, recv_size call new_curve_dist(pe_rank)%push(recv_buf_curve(j)) - end do + end do end do - + deallocate(recv_buf_msh) deallocate(recv_buf_zone) deallocate(recv_buf_curve) @@ -223,12 +223,12 @@ subroutine redist_mesh(msh, parts) end do call msh%add_element(i, & p(1), p(2), p(3), p(4), p(5), p(6), p(7), p(8)) - + if (el_map%get(np(i)%el_idx, tmp) .gt. 0) then ! Old glb to new local tmp = i call el_map%set(np(i)%el_idx, tmp) - + ! Old glb to new glb tmp = msh%elements(i)%e%id() call glb_map%set(np(i)%el_idx, tmp) @@ -244,7 +244,7 @@ subroutine redist_mesh(msh, parts) ! Figure out new mesh distribution (necessary for periodic zones) ! call pe_lst%init() - + ! We should use the %array() procedure, which works great for ! GNU, Intel and NEC, but it breaks horribly on Cray when using ! certain data types @@ -256,13 +256,13 @@ subroutine redist_mesh(msh, parts) end if end do end select - + max_recv_idx = 2 * pe_lst%size() call MPI_Allreduce(MPI_IN_PLACE, max_recv_idx, 1, MPI_INTEGER, & MPI_MAX, NEKO_COMM, ierr) allocate(recv_buf_idx(max_recv_idx)) allocate(send_buf_idx(max_recv_idx)) - + do i = 1, pe_size - 1 src = modulo(pe_rank - i + pe_size, pe_size) dst = modulo(pe_rank + i, pe_size) @@ -295,15 +295,15 @@ subroutine redist_mesh(msh, parts) do j = 1, recv_size, 2 call glb_map%set(recv_buf_idx(j), recv_buf_idx(j+1)) - end do + end do end do deallocate(recv_buf_idx) deallocate(send_buf_idx) call pe_lst%free() - + ! ! Add zone data for new mesh distribution - ! + ! zp => new_zone_dist(pe_rank)%array() do i = 1, new_zone_dist(pe_rank)%size() if (el_map%get(zp(i)%e, new_el_idx) .gt. 0) then @@ -322,7 +322,7 @@ subroutine redist_mesh(msh, parts) if (glb_map%get(zp(i)%p_e, new_pel_idx) .gt. 0) then call neko_error('Missing periodic element after redistribution') end if - + call msh%mark_periodic_facet(zp(i)%f, new_el_idx, & zp(i)%p_f, new_pel_idx, zp(i)%glb_pt_ids) case(7) @@ -338,12 +338,12 @@ subroutine redist_mesh(msh, parts) if (glb_map%get(zp(i)%p_e, new_pel_idx) .gt. 0) then call neko_error('Missing periodic element after redistribution') end if - + call msh%apply_periodic_facet(zp(i)%f, new_el_idx, & zp(i)%p_f, new_pel_idx, zp(i)%glb_pt_ids) end select end do - + call new_zone_dist(pe_rank)%free() ! @@ -359,7 +359,7 @@ subroutine redist_mesh(msh, parts) call new_curve_dist(pe_rank)%free() call msh%finalize() - + end subroutine redist_mesh !> Fill redistribution list for zone data @@ -378,7 +378,7 @@ subroutine redist_zone(msh, z, type, parts, new_dist, label) else lbl = 0 end if - + select type(zp => z) type is (facet_zone_periodic_t) do i = 1, zp%size @@ -387,8 +387,8 @@ subroutine redist_zone(msh, z, type, parts, new_dist, label) nmsh_zone%f = zp%facet_el(i)%x(1) nmsh_zone%p_e = zp%p_facet_el(i)%x(2) nmsh_zone%p_f = zp%p_facet_el(i)%x(1) - nmsh_zone%glb_pt_ids = zp%p_ids(i)%x - nmsh_zone%type = type + nmsh_zone%glb_pt_ids = zp%p_ids(i)%x + nmsh_zone%type = type call new_dist(parts%data(zone_el))%push(nmsh_zone) end do type is (facet_zone_t) @@ -419,7 +419,7 @@ subroutine redist_curve(msh, c, parts, new_dist) nmsh_curve%type = c%curve_el(i)%curve_type call new_dist(parts%data(curve_el))%push(nmsh_curve) end do - + end subroutine redist_curve end module redist diff --git a/src/common/bcknd/cpu/rhs_maker_cpu.f90 b/src/common/bcknd/cpu/rhs_maker_cpu.f90 index 7e766e07103..1998f4e70c7 100644 --- a/src/common/bcknd/cpu/rhs_maker_cpu.f90 +++ b/src/common/bcknd/cpu/rhs_maker_cpu.f90 @@ -6,7 +6,7 @@ module rhs_maker_cpu use scratch_registry implicit none private - + type, public, extends(rhs_maker_sumab_t) :: rhs_maker_sumab_cpu_t contains procedure, nopass :: compute_fluid => rhs_maker_sumab_cpu @@ -23,7 +23,7 @@ module rhs_maker_cpu procedure, nopass :: compute_fluid => rhs_maker_bdf_cpu procedure, nopass :: compute_scalar => scalar_rhs_maker_bdf_cpu end type rhs_maker_bdf_cpu_t - + contains subroutine rhs_maker_sumab_cpu(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab) @@ -36,20 +36,20 @@ subroutine rhs_maker_sumab_cpu(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab n = uu%dof%size() - do i = 1, n + do concurrent (i = 1:n) u%x(i,1,1,1) = ab(1) * uu%x(i,1,1,1) + ab(2) * uulag%lf(1)%x(i,1,1,1) v%x(i,1,1,1) = ab(1) * vv%x(i,1,1,1) + ab(2) * vvlag%lf(1)%x(i,1,1,1) w%x(i,1,1,1) = ab(1) * ww%x(i,1,1,1) + ab(2) * wwlag%lf(1)%x(i,1,1,1) end do if (nab .eq. 3) then - do i = 1, n + do concurrent (i = 1:n) u%x(i,1,1,1) = u%x(i,1,1,1) + ab(3) * uulag%lf(2)%x(i,1,1,1) v%x(i,1,1,1) = v%x(i,1,1,1) + ab(3) * vvlag%lf(2)%x(i,1,1,1) w%x(i,1,1,1) = w%x(i,1,1,1) + ab(3) * wwlag%lf(2)%x(i,1,1,1) end do end if - + end subroutine rhs_maker_sumab_cpu subroutine rhs_maker_ext_cpu(fx_lag, fy_lag, fz_lag, & @@ -63,12 +63,12 @@ subroutine rhs_maker_ext_cpu(fx_lag, fy_lag, fz_lag, & integer :: i type(field_t), pointer :: temp1, temp2, temp3 integer :: temp_indices(3) - + call neko_scratch_registry%request_field(temp1, temp_indices(1)) call neko_scratch_registry%request_field(temp2, temp_indices(2)) call neko_scratch_registry%request_field(temp3, temp_indices(3)) - do i = 1, n + do concurrent (i = 1:n) temp1%x(i,1,1,1) = ext_coeffs(2) * fx_lag%x(i,1,1,1) + & ext_coeffs(3) * fx_laglag%x(i,1,1,1) temp2%x(i,1,1,1) = ext_coeffs(2) * fy_lag%x(i,1,1,1) + & @@ -77,7 +77,7 @@ subroutine rhs_maker_ext_cpu(fx_lag, fy_lag, fz_lag, & ext_coeffs(3) * fz_laglag%x(i,1,1,1) end do - do i = 1, n + do concurrent (i = 1:n) fx_laglag%x(i,1,1,1) = fx_lag%x(i,1,1,1) fy_laglag%x(i,1,1,1) = fy_lag%x(i,1,1,1) fz_laglag%x(i,1,1,1) = fz_lag%x(i,1,1,1) @@ -86,47 +86,50 @@ subroutine rhs_maker_ext_cpu(fx_lag, fy_lag, fz_lag, & fz_lag%x(i,1,1,1) = fz(i) end do - do i = 1, n + do concurrent (i = 1:n) fx(i) = (ext_coeffs(1) * fx(i) + temp1%x(i,1,1,1)) * rho fy(i) = (ext_coeffs(1) * fy(i) + temp2%x(i,1,1,1)) * rho fz(i) = (ext_coeffs(1) * fz(i) + temp3%x(i,1,1,1)) * rho end do - + call neko_scratch_registry%relinquish_field(temp_indices) - + end subroutine rhs_maker_ext_cpu - subroutine scalar_rhs_maker_ext_cpu(temp1, fs_lag, fs_laglag, fs, rho, ext_coeffs, & - n) - type(field_t), intent(inout) :: temp1 + subroutine scalar_rhs_maker_ext_cpu(fs_lag, fs_laglag, fs, rho, ext_coeffs, n) type(field_t), intent(inout) :: fs_lag type(field_t), intent(inout) :: fs_laglag real(kind=rp), intent(inout) :: rho, ext_coeffs(4) integer, intent(in) :: n real(kind=rp), intent(inout) :: fs(n) integer :: i + type(field_t), pointer :: temp1 + integer :: temp_index - do i = 1, n + call neko_scratch_registry%request_field(temp1, temp_index) + + do concurrent (i = 1:n) temp1%x(i,1,1,1) = ext_coeffs(2) * fs_lag%x(i,1,1,1) + & ext_coeffs(3) * fs_laglag%x(i,1,1,1) end do - do i = 1, n + do concurrent (i = 1:n) fs_laglag%x(i,1,1,1) = fs_lag%x(i,1,1,1) fs_lag%x(i,1,1,1) = fs(i) end do - do i = 1, n + do concurrent (i = 1:n) fs(i) = (ext_coeffs(1) * fs(i) + temp1%x(i,1,1,1)) * rho end do - + + call neko_scratch_registry%relinquish_field(temp_index) end subroutine scalar_rhs_maker_ext_cpu subroutine rhs_maker_bdf_cpu(ulag, vlag, wlag, bfx, bfy, bfz, & - u, v, w, B, rho, dt, bd, nbd, n) + u, v, w, B, rho, dt, bd, nbd, n) integer, intent(in) :: n, nbd type(field_t), intent(in) :: u, v, w - type(field_series_t), intent(in) :: ulag, vlag, wlag + type(field_series_t), intent(in) :: ulag, vlag, wlag real(kind=rp), intent(inout) :: bfx(n), bfy(n), bfz(n) real(kind=rp), intent(in) :: B(n) real(kind=rp), intent(in) :: dt, rho, bd(4) @@ -142,27 +145,27 @@ subroutine rhs_maker_bdf_cpu(ulag, vlag, wlag, bfx, bfy, bfz, & call neko_scratch_registry%request_field(tb2, temp_indices(5)) call neko_scratch_registry%request_field(tb3, temp_indices(6)) - do i = 1, n + do concurrent (i = 1:n) tb1%x(i,1,1,1) = u%x(i,1,1,1) * B(i) * bd(2) tb2%x(i,1,1,1) = v%x(i,1,1,1) * B(i) * bd(2) tb3%x(i,1,1,1) = w%x(i,1,1,1) * B(i) * bd(2) end do do ilag = 2, nbd - do i = 1, n + do concurrent (i = 1:n) ta1%x(i,1,1,1) = ulag%lf(ilag-1)%x(i,1,1,1) * B(i) * bd(ilag+1) ta2%x(i,1,1,1) = vlag%lf(ilag-1)%x(i,1,1,1) * B(i) * bd(ilag+1) ta3%x(i,1,1,1) = wlag%lf(ilag-1)%x(i,1,1,1) * B(i) * bd(ilag+1) end do - do i = 1, n + do concurrent (i = 1:n) tb1%x(i,1,1,1) = tb1%x(i,1,1,1) + ta1%x(i,1,1,1) tb2%x(i,1,1,1) = tb2%x(i,1,1,1) + ta2%x(i,1,1,1) tb3%x(i,1,1,1) = tb3%x(i,1,1,1) + ta3%x(i,1,1,1) end do end do - do i = 1, n + do concurrent (i = 1:n) bfx(i) = bfx(i) + tb1%x(i,1,1,1) * (rho / dt) bfy(i) = bfy(i) + tb2%x(i,1,1,1) * (rho / dt) bfz(i) = bfz(i) + tb3%x(i,1,1,1) * (rho / dt) @@ -172,35 +175,39 @@ subroutine rhs_maker_bdf_cpu(ulag, vlag, wlag, bfx, bfy, bfz, & end subroutine rhs_maker_bdf_cpu - subroutine scalar_rhs_maker_bdf_cpu(temp1, temp2, s_lag, fs, s, B, rho, dt, & - bd, nbd, n) + subroutine scalar_rhs_maker_bdf_cpu(s_lag, fs, s, B, rho, dt, bd, nbd, n) integer, intent(in) :: n, nbd - type(field_t), intent(inout) :: temp1, temp2 type(field_t), intent(in) :: s type(field_series_t), intent(in) :: s_lag real(kind=rp), intent(inout) :: fs(n) real(kind=rp), intent(in) :: B(n) real(kind=rp), intent(in) :: dt, rho, bd(4) integer :: i, ilag + type(field_t), pointer :: temp1, temp2 + integer :: temp_indices(2) + + call neko_scratch_registry%request_field(temp1, temp_indices(1)) + call neko_scratch_registry%request_field(temp2, temp_indices(2)) - do i = 1, n + do concurrent (i = 1:n) temp2%x(i,1,1,1) = s%x(i,1,1,1) * B(i) * bd(2) end do do ilag = 2, nbd - do i = 1, n + do concurrent (i = 1:n) temp1%x(i,1,1,1) = s_lag%lf(ilag-1)%x(i,1,1,1) * B(i) * bd(ilag+1) end do - do i = 1, n + do concurrent (i = 1:n) temp2%x(i,1,1,1) = temp2%x(i,1,1,1) + temp1%x(i,1,1,1) end do end do - do i = 1, n + do concurrent (i = 1:n) fs(i) = fs(i) + temp2%x(i,1,1,1) * (rho / dt) end do + call neko_scratch_registry%relinquish_field(temp_indices) end subroutine scalar_rhs_maker_bdf_cpu end module rhs_maker_cpu diff --git a/src/common/bcknd/device/cuda/makebdf_kernel.h b/src/common/bcknd/device/cuda/makebdf_kernel.h index 32ebf958a28..8c420ee7f88 100644 --- a/src/common/bcknd/device/cuda/makebdf_kernel.h +++ b/src/common/bcknd/device/cuda/makebdf_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __COMMON_MAKEBDF_KERNEL__ +#define __COMMON_MAKEBDF_KERNEL__ + template< typename T > __global__ void makebdf_kernel(const T * __restrict__ ulag1, const T * __restrict__ ulag2, @@ -115,3 +118,5 @@ __global__ void scalar_makebdf_kernel(const T * __restrict__ s_lag, } } + +#endif // __COMMON_MAKEBDF_KERNEL__ \ No newline at end of file diff --git a/src/common/bcknd/device/cuda/makeext_kernel.h b/src/common/bcknd/device/cuda/makeext_kernel.h index 69967a19f62..155d2790961 100644 --- a/src/common/bcknd/device/cuda/makeext_kernel.h +++ b/src/common/bcknd/device/cuda/makeext_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __COMMON_MAKEEXT_KERNEL__ +#define __COMMON_MAKEEXT_KERNEL__ + template< typename T > __global__ void makeext_kernel(T * __restrict__ abx1, T * __restrict__ aby1, @@ -93,3 +96,5 @@ __global__ void scalar_makeext_kernel(T * __restrict__ fs_lag, } } + +#endif // __COMMON_MAKEEXT_KERNEL__ \ No newline at end of file diff --git a/src/common/bcknd/device/cuda/projection_kernel.h b/src/common/bcknd/device/cuda/projection_kernel.h index f50838d6f04..a59fb7b3236 100644 --- a/src/common/bcknd/device/cuda/projection_kernel.h +++ b/src/common/bcknd/device/cuda/projection_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __COMMON_PROJECTION_KERNEL__ +#define __COMMON_PROJECTION_KERNEL__ + /** * Project on vector operations */ @@ -90,3 +93,4 @@ __global__ void project_ortho_vec_kernel(T * __restrict__ x, } +#endif // __COMMON_PROJECTION_KERNEL__ \ No newline at end of file diff --git a/src/common/bcknd/device/cuda/sumab_kernel.h b/src/common/bcknd/device/cuda/sumab_kernel.h index 6681b2bd55e..da49b9e6320 100644 --- a/src/common/bcknd/device/cuda/sumab_kernel.h +++ b/src/common/bcknd/device/cuda/sumab_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __COMMON_SUMAB_KERNEL__ +#define __COMMON_SUMAB_KERNEL__ + template< typename T > __global__ void sumab_kernel(T * __restrict__ u, T * __restrict__ v, @@ -70,3 +73,4 @@ __global__ void sumab_kernel(T * __restrict__ u, } +#endif // __COMMON_SUMAB_KERNEL__ \ No newline at end of file diff --git a/src/common/bcknd/device/device_projection.F90 b/src/common/bcknd/device/device_projection.F90 index a31a5e84ecf..4b253e8bd13 100644 --- a/src/common/bcknd/device/device_projection.F90 +++ b/src/common/bcknd/device/device_projection.F90 @@ -120,5 +120,5 @@ subroutine device_project_ortho(alpha_d, b_d, x_d_d, b_d_d, & call neko_error('No device backend configured') #endif end subroutine device_project_ortho - + end module device_projection diff --git a/src/common/bcknd/device/hip/makebdf_kernel.h b/src/common/bcknd/device/hip/makebdf_kernel.h index d1204825e20..8ef70a6bfdb 100644 --- a/src/common/bcknd/device/hip/makebdf_kernel.h +++ b/src/common/bcknd/device/hip/makebdf_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __COMMON_MAKEBDF_KERNEL__ +#define __COMMON_MAKEBDF_KERNEL__ + template< typename T > __global__ void makebdf_kernel(const T * __restrict__ ulag1, const T * __restrict__ ulag2, @@ -116,3 +119,5 @@ __global__ void scalar_makebdf_kernel(const T * __restrict__ s_lag, } } + +#endif //__COMMON_MAKEBDF_KERNEL__ \ No newline at end of file diff --git a/src/common/bcknd/device/hip/makeext_kernel.h b/src/common/bcknd/device/hip/makeext_kernel.h index 3534c34b8e8..4327d9f2bca 100644 --- a/src/common/bcknd/device/hip/makeext_kernel.h +++ b/src/common/bcknd/device/hip/makeext_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __COMMON_MAKEEXT_KERNEL__ +#define __COMMON_MAKEEXT_KERNEL__ + template< typename T > __global__ void makeext_kernel(T * __restrict__ abx1, T * __restrict__ aby1, @@ -93,3 +96,5 @@ __global__ void scalar_makeext_kernel(T * __restrict__ fs_lag, } } + +#endif // __COMMON_MAKEEXT_KERNEL__ \ No newline at end of file diff --git a/src/common/bcknd/device/hip/projection_kernel.h b/src/common/bcknd/device/hip/projection_kernel.h index 67d96b099ef..0dc225d2a36 100644 --- a/src/common/bcknd/device/hip/projection_kernel.h +++ b/src/common/bcknd/device/hip/projection_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __COMMON_PROJECTION_KERNEL__ +#define __COMMON_PROJECTION_KERNEL__ + /** * Project on vector operations */ @@ -88,3 +91,5 @@ __global__ void project_ortho_vec_kernel(T * __restrict__ x, } } + +#endif // __COMMON_PROJECTION_KERNEL__ \ No newline at end of file diff --git a/src/common/bcknd/device/hip/sumab_kernel.h b/src/common/bcknd/device/hip/sumab_kernel.h index 6681b2bd55e..da49b9e6320 100644 --- a/src/common/bcknd/device/hip/sumab_kernel.h +++ b/src/common/bcknd/device/hip/sumab_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __COMMON_SUMAB_KERNEL__ +#define __COMMON_SUMAB_KERNEL__ + template< typename T > __global__ void sumab_kernel(T * __restrict__ u, T * __restrict__ v, @@ -70,3 +73,4 @@ __global__ void sumab_kernel(T * __restrict__ u, } +#endif // __COMMON_SUMAB_KERNEL__ \ No newline at end of file diff --git a/src/common/bcknd/device/opencl/rhs_maker_kernel.cl b/src/common/bcknd/device/opencl/rhs_maker_kernel.cl index 771531d08fb..5efc6b56028 100644 --- a/src/common/bcknd/device/opencl/rhs_maker_kernel.cl +++ b/src/common/bcknd/device/opencl/rhs_maker_kernel.cl @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __COMMON_RHS_MAKER_KENEL__ +#define __COMMON_RHS_MAKER_KENEL__ + __kernel void sumab_kernel(__global real * __restrict__ u, __global real * __restrict__ v, __global real * __restrict__ w, @@ -212,3 +215,4 @@ __kernel void scalar_makebdf_kernel(__global const real * __restrict__ s_lag, } +#endif // __COMMON_RHS_MAKER_KENEL__ \ No newline at end of file diff --git a/src/common/bcknd/device/rhs_maker_device.F90 b/src/common/bcknd/device/rhs_maker_device.F90 index 89713916462..f06bedcf020 100644 --- a/src/common/bcknd/device/rhs_maker_device.F90 +++ b/src/common/bcknd/device/rhs_maker_device.F90 @@ -59,7 +59,7 @@ module rhs_maker_device end type rhs_maker_bdf_device_t #ifdef HAVE_HIP - interface + interface subroutine rhs_maker_sumab_hip(u_d, v_d, w_d, uu_d, vv_d, ww_d, & uulag1, uulag2, vvlag1, vvlag2, wwlag1, wwlag2, ab1, ab2, ab3, nab, n)& bind(c, name='rhs_maker_sumab_hip') @@ -80,7 +80,7 @@ subroutine rhs_maker_ext_hip(abx1_d, aby1_d, abz1_d, & bind(c, name='rhs_maker_ext_hip') use, intrinsic :: iso_c_binding import c_rp - type(c_ptr), value :: abx1_d, aby1_d, abz1_d + type(c_ptr), value :: abx1_d, aby1_d, abz1_d type(c_ptr), value :: abx2_d, aby2_d, abz2_d type(c_ptr), value :: bfx_d, bfy_d, bfz_d real(c_rp) :: rho, ab1, ab2, ab3 @@ -94,7 +94,7 @@ subroutine scalar_rhs_maker_ext_hip(fs_lag_d, fs_laglag_d, fs_d, rho, & bind(c, name='scalar_rhs_maker_ext_hip') use, intrinsic :: iso_c_binding import c_rp - type(c_ptr), value :: fs_lag_d, fs_laglag_d, fs_d + type(c_ptr), value :: fs_lag_d, fs_laglag_d, fs_d real(c_rp) :: rho, ext1, ext2, ext3 integer(c_int) :: n end subroutine scalar_rhs_maker_ext_hip @@ -112,7 +112,7 @@ subroutine rhs_maker_bdf_hip(ulag1_d, ulag2_d, vlag1_d, vlag2_d, & reaL(c_rp) :: rho, dt, bd2, bd3, bd4 integer(c_int) :: nbd, n end subroutine rhs_maker_bdf_hip - end interface + end interface interface subroutine scalar_rhs_maker_bdf_hip(s_lag_d, s_laglag_d, fs_d, s_d, B_d, & @@ -125,7 +125,7 @@ subroutine scalar_rhs_maker_bdf_hip(s_lag_d, s_laglag_d, fs_d, s_d, B_d, & reaL(c_rp) :: rho, dt, bd2, bd3, bd4 integer(c_int) :: nbd, n end subroutine scalar_rhs_maker_bdf_hip - end interface + end interface #elif HAVE_CUDA interface subroutine rhs_maker_sumab_cuda(u_d, v_d, w_d, uu_d, vv_d, ww_d, & @@ -148,7 +148,7 @@ subroutine rhs_maker_ext_cuda(abx1_d, aby1_d, abz1_d, & bind(c, name='rhs_maker_ext_cuda') use, intrinsic :: iso_c_binding import c_rp - type(c_ptr), value :: abx1_d, aby1_d, abz1_d + type(c_ptr), value :: abx1_d, aby1_d, abz1_d type(c_ptr), value :: abx2_d, aby2_d, abz2_d type(c_ptr), value :: bfx_d, bfy_d, bfz_d real(c_rp) :: rho, ab1, ab2, ab3 @@ -162,7 +162,7 @@ subroutine scalar_rhs_maker_ext_cuda(fs_lag_d, fs_laglag_d, fs_d, rho, & bind(c, name='scalar_rhs_maker_ext_cuda') use, intrinsic :: iso_c_binding import c_rp - type(c_ptr), value :: fs_lag_d, fs_laglag_d, fs_d + type(c_ptr), value :: fs_lag_d, fs_laglag_d, fs_d real(c_rp) :: rho, ext1, ext2, ext3 integer(c_int) :: n end subroutine scalar_rhs_maker_ext_cuda @@ -180,8 +180,8 @@ subroutine rhs_maker_bdf_cuda(ulag1_d, ulag2_d, vlag1_d, vlag2_d, & reaL(c_rp) :: rho, dt, bd2, bd3, bd4 integer(c_int) :: nbd, n end subroutine rhs_maker_bdf_cuda - end interface - + end interface + interface subroutine scalar_rhs_maker_bdf_cuda(s_lag_d, s_laglag_d, fs_d, s_d, B_d, & rho, dt, bd2, bd3, bd4, nbd, n) & @@ -193,7 +193,7 @@ subroutine scalar_rhs_maker_bdf_cuda(s_lag_d, s_laglag_d, fs_d, s_d, B_d, & reaL(c_rp) :: rho, dt, bd2, bd3, bd4 integer(c_int) :: nbd, n end subroutine scalar_rhs_maker_bdf_cuda - end interface + end interface #elif HAVE_OPENCL interface subroutine rhs_maker_sumab_opencl(u_d, v_d, w_d, uu_d, vv_d, ww_d, & @@ -216,7 +216,7 @@ subroutine rhs_maker_ext_opencl(abx1_d, aby1_d, abz1_d, & bind(c, name='rhs_maker_ext_opencl') use, intrinsic :: iso_c_binding import c_rp - type(c_ptr), value :: abx1_d, aby1_d, abz1_d + type(c_ptr), value :: abx1_d, aby1_d, abz1_d type(c_ptr), value :: abx2_d, aby2_d, abz2_d type(c_ptr), value :: bfx_d, bfy_d, bfz_d real(c_rp) :: rho, ab1, ab2, ab3 @@ -230,7 +230,7 @@ subroutine scalar_rhs_maker_ext_opencl(fs_lag_d, fs_laglag_d, fs_d, rho, & bind(c, name='scalar_rhs_maker_ext_opencl') use, intrinsic :: iso_c_binding import c_rp - type(c_ptr), value :: fs_lag_d, fs_laglag_d, fs_d + type(c_ptr), value :: fs_lag_d, fs_laglag_d, fs_d real(c_rp) :: rho, ext1, ext2, ext3 integer(c_int) :: n end subroutine scalar_rhs_maker_ext_opencl @@ -248,7 +248,7 @@ subroutine rhs_maker_bdf_opencl(ulag1_d, ulag2_d, vlag1_d, vlag2_d, & reaL(c_rp) :: rho, dt, bd2, bd3, bd4 integer(c_int) :: nbd, n end subroutine rhs_maker_bdf_opencl - end interface + end interface interface subroutine scalar_rhs_maker_bdf_opencl(s_lag_d, s_laglag_d, fs_d, s_d, B_d, & @@ -261,12 +261,12 @@ subroutine scalar_rhs_maker_bdf_opencl(s_lag_d, s_laglag_d, fs_d, s_d, B_d, & reaL(c_rp) :: rho, dt, bd2, bd3, bd4 integer(c_int) :: nbd, n end subroutine scalar_rhs_maker_bdf_opencl - end interface + end interface #endif contains - subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab) + subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab) type(field_t), intent(inout) :: u,v, w type(field_t), intent(inout) :: uu, vv, ww type(field_series_t), intent(inout) :: uulag, vvlag, wwlag @@ -278,7 +278,7 @@ subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, uulag%lf(1)%x_d, uulag%lf(2)%x_d, vvlag%lf(1)%x_d, vvlag%lf(2)%x_d, & wwlag%lf(1)%x_d, wwlag%lf(2)%x_d, ab(1), ab(2), ab(3), nab, & uu%dof%size()) -#elif HAVE_CUDA +#elif HAVE_CUDA call rhs_maker_sumab_cuda(u%x_d, v%x_d, w%x_d, uu%x_d, vv%x_d, ww%x_d, & uulag%lf(1)%x_d, uulag%lf(2)%x_d, vvlag%lf(1)%x_d, vvlag%lf(2)%x_d, & wwlag%lf(1)%x_d, wwlag%lf(2)%x_d, ab(1), ab(2), ab(3), nab, & @@ -289,7 +289,7 @@ subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, wwlag%lf(1)%x_d, wwlag%lf(2)%x_d, ab(1), ab(2), ab(3), nab, & uu%dof%size()) #endif - + end subroutine rhs_maker_sumab_device subroutine rhs_maker_ext_device(fx_lag, fy_lag, fz_lag, & @@ -322,12 +322,11 @@ subroutine rhs_maker_ext_device(fx_lag, fy_lag, fz_lag, & fx_d, fy_d, fz_d, rho, & ext_coeffs(1), ext_coeffs(2), ext_coeffs(3), n) #endif - + end subroutine rhs_maker_ext_device - subroutine scalar_rhs_maker_ext_device(temp1, fs_lag, fs_laglag, fs, & + subroutine scalar_rhs_maker_ext_device(fs_lag, fs_laglag, fs, & rho, ext_coeffs, n) - type(field_t), intent(inout) :: temp1 type(field_t), intent(inout) :: fs_lag type(field_t), intent(inout) :: fs_laglag real(kind=rp), intent(inout) :: rho, ext_coeffs(4) @@ -347,14 +346,14 @@ subroutine scalar_rhs_maker_ext_device(temp1, fs_lag, fs_laglag, fs, & call scalar_rhs_maker_ext_opencl(fs_lag%x_d, fs_laglag%x_d, fs_d, rho, & ext_coeffs(1), ext_coeffs(2), ext_coeffs(3), n) #endif - + end subroutine scalar_rhs_maker_ext_device subroutine rhs_maker_bdf_device(ulag, vlag, wlag, bfx, bfy, bfz, & - u, v, w, B, rho, dt, bd, nbd, n) + u, v, w, B, rho, dt, bd, nbd, n) integer, intent(in) :: n, nbd type(field_t), intent(in) :: u, v, w - type(field_series_t), intent(in) :: ulag, vlag, wlag + type(field_series_t), intent(in) :: ulag, vlag, wlag real(kind=rp), intent(inout) :: bfx(n), bfy(n), bfz(n) real(kind=rp), intent(in) :: B(n) real(kind=rp), intent(in) :: dt, rho, bd(4) @@ -364,7 +363,7 @@ subroutine rhs_maker_bdf_device(ulag, vlag, wlag, bfx, bfy, bfz, & bfy_d = device_get_ptr(bfy) bfz_d = device_get_ptr(bfz) B_d = device_get_ptr(B) - + #ifdef HAVE_HIP call rhs_maker_bdf_hip(ulag%lf(1)%x_d, ulag%lf(2)%x_d, & vlag%lf(1)%x_d, vlag%lf(2)%x_d, & @@ -387,12 +386,10 @@ subroutine rhs_maker_bdf_device(ulag, vlag, wlag, bfx, bfy, bfz, & end subroutine rhs_maker_bdf_device - subroutine scalar_rhs_maker_bdf_device(temp1, temp2, s_lag, fs, s, B, rho, dt, & - bd, nbd, n) + subroutine scalar_rhs_maker_bdf_device(s_lag, fs, s, B, rho, dt, & + bd, nbd, n) integer, intent(in) :: n, nbd - type(field_t), intent(inout) :: temp1 - type(field_t), intent(in) :: s - type(field_t), intent(inout) :: temp2 + type(field_t), intent(in) :: s type(field_series_t), intent(in) :: s_lag real(kind=rp), intent(inout) :: fs(n) real(kind=rp), intent(in) :: B(n) @@ -401,7 +398,7 @@ subroutine scalar_rhs_maker_bdf_device(temp1, temp2, s_lag, fs, s, B, rho, dt, & fs_d = device_get_ptr(fs) B_d = device_get_ptr(B) - + #ifdef HAVE_HIP call scalar_rhs_maker_bdf_hip(s_lag%lf(1)%x_d, s_lag%lf(2)%x_d, & fs_d, s%x_d, B_d, rho, dt, bd(2), bd(3), bd(4), & @@ -417,5 +414,5 @@ subroutine scalar_rhs_maker_bdf_device(temp1, temp2, s_lag, fs, s, B, rho, dt, & #endif end subroutine scalar_rhs_maker_bdf_device - + end module rhs_maker_device diff --git a/src/common/bcknd/sx/rhs_maker_sx.f90 b/src/common/bcknd/sx/rhs_maker_sx.f90 index 7bfa7eedc13..abe5357e805 100644 --- a/src/common/bcknd/sx/rhs_maker_sx.f90 +++ b/src/common/bcknd/sx/rhs_maker_sx.f90 @@ -6,7 +6,7 @@ module rhs_maker_sx use scratch_registry, only : neko_scratch_registry implicit none private - + type, public, extends(rhs_maker_sumab_t) :: rhs_maker_sumab_sx_t contains procedure, nopass :: compute_fluid => rhs_maker_sumab_sx @@ -23,7 +23,7 @@ module rhs_maker_sx procedure, nopass :: compute_fluid => rhs_maker_bdf_sx procedure, nopass :: compute_scalar => scalar_rhs_maker_bdf_sx end type rhs_maker_bdf_sx_t - + contains subroutine rhs_maker_sumab_sx(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab) @@ -49,7 +49,7 @@ subroutine rhs_maker_sumab_sx(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab) w%x(i,1,1,1) = w%x(i,1,1,1) + ab(3) * wwlag%lf(2)%x(i,1,1,1) end do end if - + end subroutine rhs_maker_sumab_sx subroutine rhs_maker_ext_sx(fx_lag, fy_lag, fz_lag, & @@ -63,7 +63,7 @@ subroutine rhs_maker_ext_sx(fx_lag, fy_lag, fz_lag, & integer :: i type(field_t), pointer :: temp1, temp2, temp3 integer :: temp_indices(3) - + call neko_scratch_registry%request_field(temp1, temp_indices(1)) call neko_scratch_registry%request_field(temp2, temp_indices(2)) call neko_scratch_registry%request_field(temp3, temp_indices(3)) @@ -93,18 +93,20 @@ subroutine rhs_maker_ext_sx(fx_lag, fy_lag, fz_lag, & end do call neko_scratch_registry%relinquish_field(temp_indices) - + end subroutine rhs_maker_ext_sx - subroutine scalar_rhs_maker_ext_sx(temp1, fs_lag, fs_laglag, fs, rho, & - ext_coeffs, n) - type(field_t), intent(inout) :: temp1 + subroutine scalar_rhs_maker_ext_sx(fs_lag, fs_laglag, fs, rho, ext_coeffs, n) type(field_t), intent(inout) :: fs_lag type(field_t), intent(inout) :: fs_laglag real(kind=rp), intent(inout) :: rho, ext_coeffs(4) integer, intent(in) :: n real(kind=rp), intent(inout) :: fs(n) integer :: i + type(field_t), pointer :: temp1 + integer :: temp_index + + call neko_scratch_registry%request_field(temp1, temp_index) do i = 1, n temp1%x(i,1,1,1) = ext_coeffs(2) * fs_lag%x(i,1,1,1) + & @@ -119,14 +121,15 @@ subroutine scalar_rhs_maker_ext_sx(temp1, fs_lag, fs_laglag, fs, rho, & do i = 1, n fs(i) = (ext_coeffs(1) * fs(i) + temp1%x(i,1,1,1)) * rho end do - + + call neko_scratch_registry%relinquish_field(temp_index) end subroutine scalar_rhs_maker_ext_sx subroutine rhs_maker_bdf_sx(ulag, vlag, wlag, bfx, bfy, bfz, & - u, v, w, B, rho, dt, bd, nbd, n) + u, v, w, B, rho, dt, bd, nbd, n) integer, intent(in) :: n, nbd type(field_t), intent(in) :: u, v, w - type(field_series_t), intent(in) :: ulag, vlag, wlag + type(field_series_t), intent(in) :: ulag, vlag, wlag real(kind=rp), intent(inout) :: bfx(n), bfy(n), bfz(n) real(kind=rp), intent(in) :: B(n) real(kind=rp), intent(in) :: dt, rho, bd(4) @@ -172,16 +175,19 @@ subroutine rhs_maker_bdf_sx(ulag, vlag, wlag, bfx, bfy, bfz, & end subroutine rhs_maker_bdf_sx - subroutine scalar_rhs_maker_bdf_sx(temp1, temp2, s_lag, fs, s, B, rho, dt, & - bd, nbd, n) + subroutine scalar_rhs_maker_bdf_sx(s_lag, fs, s, B, rho, dt, bd, nbd, n) integer, intent(in) :: n, nbd - type(field_t), intent(inout) :: temp1, temp2 type(field_t), intent(in) :: s type(field_series_t), intent(in) :: s_lag real(kind=rp), intent(inout) :: fs(n) real(kind=rp), intent(in) :: B(n) real(kind=rp), intent(in) :: dt, rho, bd(4) integer :: i, ilag + type(field_t), pointer :: temp1, temp2 + integer :: temp_indices(2) + + call neko_scratch_registry%request_field(temp1, temp_indices(1)) + call neko_scratch_registry%request_field(temp2, temp_indices(2)) do i = 1, n temp2%x(i,1,1,1) = s%x(i,1,1,1) * B(i) * bd(2) @@ -201,6 +207,7 @@ subroutine scalar_rhs_maker_bdf_sx(temp1, temp2, s_lag, fs, s, B, rho, dt, & fs(i) = fs(i) + temp2%x(i,1,1,1) * (rho / dt) end do + call neko_scratch_registry%relinquish_field(temp_indices) end subroutine scalar_rhs_maker_bdf_sx end module rhs_maker_sx diff --git a/src/common/checkpoint.f90 b/src/common/checkpoint.f90 index cec1a8a4811..d769080cbb2 100644 --- a/src/common/checkpoint.f90 +++ b/src/common/checkpoint.f90 @@ -69,6 +69,7 @@ module checkpoint type(field_t), pointer :: abz1 => null() type(field_t), pointer :: abz2 => null() + type(field_t), pointer :: s => null() type(field_series_t), pointer :: slag => null() @@ -110,14 +111,14 @@ subroutine chkp_init(this, u, v, w, p) if ( u%msh%nelv .ne. p%msh%nelv ) then call neko_error('Velocity and pressure defined on different meshes') end if - + this%u => u this%v => v this%w => w this%p => p this%t = 0d0 - + end subroutine chkp_init !> Reset checkpoint @@ -132,7 +133,7 @@ subroutine chkp_free(this) nullify(this%ulag) nullify(this%vlag) nullify(this%wlag) - + end subroutine chkp_free !> Synchronize checkpoint with device @@ -146,57 +147,57 @@ subroutine chkp_sync_host(this) if (associated(this%u) .and. associated(this%v) .and. & associated(this%w) .and. associated(this%p)) then - call device_memcpy(u%x, u%x_d, u%dof%size(), DEVICE_TO_HOST) - call device_memcpy(v%x, v%x_d, v%dof%size(), DEVICE_TO_HOST) - call device_memcpy(w%x, w%x_d, w%dof%size(), DEVICE_TO_HOST) - call device_memcpy(p%x, p%x_d, p%dof%size(), DEVICE_TO_HOST) + call device_memcpy(u%x, u%x_d, u%dof%size(), DEVICE_TO_HOST, sync=.false.) + call device_memcpy(v%x, v%x_d, v%dof%size(), DEVICE_TO_HOST, sync=.false.) + call device_memcpy(w%x, w%x_d, w%dof%size(), DEVICE_TO_HOST, sync=.false.) + call device_memcpy(p%x, p%x_d, p%dof%size(), DEVICE_TO_HOST, sync=.false.) end if - + if (associated(this%ulag) .and. associated(this%vlag) .and. & associated(this%wlag)) then call device_memcpy(ulag%lf(1)%x, ulag%lf(1)%x_d, & - u%dof%size(), DEVICE_TO_HOST) + u%dof%size(), DEVICE_TO_HOST, sync=.false.) call device_memcpy(ulag%lf(2)%x, ulag%lf(2)%x_d, & - u%dof%size(), DEVICE_TO_HOST) - + u%dof%size(), DEVICE_TO_HOST, sync=.false.) + call device_memcpy(vlag%lf(1)%x, vlag%lf(1)%x_d, & - v%dof%size(), DEVICE_TO_HOST) + v%dof%size(), DEVICE_TO_HOST, sync=.false.) call device_memcpy(vlag%lf(2)%x, vlag%lf(2)%x_d, & - v%dof%size(), DEVICE_TO_HOST) - + v%dof%size(), DEVICE_TO_HOST, sync=.false.) + call device_memcpy(wlag%lf(1)%x, wlag%lf(1)%x_d, & - w%dof%size(), DEVICE_TO_HOST) + w%dof%size(), DEVICE_TO_HOST, sync=.false.) call device_memcpy(wlag%lf(2)%x, wlag%lf(2)%x_d, & - w%dof%size(), DEVICE_TO_HOST) + w%dof%size(), DEVICE_TO_HOST, sync=.false.) call device_memcpy(this%abx1%x, this%abx1%x_d, & - w%dof%size(), DEVICE_TO_HOST) + w%dof%size(), DEVICE_TO_HOST, sync=.false.) call device_memcpy(this%abx2%x, this%abx2%x_d, & - w%dof%size(), DEVICE_TO_HOST) + w%dof%size(), DEVICE_TO_HOST, sync=.false.) call device_memcpy(this%aby1%x, this%aby1%x_d, & - w%dof%size(), DEVICE_TO_HOST) + w%dof%size(), DEVICE_TO_HOST, sync=.false.) call device_memcpy(this%aby2%x, this%aby2%x_d, & - w%dof%size(), DEVICE_TO_HOST) + w%dof%size(), DEVICE_TO_HOST, sync=.false.) call device_memcpy(this%abz1%x, this%abz1%x_d, & - w%dof%size(), DEVICE_TO_HOST) + w%dof%size(), DEVICE_TO_HOST, sync=.false.) call device_memcpy(this%abz2%x, this%abz2%x_d, & - w%dof%size(), DEVICE_TO_HOST) + w%dof%size(), DEVICE_TO_HOST, sync=.false.) end if if (associated(this%s)) then call device_memcpy(this%s%x, this%s%x_d, & - this%s%dof%size(), DEVICE_TO_HOST) + this%s%dof%size(), DEVICE_TO_HOST, sync=.false.) call device_memcpy(this%slag%lf(1)%x, this%slag%lf(1)%x_d, & - this%s%dof%size(), DEVICE_TO_HOST) + this%s%dof%size(), DEVICE_TO_HOST, sync=.false.) call device_memcpy(this%slag%lf(2)%x, this%slag%lf(2)%x_d, & - this%s%dof%size(), DEVICE_TO_HOST) + this%s%dof%size(), DEVICE_TO_HOST, sync=.false.) call device_memcpy(this%abs1%x, this%abs1%x_d, & - w%dof%size(), DEVICE_TO_HOST) + w%dof%size(), DEVICE_TO_HOST, sync=.false.) call device_memcpy(this%abs2%x, this%abs2%x_d, & - w%dof%size(), DEVICE_TO_HOST) + w%dof%size(), DEVICE_TO_HOST, sync=.false.) end if end associate - call device_sync() + call device_sync(glb_cmd_queue) end if - + end subroutine chkp_sync_host !> Synchronize device with checkpoint @@ -210,41 +211,54 @@ subroutine chkp_sync_device(this) if (associated(this%u) .and. associated(this%v) .and. & associated(this%w)) then - call device_memcpy(u%x, u%x_d, u%dof%size(), HOST_TO_DEVICE) - call device_memcpy(v%x, v%x_d, v%dof%size(), HOST_TO_DEVICE) - call device_memcpy(w%x, w%x_d, w%dof%size(), HOST_TO_DEVICE) - call device_memcpy(p%x, p%x_d, p%dof%size(), HOST_TO_DEVICE) + call device_memcpy(u%x, u%x_d, u%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(v%x, v%x_d, v%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(w%x, w%x_d, w%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(p%x, p%x_d, p%dof%size(), & + HOST_TO_DEVICE, sync=.false.) end if - + if (associated(this%ulag) .and. associated(this%vlag) .and. & associated(this%wlag)) then - call device_memcpy(ulag%lf(1)%x, ulag%lf(1)%x_d, & - u%dof%size(), HOST_TO_DEVICE) - call device_memcpy(ulag%lf(2)%x, ulag%lf(2)%x_d, & - u%dof%size(), HOST_TO_DEVICE) - - call device_memcpy(vlag%lf(1)%x, vlag%lf(1)%x_d, & - v%dof%size(), HOST_TO_DEVICE) - call device_memcpy(vlag%lf(2)%x, vlag%lf(2)%x_d, & - v%dof%size(), HOST_TO_DEVICE) - - call device_memcpy(wlag%lf(1)%x, wlag%lf(1)%x_d, & - w%dof%size(), HOST_TO_DEVICE) - call device_memcpy(wlag%lf(2)%x, wlag%lf(2)%x_d, & - w%dof%size(), HOST_TO_DEVICE) + call device_memcpy(ulag%lf(1)%x, ulag%lf(1)%x_d, u%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(ulag%lf(2)%x, ulag%lf(2)%x_d, u%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + + call device_memcpy(vlag%lf(1)%x, vlag%lf(1)%x_d, v%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(vlag%lf(2)%x, vlag%lf(2)%x_d, v%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + + call device_memcpy(wlag%lf(1)%x, wlag%lf(1)%x_d, w%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(wlag%lf(2)%x, wlag%lf(2)%x_d, w%dof%size(), & + HOST_TO_DEVICE, sync=.false.) end if if (associated(this%s)) then - call device_memcpy(this%s%x, this%s%x_d, & - this%s%dof%size(), HOST_TO_DEVICE) + call device_memcpy(this%s%x, this%s%x_d, this%s%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + + call device_memcpy(this%slag%lf(1)%x, this%slag%lf(1)%x_d, & + this%s%dof%size(), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%slag%lf(2)%x, this%slag%lf(2)%x_d, & + this%s%dof%size(), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%abs1%x, this%abs1%x_d, & + w%dof%size(), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%abs2%x, this%abs2%x_d, & + w%dof%size(), HOST_TO_DEVICE, sync=.false.) end if end associate end if - + end subroutine chkp_sync_device !> Add lagged velocity terms subroutine chkp_add_lag(this, ulag, vlag, wlag) - class(chkp_t), intent(inout) :: this + class(chkp_t), intent(inout) :: this type(field_series_t), target :: ulag type(field_series_t), target :: vlag type(field_series_t), target :: wlag @@ -252,16 +266,16 @@ subroutine chkp_add_lag(this, ulag, vlag, wlag) this%ulag => ulag this%vlag => vlag this%wlag => wlag - + end subroutine chkp_add_lag !> Add scalars subroutine chkp_add_scalar(this, s) - class(chkp_t), intent(inout) :: this + class(chkp_t), intent(inout) :: this type(field_t), target :: s this%s => s - + end subroutine chkp_add_scalar @@ -272,5 +286,5 @@ pure function chkp_restart_time(this) result(rtime) rtime = this%t end function chkp_restart_time - + end module checkpoint diff --git a/src/common/cpuid.c b/src/common/cpuid.c index 9d1ac211f7d..5036d9a786a 100644 --- a/src/common/cpuid.c +++ b/src/common/cpuid.c @@ -35,6 +35,7 @@ void system_cpuid(char *name, int len) { /* Generic ARM unless we found something known */ strncpy(name, "ARM", len); int cpufj = 0; + int cpuarm = 0; #endif while (fgets (buf, MAXLEN, fp)) { #if defined(_ARCH_PPC64) @@ -52,6 +53,10 @@ void system_cpuid(char *name, int len) { cpufj = 1; continue; } + else if(strstr(token, "0x41")) { + cpuarm = 1; + continue; + } } if (strstr(buf, "CPU part") && cpufj) { @@ -60,7 +65,16 @@ void system_cpuid(char *name, int len) { if (strstr(token, "0x001")) { strncpy(name, "A64FX", len); break; - } + } + } + + if (strstr(buf, "CPU part") && cpuarm) { + char *token = strtok (buf, delim); + token = strtok (NULL, delim); + if (strstr(token, "0xd4f")) { + strncpy(name, "ARM Neoverse V2", len); + break; + } } #endif } diff --git a/src/common/craypat.F90 b/src/common/craypat.F90 index dac30814671..8f3dba40a27 100644 --- a/src/common/craypat.F90 +++ b/src/common/craypat.F90 @@ -4,7 +4,7 @@ module craypat use stack, only : stack_i4_t use utils, only : neko_error implicit none - + type(stack_i4_t), private :: region_depth logical, private :: craypat_on = .false. #ifdef CRAYPAT @@ -31,7 +31,7 @@ end subroutine craypat_record_stop subroutine craypat_region_begin(name) character(kind=c_char,len=*) :: name integer :: ierr, region_id - + if (craypat_on) then !> @todo Don't hardcode region names... if (name .eq. 'Time-Step') then @@ -43,13 +43,13 @@ subroutine craypat_region_begin(name) else if (name .eq. 'gather-scatter') then region_id = 4 else ! User defined region - region_id = 99 + region_id = 99 end if - + call region_depth%push(region_id) call PAT_region_begin(region_id, name, ierr) end if - + end subroutine craypat_region_begin !> End a CrayPat region @@ -62,9 +62,9 @@ subroutine craypat_region_end end if call PAT_region_end(region_depth%pop(), ierr) end if - + end subroutine craypat_region_end #endif - + end module craypat diff --git a/src/common/datadist.f90 b/src/common/datadist.f90 index 83d2f771590..27ba56e2e19 100644 --- a/src/common/datadist.f90 +++ b/src/common/datadist.f90 @@ -40,7 +40,7 @@ module datadist type(MPI_Comm) :: comm !< Communicator on which the dist. is defined integer :: pe_rank !< Pe's rank in the given distribution integer :: pe_size !< Size of communicator in the given dist. - integer :: L + integer :: L integer :: R integer :: M !< Total, global, size integer :: Ip !< Number of local values on this process @@ -48,7 +48,7 @@ module datadist !> Load-balanced linear distribution \f$ M = PL + R \f$ type, extends(dist_t) :: linear_dist_t - contains + contains procedure :: num_local => linear_dist_Ip procedure :: num_global => linear_dist_M procedure :: start_idx => linear_dist_start @@ -62,7 +62,7 @@ module datadist public :: linear_dist_t contains - + function linear_dist_init(n, rank, size, comm) result(this) integer, intent(in) :: n !< Total size integer :: rank !< PE's rank to define the dist. over @@ -74,11 +74,11 @@ function linear_dist_init(n, rank, size, comm) result(this) this%comm = comm this%pe_rank = rank this%pe_size = size - + this%L = floor(dble(this%M) / dble(this%pe_size)) this%R = modulo(this%M, this%pe_size) this%Ip = floor((dble(this%M) + dble(this%pe_size) - & - dble(this%pe_rank) - 1d0) / dble(this%pe_size)) + dble(this%pe_rank) - 1d0) / dble(this%pe_size)) end function linear_dist_init pure function linear_dist_Ip(this) result(n) diff --git a/src/common/distdata.f90 b/src/common/distdata.f90 index 5e58f5aaa43..2311a7d1315 100644 --- a/src/common/distdata.f90 +++ b/src/common/distdata.f90 @@ -40,14 +40,14 @@ module distdata type, public :: distdata_t type(stack_i4t2_t) :: shared_el_facet !< Elemenets with shared facets - + type(uset_i4_t) :: shared_facet !< List of shared facets type(uset_i4_t) :: shared_edge !< List of shared edges type(uset_i4_t) :: shared_point !< List of shared points - + integer, allocatable :: local_to_global_facet(:)!< Local to global (facets) integer, allocatable :: local_to_global_edge(:) !< Local to global (edges) - + end type distdata_t public :: distdata_init, distdata_free, distdata_set_shared_el_facet, & @@ -62,13 +62,13 @@ subroutine distdata_init(ddata) type(distdata_t), intent(inout) :: ddata call distdata_free(ddata) - + call ddata%shared_el_facet%init() call ddata%shared_facet%init() - call ddata%shared_edge%init() + call ddata%shared_edge%init() call ddata%shared_point%init() - + end subroutine distdata_init !> Free a distdata type @@ -76,7 +76,7 @@ subroutine distdata_free(ddata) type(distdata_t), intent(inout) :: ddata call ddata%shared_el_facet%free() - + call ddata%shared_facet%free() call ddata%shared_edge%free() call ddata%shared_point%free() @@ -84,11 +84,11 @@ subroutine distdata_free(ddata) if (allocated(ddata%local_to_global_facet)) then deallocate(ddata%local_to_global_facet) end if - + if (allocated(ddata%local_to_global_edge)) then deallocate(ddata%local_to_global_edge) end if - + end subroutine distdata_free !> Mark an element's facet as shared @@ -100,7 +100,7 @@ subroutine distdata_set_shared_el_facet(ddata, element, side) t%x = (/ element, side /) call ddata%shared_el_facet%push(t) - + end subroutine distdata_set_shared_el_facet !> Mark a facet as shared @@ -109,17 +109,17 @@ subroutine distdata_set_shared_facet(ddata, facet) integer, value :: facet !< Facet index (local numbering) call ddata%shared_facet%add(facet) - + end subroutine distdata_set_shared_facet - + !> Mark an element's edge as shared !! @attention only defined for elements where facet .ne. edges subroutine distdata_set_shared_edge(ddata, edge) type(distdata_t), intent(inout) :: ddata - integer, value :: edge !< Edge index (local numbering) + integer, value :: edge !< Edge index (local numbering) call ddata%shared_edge%add(edge) - + end subroutine distdata_set_shared_edge !> Mark a point as shared @@ -128,7 +128,7 @@ subroutine distdata_set_shared_point(ddata, point) integer, value :: point !< Point index (local numbering) call ddata%shared_point%add(point) - + end subroutine distdata_set_shared_point !> Set local to global mapping (facets) @@ -138,7 +138,7 @@ subroutine distdata_set_local_to_global_facet(ddata, local, global) integer, intent(in), value :: global !< Global facet index ddata%local_to_global_facet(local) = global - + end subroutine distdata_set_local_to_global_facet !> Set local to global mapping (edges) @@ -148,7 +148,7 @@ subroutine distdata_set_local_to_global_edge(ddata, local, global) integer, intent(in) , value :: global !< Global edge index ddata%local_to_global_edge(local) = global - + end subroutine distdata_set_local_to_global_edge - + end module distdata diff --git a/src/common/global_interpolation.F90 b/src/common/global_interpolation.F90 index ed58ab06743..5d9e703182b 100644 --- a/src/common/global_interpolation.F90 +++ b/src/common/global_interpolation.F90 @@ -34,7 +34,7 @@ !! @note This modules uses functions from `gslib`, namely `findpts_setup`, !! `findpts`, and `findpts_eval`. A full description of these subroutines can !! be found at https://github.com/Nek5000/gslib/blob/master/src/findpts.c -!! +!! module global_interpolation use num_types, only: rp use space, only: space_t @@ -49,13 +49,13 @@ module global_interpolation use, intrinsic :: iso_c_binding implicit none private - !> Implements global interpolation for arbitrary points in the domain. + !> Implements global interpolation for arbitrary points in the domain. type, public :: global_interpolation_t !> Dofmap from which we interpolate the points type(dofmap_t), pointer :: dof !> Mesh on which we interpolate type(mesh_t), pointer :: mesh - !> Space + !> Space type(space_t), pointer :: Xh !> Interpolator for local points type(local_interpolator_t) :: local_interp @@ -123,14 +123,14 @@ subroutine global_interpolation_init(this, dof, tol) if(present(tol)) this%tol = tol #ifdef HAVE_GSLIB - + lx = this%Xh%lx ly = this%Xh%ly lz = this%Xh%lz nelv = this%mesh%nelv !Number of points to iterate on simultaneosuly max_pts_per_iter = 128 - + call fgslib_findpts_setup(this%gs_handle, & NEKO_COMM, pe_size, & this%mesh%gdim, & @@ -159,10 +159,10 @@ subroutine global_interpolation_free(this) call this%free_points() #ifdef HAVE_GSLIB - if (this%gs_init) then - call fgslib_findpts_free(this%gs_handle) - this%gs_init = .false. - end if + if (this%gs_init) then + call fgslib_findpts_free(this%gs_handle) + this%gs_init = .false. + end if #endif end subroutine global_interpolation_free @@ -186,8 +186,8 @@ subroutine global_interpolation_free_points(this) end if end subroutine global_interpolation_free_points - - !> Common routine for finding the points. + + !> Common routine for finding the points. subroutine global_interpolation_find_common(this) class(global_interpolation_t), intent(inout) :: this !!Perhaps this should be kind dp @@ -243,22 +243,22 @@ subroutine global_interpolation_find_common(this) allocate(x_check(this%n_points)) allocate(y_check(this%n_points)) allocate(z_check(this%n_points)) - + call fgslib_findpts_eval(this%gs_handle, x_check, & 1, this%error_code, 1, & - this%proc_owner, 1, this%el_owner, 1, & + this%proc_owner, 1, this%el_owner, 1, & this%rst, this%mesh%gdim, & this%n_points, this%dof%x) - + call fgslib_findpts_eval(this%gs_handle, y_check, & 1, this%error_code, 1, & - this%proc_owner, 1, this%el_owner, 1, & + this%proc_owner, 1, this%el_owner, 1, & this%rst, this%mesh%gdim, & this%n_points, this%dof%y) - + call fgslib_findpts_eval(this%gs_handle, z_check, & 1, this%error_code, 1, & - this%proc_owner, 1, this%el_owner, 1, & + this%proc_owner, 1, this%el_owner, 1, & this%rst, this%mesh%gdim, & this%n_points, this%dof%z) @@ -286,7 +286,7 @@ subroutine global_interpolation_find_common(this) end if end do - + deallocate(x_check) deallocate(y_check) deallocate(z_check) @@ -299,8 +299,8 @@ subroutine global_interpolation_find_common(this) call neko_error('Neko needs to be built with GSLIB support') #endif end subroutine global_interpolation_find_common - - !> Finds the corresponding r,s,t coordinates + + !> Finds the corresponding r,s,t coordinates !! in the correct global element as well as which process that owns the point. !! After this the values at these points can be evaluated. !! If the locations of the points change this must be called again. @@ -328,7 +328,7 @@ subroutine global_interpolation_find_coords(this, x, y, z, n_points) this%n_points = n_points call global_interpolation_init_point_arrays(this) - + do i = 1, n_points this%xyz(1,i) = x(i,1,1,1) this%xyz(2,i) = y(i,1,1,1) @@ -354,7 +354,7 @@ subroutine global_interpolation_init_point_arrays(this) end subroutine global_interpolation_init_point_arrays - !> Finds the corresponding r,s,t coordinates + !> Finds the corresponding r,s,t coordinates !! in the correct global element as well as which process that owns the point. !! After this the values at these points can be evaluated. !! If the locations of the points change this must be called again. @@ -376,7 +376,7 @@ subroutine global_interpolation_find_xyz(this, xyz, n_points) this%n_points = n_points call global_interpolation_init_point_arrays(this) - + !> make deep copy incase xyz goes out of scope or deallocated call copy(this%xyz,xyz,3*n_points) @@ -406,7 +406,7 @@ subroutine global_interpolation_find_and_redist(this, xyz, n_points) this%n_points = n_points call global_interpolation_init_point_arrays(this) - + !> make deep copy incase xyz goes out of scope or deallocated call copy(this%xyz,xyz,3*n_points) @@ -414,14 +414,14 @@ subroutine global_interpolation_find_and_redist(this, xyz, n_points) !> Sets new points and redistributes them call global_interpolation_redist(this) call global_interpolation_find_common(this) - + do i = 1, this%n_points if (this%proc_owner(i) .ne. pe_rank) then write(*,*) 'Redistribution failed on rank: ', pe_rank,& 'for point with coord: ', & this%xyz(1,i),this%xyz(2,i),this%xyz(3,i) exit - end if + end if end do n_points = this%n_points @@ -462,7 +462,7 @@ subroutine global_interpolation_redist(this) call MPI_Gather(n_points_per_pe(i), 1, MPI_INTEGER,& n_points_from_pe, 1, MPI_INTEGER, i, NEKO_COMM, ierr) end do - + allocate(n_point_offset_from_pe(0:(pe_size-1))) n_point_offset_from_pe(0) = 0 do i = 1,(pe_size-1) @@ -522,7 +522,7 @@ subroutine global_interpolation_evaluate(this, interp_values, field) if (.not. this%all_points_local) then call fgslib_findpts_eval(this%gs_handle, interp_values, & 1, this%error_code, 1, & - this%proc_owner, 1, this%el_owner, 1, & + this%proc_owner, 1, this%el_owner, 1, & this%rst, this%mesh%gdim, & this%n_points, field) else diff --git a/src/common/jobctrl.f90 b/src/common/jobctrl.f90 index 16abd336c44..5c415cd41c1 100644 --- a/src/common/jobctrl.f90 +++ b/src/common/jobctrl.f90 @@ -39,7 +39,7 @@ module jobctrl use logger implicit none private - + interface jobctrl_set_time_limit module procedure jobctrl_set_time_limit_sec, jobctrl_set_time_limit_str end interface jobctrl_set_time_limit @@ -70,7 +70,7 @@ subroutine jobctrl_set_time_limit_str(limit_str) integer :: str_len, sep_h, h, m, s str_len = len_trim(limit_str) - + if (str_len .lt. 8) then call neko_error('Invalid job limit') end if @@ -86,9 +86,9 @@ subroutine jobctrl_set_time_limit_str(limit_str) read(limit_str(sep_h+4:str_len), *) s call jobctrl_set_time_limit_sec(h*3600 + m * 60 + s) - + end subroutine jobctrl_set_time_limit_str - + !> Set a job's time limit (in seconds) subroutine jobctrl_set_time_limit_sec(sec) integer :: sec @@ -98,9 +98,9 @@ subroutine jobctrl_set_time_limit_sec(sec) jstop_sec = sec - jobctrl_jobtime() call signal_set_timeout(jstop_sec) end if - + end subroutine jobctrl_set_time_limit_sec - + !> Check if the job's time limit has been reached function jobctrl_time_limit() result(jstop) logical :: jstop @@ -110,13 +110,14 @@ function jobctrl_time_limit() result(jstop) jstop = (signal_timeout() .or. signal_usr(1)) if (jstop) then + ! Todo: This might be a warning instead of a message? write(log_buf, '(A)') '! stop at job limit >>>' - call neko_log%message(log_buf) + call neko_log%message(log_buf, NEKO_LOG_QUIET) end if ! Let rank zero decide if we should stop call MPI_Bcast(jstop, 1, MPI_LOGICAL, 0, NEKO_COMM, ierr) - + end function jobctrl_time_limit !> Returns a job's time in seconds relative to the first call @@ -124,13 +125,13 @@ function jobctrl_jobtime() result(jobtime) real(kind=rp), save :: stime real(kind=rp) :: jobtime logical, save :: init = .false. - + if (.not. init) then stime = MPI_WTIME() init = .true. end if - + jobtime = MPI_WTIME() - stime end function jobctrl_jobtime - + end module jobctrl diff --git a/src/common/json_utils.f90 b/src/common/json_utils.f90 index 0d997d8a05f..0a3cda4270c 100644 --- a/src/common/json_utils.f90 +++ b/src/common/json_utils.f90 @@ -32,162 +32,199 @@ ! !> Utilities for retrieving parameters from the case files. module json_utils - use num_types, only : rp + use num_types, only : rp, dp, sp use json_module, only : json_file, json_value, json_core use utils, only : neko_error implicit none private - public :: json_get, json_get_or_default - + public :: json_get, json_get_or_default, json_extract_item + !> Retrieves a parameter by name or throws an error interface json_get - module procedure json_get_real, json_get_integer, json_get_logical, & - json_get_string, json_get_real_array, & - json_get_integer_array, json_get_logical_array, & - json_get_string_array + module procedure json_get_real, json_get_double, json_get_integer, & + json_get_logical, json_get_string, json_get_real_array, & + json_get_double_array, json_get_integer_array, json_get_logical_array, & + json_get_string_array end interface json_get !> Retrieves a parameter by name or assigns a provided default value. !! In the latter case also adds the missing paramter to the json interface json_get_or_default - module procedure json_get_or_default_real, json_get_or_default_integer,& - json_get_or_default_string, json_get_or_default_logical + module procedure json_get_or_default_real,json_get_or_default_double, & + json_get_or_default_integer, & + json_get_or_default_string, json_get_or_default_logical end interface json_get_or_default - + contains - + !> Retrieves a real parameter by name or throws an error - !! @param json The json to retrieve the parameter from. - !! @param name The full path to the parameter. - !! @value value The variable to be populated with the retrieved parameter. + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. subroutine json_get_real(json, name, value) type(json_file), intent(inout) :: json character(len=*), intent(in) :: name - real(kind=rp), intent(out) :: value + real(kind=sp), intent(out) :: value logical :: found - + call json%get(name, value, found) - + if (.not. found) then call neko_error("Parameter "//name//" missing from the case file") end if end subroutine json_get_real + !> Retrieves a double precision real parameter by name or throws an error + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. + subroutine json_get_double(json, name, value) + type(json_file), intent(inout) :: json + character(len=*), intent(in) :: name + real(kind=dp), intent(out) :: value + logical :: found + + call json%get(name, value, found) + + if (.not. found) then + call neko_error("Parameter "//name//" missing from the case file") + end if + end subroutine json_get_double + !> Retrieves an integer parameter by name or throws an error - !! @param json The json to retrieve the parameter from. - !! @param name The full path to the parameter. - !! @value value The variable to be populated with the retrieved parameter. + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. subroutine json_get_integer(json, name, value) type(json_file), intent(inout) :: json character(len=*), intent(in) :: name integer, intent(out) :: value logical :: found - + call json%get(name, value, found) - + if (.not. found) then call neko_error("Parameter "//name//" missing from the case file") end if end subroutine json_get_integer !> Retrieves a logical parameter by name or throws an error - !! @param json The json to retrieve the parameter from. - !! @param name The full path to the parameter. - !! @value value The variable to be populated with the retrieved parameter. + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. subroutine json_get_logical(json, name, value) type(json_file), intent(inout) :: json character(len=*), intent(in) :: name logical, intent(out) :: value logical :: found - + call json%get(name, value, found) - + if (.not. found) then call neko_error("Parameter "//name//" missing from the case file") end if end subroutine json_get_logical !> Retrieves a string parameter by name or throws an error - !! @param json The json to retrieve the parameter from. - !! @param name The full path to the parameter. - !! @value value The variable to be populated with the retrieved parameter. + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. subroutine json_get_string(json, name, value) type(json_file), intent(inout) :: json character(len=*), intent(in) :: name character(len=:), allocatable, intent(out) :: value logical :: found - + call json%get(name, value, found) - + if (.not. found) then call neko_error("Parameter "//name//" missing from the case file") end if end subroutine json_get_string !> Retrieves a real array parameter by name or throws an error - !! @param json The json to retrieve the parameter from. - !! @param name The full path to the parameter. - !! @value value The variable to be populated with the retrieved parameter. + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. subroutine json_get_real_array(json, name, value) type(json_file), intent(inout) :: json character(len=*), intent(in) :: name - real(kind=rp), allocatable, intent(out) :: value(:) + real(kind=sp), allocatable, intent(out) :: value(:) logical :: found - + call json%get(name, value, found) - + if (.not. found) then call neko_error("Parameter "//name//" missing from the case file") end if end subroutine json_get_real_array + !> Retrieves a real array parameter by name or throws an error + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. + subroutine json_get_double_array(json, name, value) + type(json_file), intent(inout) :: json + character(len=*), intent(in) :: name + real(kind=dp), allocatable, intent(out) :: value(:) + logical :: found + + call json%get(name, value, found) + + if (.not. found) then + call neko_error("Parameter "//name//" missing from the case file") + end if + end subroutine json_get_double_array + !> Retrieves a integer array parameter by name or throws an error - !! @param json The json to retrieve the parameter from. - !! @param name The full path to the parameter. - !! @value value The variable to be populated with the retrieved parameter. + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. subroutine json_get_integer_array(json, name, value) type(json_file), intent(inout) :: json character(len=*), intent(in) :: name integer, allocatable, intent(out) :: value(:) logical :: found - + call json%get(name, value, found) - + if (.not. found) then call neko_error("Parameter "//name//" missing from the case file") end if end subroutine json_get_integer_array !> Retrieves a logical array parameter by name or throws an error - !! @param json The json to retrieve the parameter from. - !! @param name The full path to the parameter. - !! @value value The variable to be populated with the retrieved parameter. + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. subroutine json_get_logical_array(json, name, value) type(json_file), intent(inout) :: json character(len=*), intent(in) :: name logical, allocatable, intent(out) :: value(:) logical :: found - + call json%get(name, value, found) - + if (.not. found) then call neko_error("Parameter "//name//" missing from the case file") end if end subroutine json_get_logical_array !> Retrieves a string array parameter by name or throws an error - !! @param json The json to retrieve the parameter from. - !! @param name The full path to the parameter. - !! @value value The variable to be populated with the retrieved parameter. - subroutine json_get_string_array(json, name, value) + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. + !! @param[in] filler The default string to fill empty array items with. + subroutine json_get_string_array(json, name, value, filler) type(json_file), intent(inout) :: json character(len=*), intent(in) :: name character(len=*), allocatable, intent(out) :: value(:) - logical :: found, logical_val + character(len=*), optional, intent(in) :: filler + logical :: found type(json_value), pointer :: json_val, val_ptr type(json_core) :: core - character(len=:), allocatable :: string_value + character(len=:), allocatable :: string_value integer :: i, n_children if (.not. json%valid_path(name)) then @@ -201,55 +238,77 @@ subroutine json_get_string_array(json, name, value) deallocate(value) allocate(value(n_children)) end if - + call json%get(name, json_val, found) call json%get_core(core) do i = 1, n_children call core%get_child(json_val, i, val_ptr, found) call core%get(val_ptr, string_value) - + if (len(string_value) .gt. 0) then value(i) = string_value + else if(present(filler)) then + value(i) = filler end if end do - + end subroutine json_get_string_array !> Retrieves a real parameter by name or assigns a provided default value. !! In the latter case also adds the missing paramter to the json. - !! @param json The json to retrieve the parameter from. - !! @param name The full path to the parameter. - !! @value value The variable to be populated with the retrieved parameter. + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. subroutine json_get_or_default_real(json, name, value, default) type(json_file), intent(inout) :: json character(len=*), intent(in) :: name - real(kind=rp), intent(out) :: value - real(kind=rp), intent(in) :: default + real(kind=sp), intent(out) :: value + real(kind=sp), intent(in) :: default logical :: found - + call json%get(name, value, found) - + if (.not. found) then value = default call json%add(name, value) end if end subroutine json_get_or_default_real + !> Retrieves a real parameter by name or assigns a provided default value. + !! In the latter case also adds the missing paramter to the json. + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. + subroutine json_get_or_default_double(json, name, value, default) + type(json_file), intent(inout) :: json + character(len=*), intent(in) :: name + real(kind=dp), intent(out) :: value + real(kind=dp), intent(in) :: default + logical :: found + + call json%get(name, value, found) + + if (.not. found) then + value = default + call json%add(name, value) + end if + end subroutine json_get_or_default_double + !> Retrieves an integer parameter by name or assigns a provided default value. !! In the latter case also adds the missing paramter to the json. - !! @param json The json to retrieve the parameter from. - !! @param name The full path to the parameter. - !! @value value The variable to be populated with the retrieved parameter. + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. subroutine json_get_or_default_integer(json, name, value, default) type(json_file), intent(inout) :: json character(len=*), intent(in) :: name integer, intent(out) :: value integer, intent(in) :: default logical :: found - + call json%get(name, value, found) - + if (.not. found) then value = default call json%add(name, value) @@ -258,18 +317,18 @@ end subroutine json_get_or_default_integer !> Retrieves a logical parameter by name or assigns a provided default value. !! In the latter case also adds the missing paramter to the json. - !! @param json The json to retrieve the parameter from. - !! @param name The full path to the parameter. - !! @value value The variable to be populated with the retrieved parameter. + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. subroutine json_get_or_default_logical(json, name, value, default) type(json_file), intent(inout) :: json character(len=*), intent(in) :: name logical, intent(out) :: value logical, intent(in) :: default logical :: found - + call json%get(name, value, found) - + if (.not. found) then value = default call json%add(name, value) @@ -278,22 +337,42 @@ end subroutine json_get_or_default_logical !> Retrieves a string parameter by name or assigns a provided default value. !! In the latter case also adds the missing paramter to the json. - !! @param json The json to retrieve the parameter from. - !! @param name The full path to the parameter. - !! @value value The variable to be populated with the retrieved parameter. + !! @param[inout] json The json to retrieve the parameter from. + !! @param[in] name The full path to the parameter. + !! @param[out] value The variable to be populated with the retrieved parameter. subroutine json_get_or_default_string(json, name, value, default) type(json_file), intent(inout) :: json character(len=*), intent(in) :: name character(len=:), allocatable, intent(out) :: value character(len=*), intent(in) :: default logical :: found - + call json%get(name, value, found) - + if (.not. found) then value = default call json%add(name, value) end if end subroutine json_get_or_default_string + !> Extract `i`th item from a JSON array as a separate JSON object. + !! @param[inout] core JSON core object. + !! @param[in] array The JSON object with the array. + !! @param[in] i The index of the item to extract. + !! @param[inout] item JSON object object to be filled with the subdict. + subroutine json_extract_item(core, array, i, item) + type(json_core), intent(inout) :: core + type(json_value), pointer, intent(in) :: array + integer, intent(in) :: i + type(json_file), intent(inout) :: item + type(json_value), pointer :: ptr + logical :: found + character(len=:), allocatable :: buffer + + call core%get_child(array, i, ptr, found) + call core%print_to_string(ptr, buffer) + call item%load_from_string(buffer) + + end subroutine json_extract_item + end module json_utils diff --git a/src/common/log.f90 b/src/common/log.f90 index b003f250400..86ed3c685d6 100644 --- a/src/common/log.f90 +++ b/src/common/log.f90 @@ -38,7 +38,7 @@ module logger private integer, public, parameter :: LOG_SIZE = 80 - + type, public :: log_t integer :: indent_ integer :: section_id_ @@ -48,7 +48,7 @@ module logger procedure, pass(this) :: begin => log_begin procedure, pass(this) :: end => log_end procedure, pass(this) :: indent => log_indent - procedure, nopass :: newline => log_newline + procedure, nopass :: newline => log_newline procedure, pass(this) :: message => log_message procedure, pass(this) :: section => log_section procedure, pass(this) :: status => log_status @@ -56,10 +56,18 @@ module logger procedure, pass(this) :: warning => log_warning procedure, pass(this) :: end_section => log_end_section end type log_t - + !> Global log stream type(log_t), public :: neko_log - + !> Always logged + integer, public, parameter :: NEKO_LOG_QUIET = 0 + !> Default log level + integer, public, parameter :: NEKO_LOG_INFO = 1 + !> Verbose log level + integer, public, parameter :: NEKO_LOG_VERBOSE = 2 + !> Debug log level + integer, public, parameter :: NEKO_LOG_DEBUG = 10 + contains !> Initialize a log @@ -75,9 +83,9 @@ subroutine log_init(this) if (envvar_len .gt. 0) then read(log_level(1:envvar_len), *) this%level_ else - this%level_ = 1 + this%level_ = NEKO_LOG_INFO end if - + end subroutine log_init !> Increase indention level @@ -87,7 +95,7 @@ subroutine log_begin(this) if (pe_rank .eq. 0) then this%indent_ = this%indent_ + 1 end if - + end subroutine log_begin !> Decrease indention level @@ -97,20 +105,20 @@ subroutine log_end(this) if (pe_rank .eq. 0) then this%indent_ = this%indent_ - 1 end if - + end subroutine log_end - - !> Indent a log + + !> Indent a log subroutine log_indent(this) class(log_t), intent(in) :: this integer :: i if (pe_rank .eq. 0) then do i = 1, this%indent_ - write(*,'(A)', advance='no') ' ' + write(*,'(A)', advance='no') ' ' end do end if - + end subroutine log_indent !> Write a new line to a log @@ -119,26 +127,31 @@ subroutine log_newline if (pe_rank .eq. 0) then write(*,*) ' ' end if - + end subroutine log_newline !> Write a message to a log subroutine log_message(this, msg, lvl) class(log_t), intent(in) :: this character(len=*), intent(in) :: msg - integer, optional, intent(in) :: lvl + integer, optional :: lvl + integer :: lvl_ if (present(lvl)) then - if (lvl .gt. this%level_) then - return - end if + lvl_ = lvl + else + lvl_ = NEKO_LOG_INFO + end if + + if (lvl_ .gt. this%level_) then + return end if if (pe_rank .eq. 0) then call this%indent() write(*, '(A)') trim(msg) end if - + end subroutine log_message !> Write an error message to a log @@ -148,7 +161,7 @@ subroutine log_error(this, msg) if (pe_rank .eq. 0) then call this%indent() - write(*, '(A,A,A)') '*** ERROR: ', trim(msg),' ***' + write(*, '(A,A,A)') '*** ERROR: ', trim(msg),' ***' end if end subroutine log_error @@ -160,7 +173,7 @@ subroutine log_warning(this, msg) if (pe_rank .eq. 0) then call this%indent() - write(*, '(A,A,A)') '*** WARNING: ', trim(msg),' ***' + write(*, '(A,A,A)') '*** WARNING: ', trim(msg),' ***' end if end subroutine log_warning @@ -177,20 +190,20 @@ subroutine log_section(this, msg) this%section_id_ = this%section_id_ + 1 pre = (30 - len_trim(msg)) / 2 - + write(*,*) ' ' call this%indent() do i = 1, pre write(*,'(A)', advance='no') '-' end do - + write(*,'(A)', advance='no') trim(msg) do i = 1, 30 - (len_trim(msg) + pre) write(*,'(A)', advance='no') '-' end do write(*,*) ' ' end if - + end subroutine log_section !> End a log section @@ -199,16 +212,16 @@ subroutine log_end_section(this, msg) character(len=*), intent(in), optional :: msg if (present(msg)) then - call this%message(msg) + call this%message(msg, NEKO_LOG_QUIET) end if - + if (pe_rank .eq. 0) then this%section_id_ = this%section_id_ - 1 this%indent_ = this%indent_ - this%section_id_ end if - - end subroutine log_end_Section - + + end subroutine log_end_section + !> Write status banner !! @todo move to a future Time module subroutine log_status(this, t, T_end) @@ -218,28 +231,30 @@ subroutine log_status(this, t, T_end) character(len=LOG_SIZE) :: log_buf real(kind=rp) :: t_prog - t_prog = 100d0 * t / T_end + t_prog = 100d0 * t / T_end - call this%message('----------------------------------------------------------------') - write(log_buf, '(A,E15.7,A,F6.2,A)') 't = ', t,& - ' [ ',t_prog,'% ]' + call this%message('----------------------------------------------------------------', & + NEKO_LOG_QUIET) + write(log_buf, '(A,E15.7,A,F6.2,A)') & + 't = ', t, ' [ ',t_prog,'% ]' - call this%message(log_buf) - call this%message('----------------------------------------------------------------') + call this%message(log_buf, NEKO_LOG_QUIET) + call this%message('----------------------------------------------------------------', & + NEKO_LOG_QUIET) end subroutine log_status - + ! ! Rudimentary C interface ! !> Write a message to a log (from C) - !! @note This assumes the global log stream @a neko_log - subroutine log_message_c(c_msg) bind(c, name='log_message') + !! @note This assumes the global log stream @a neko_log + subroutine log_message_c(c_msg) bind(c, name='log_message') use, intrinsic :: iso_c_binding character(kind=c_char), dimension(*), intent(in) :: c_msg character(len=LOG_SIZE) :: msg integer :: len, i - + if (pe_rank .eq. 0) then len = 0 do @@ -247,21 +262,21 @@ subroutine log_message_c(c_msg) bind(c, name='log_message') len = len + 1 msg(len:len) = c_msg(len) end do - + call neko_log%indent() write(*, '(A)') trim(msg(1:len)) end if - + end subroutine log_message_c !> Write an error message to a log (from C) - !! @note This assumes the global log stream @a neko_log + !! @note This assumes the global log stream @a neko_log subroutine log_error_c(c_msg) bind(c, name="log_error") use, intrinsic :: iso_c_binding character(kind=c_char), dimension(*), intent(in) :: c_msg character(len=LOG_SIZE) :: msg integer :: len, i - + if (pe_rank .eq. 0) then len = 0 do @@ -269,21 +284,21 @@ subroutine log_error_c(c_msg) bind(c, name="log_error") len = len + 1 msg(len:len) = c_msg(len) end do - + call neko_log%indent() - write(*, '(A,A,A)') '*** ERROR: ',trim(msg(1:len)),' ***' + write(*, '(A,A,A)') '*** ERROR: ',trim(msg(1:len)),' ***' end if - + end subroutine log_error_c !> Write a warning message to a log (from C) - !! @note This assumes the global log stream @a neko_log + !! @note This assumes the global log stream @a neko_log subroutine log_warning_c(c_msg) bind(c, name="log_warning") use, intrinsic :: iso_c_binding character(kind=c_char), dimension(*), intent(in) :: c_msg character(len=LOG_SIZE) :: msg integer :: len, i - + if (pe_rank .eq. 0) then len = 0 do @@ -291,21 +306,21 @@ subroutine log_warning_c(c_msg) bind(c, name="log_warning") len = len + 1 msg(len:len) = c_msg(len) end do - + call neko_log%indent() - write(*, '(A,A,A)') '*** WARNING: ',trim(msg(1:len)),' ***' + write(*, '(A,A,A)') '*** WARNING: ',trim(msg(1:len)),' ***' end if - + end subroutine log_warning_c !> Begin a new log section (from C) - !! @note This assumes the global log stream @a neko_log + !! @note This assumes the global log stream @a neko_log subroutine log_section_c(c_msg) bind(c, name="log_section") use, intrinsic :: iso_c_binding character(kind=c_char), dimension(*), intent(in) :: c_msg character(len=LOG_SIZE) :: msg integer :: len, i - + if (pe_rank .eq. 0) then len = 0 do @@ -316,15 +331,15 @@ subroutine log_section_c(c_msg) bind(c, name="log_section") call neko_log%section(trim(msg(1:len))) end if - + end subroutine log_section_c !> End a log section (from C) !! @note This assumes the global log stream @a neko_log subroutine log_end_section_c() bind(c, name="log_end_section") - + call neko_log%end_section() - + end subroutine log_end_section_c - + end module logger diff --git a/src/common/material_properties.f90 b/src/common/material_properties.f90 index 950d0374b00..bb0bd69850f 100644 --- a/src/common/material_properties.f90 +++ b/src/common/material_properties.f90 @@ -33,16 +33,16 @@ !> Implements material_properties_t type. module material_properties use num_types, only: rp - use json_utils, only : json_get, json_get_or_default - use json_module, only : json_file, json_core, json_value - use logger, only : neko_log, LOG_SIZE + use json_utils, only : json_get + use json_module, only : json_file + use logger, only : neko_log, LOG_SIZE, NEKO_LOG_VERBOSE use user_intf, only : user_t, dummy_user_material_properties, & user_material_properties use utils, only : neko_warning, neko_error use comm, only : pe_rank implicit none private - + !> Contains all the material properties necessary in the simulation. type, public :: material_properties_t !> Density, f$\rho \f$. @@ -58,7 +58,7 @@ module material_properties procedure, pass(this) :: init => material_properties_init !> Write final dimensional values to the log. procedure, private, pass(this) :: write_to_log - end type material_properties_t + end type material_properties_t contains @@ -79,10 +79,10 @@ subroutine material_properties_init(this, params, user) if (.not. associated(user%material_properties, dummy_mp_ptr)) then - write(log_buf, '(A)') "Material properties must be set in the user& - & file!" - call neko_log%message(log_buf) - call user%material_properties(0.0_rp, 0, this%rho, this%mu, & + write(log_buf, '(A)') "Material properties must be set in the user& + & file!" + call neko_log%message(log_buf) + call user%material_properties(0.0_rp, 0, this%rho, this%mu, & this%cp, this%lambda, params) else @@ -94,19 +94,19 @@ subroutine material_properties_init(this, params, user) if (params%valid_path('case.fluid.Re') .and. & (params%valid_path('case.fluid.mu') .or. & params%valid_path('case.fluid.rho'))) then - call neko_error("To set the material properties for the fluid,& - & either provide Re OR mu and rho in the case file.") + call neko_error("To set the material properties for the fluid,& + & either provide Re OR mu and rho in the case file.") - ! Non-dimensional case + ! Non-dimensional case else if (params%valid_path('case.fluid.Re')) then nondimensional = .true. write(log_buf, '(A)') 'Non-dimensional fluid material properties & - & input.' - call neko_log%message(log_buf, lvl=2) + & input.' + call neko_log%message(log_buf, lvl=NEKO_LOG_VERBOSE) write(log_buf, '(A)') 'Density will be set to 1, dynamic viscosity to& - & 1/Re.' - call neko_log%message(log_buf, lvl=2) + & 1/Re.' + call neko_log%message(log_buf, lvl=NEKO_LOG_VERBOSE) ! Read Re into mu for further manipulation. call json_get(params, 'case.fluid.Re', this%mu) @@ -119,8 +119,8 @@ subroutine material_properties_init(this, params, user) this%rho = 1.0_rp ! Invert the Re to get viscosity. this%mu = 1.0_rp/this%mu - ! Dimensional case - else + ! Dimensional case + else call json_get(params, 'case.fluid.mu', this%mu) call json_get(params, 'case.fluid.rho', this%rho) end if @@ -129,33 +129,33 @@ subroutine material_properties_init(this, params, user) ! Scalar ! if (.not. params%valid_path('case.scalar')) then - ! Set dummy values - this%cp = 1.0_rp - this%lambda = 1.0_rp - call this%write_to_log(.false.) - return + ! Set dummy values + this%cp = 1.0_rp + this%lambda = 1.0_rp + call this%write_to_log(.false.) + return end if ! Incorrect user input if (nondimensional .and. & (params%valid_path('case.scalar.lambda') .or. & params%valid_path('case.scalar.cp'))) then - call neko_error("For non-dimensional setup set the Pe number for& - & the scalar") + call neko_error("For non-dimensional setup set the Pe number for& + & the scalar") else if (.not. nondimensional .and. & params%valid_path('case.scalar.Pe')) then - call neko_error("Dimensional material properties input detected,& - & because you set rho and mu for the fluid. & - & Please set cp and lambda for the scalar.") + call neko_error("Dimensional material properties input detected,& + & because you set rho and mu for the fluid. & + & Please set cp and lambda for the scalar.") - ! Non-dimensional case + ! Non-dimensional case else if (nondimensional) then write(log_buf, '(A)') 'Non-dimensional scalar material properties & - & input.' - call neko_log%message(log_buf, lvl=2) + & input.' + call neko_log%message(log_buf, lvl=NEKO_LOG_VERBOSE) write(log_buf, '(A)') 'Specific heat capacity will be set to 1, & - & conductivity to 1/Pe.' - call neko_log%message(log_buf, lvl=2) + & conductivity to 1/Pe.' + call neko_log%message(log_buf, lvl=NEKO_LOG_VERBOSE) ! Read Pe into lambda for further manipulation. call json_get(params, 'case.scalar.Pe', this%lambda) @@ -167,8 +167,8 @@ subroutine material_properties_init(this, params, user) this%rho = 1.0_rp ! Invert the Pe to get conductivity this%lambda = 1.0_rp/this%lambda - ! Dimensional case - else + ! Dimensional case + else call json_get(params, 'case.scalar.lambda', this%lambda) call json_get(params, 'case.scalar.cp', this%cp) end if diff --git a/src/common/profiler.F90 b/src/common/profiler.F90 index 44b63ed642c..ae00c94f102 100644 --- a/src/common/profiler.F90 +++ b/src/common/profiler.F90 @@ -38,13 +38,17 @@ module profiler use roctx use craypat implicit none + private + + public :: profiler_start, profiler_start_region, profiler_end_region, & + profiler_stop contains !> Start profiling subroutine profiler_start if ((NEKO_BCKND_CUDA .eq. 1)) then -#if defined(HAVE_NVTX) +#if defined(HAVE_NVTX) call device_profiler_start #endif else @@ -66,19 +70,24 @@ subroutine profiler_stop #endif end if end subroutine profiler_stop - + !> Started a named (@a name) profiler region - subroutine profiler_start_region(name) + subroutine profiler_start_region(name, region_id) character(kind=c_char,len=*) :: name + integer, optional :: region_id #ifdef HAVE_NVTX - call nvtxStartRange(name) + if (present(region_id)) then + call nvtxStartRange(name, region_id) + else + call nvtxStartRange(name) + end if #elif HAVE_ROCTX call roctxStartRange(name) #elif CRAYPAT - ! call craypat_region_begin(name) + ! call craypat_region_begin(name) #endif - + end subroutine profiler_start_region !> End the most recently started profiler region @@ -89,9 +98,9 @@ subroutine profiler_end_region #elif HAVE_ROCTX call roctxRangePop #elif CRAYPAT - ! call craypat_region_end + ! call craypat_region_end #endif - + end subroutine profiler_end_region - + end module profiler diff --git a/src/common/projection.f90 b/src/common/projection.f90 index 631a408ceb3..36b3005f3eb 100644 --- a/src/common/projection.f90 +++ b/src/common/projection.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! !> Project x onto X, the space of old solutions and back again @@ -75,6 +75,8 @@ module projection use profiler use logger use, intrinsic :: iso_c_binding + use time_step_controller + implicit none private @@ -93,25 +95,29 @@ module projection !logging variables real(kind=rp) :: proj_res integer :: proj_m = 0 + integer :: activ_step ! steps to activate projection contains + procedure, pass(this) :: clear => bcknd_clear procedure, pass(this) :: project_on => bcknd_project_on procedure, pass(this) :: project_back => bcknd_project_back procedure, pass(this) :: log_info => print_proj_info procedure, pass(this) :: init => projection_init procedure, pass(this) :: free => projection_free + procedure, pass(this) :: pre_solving => projection_pre_solving + procedure, pass(this) :: post_solving => projection_post_solving end type projection_t contains - subroutine projection_init(this, n, L) + subroutine projection_init(this, n, L, activ_step) class(projection_t), target, intent(inout) :: this integer, intent(in) :: n - integer, optional, intent(in) :: L + integer, optional, intent(in) :: L, activ_step integer :: i integer(c_size_t) :: ptr_size type(c_ptr) :: ptr real(c_rp) :: dummy - + call this%free() if (present(L)) then @@ -120,7 +126,13 @@ subroutine projection_init(this, n, L) this%L = 20 end if - this%m = 0 + if (present(activ_step)) then + this%activ_step = activ_step + else + this%activ_step = 5 + end if + + this%m = 0 allocate(this%xx(n,this%L)) allocate(this%bb(n,this%L)) @@ -152,10 +164,12 @@ subroutine projection_init(this, n, L) ptr_size = c_sizeof(C_NULL_PTR) * this%L call device_alloc(this%xx_d_d, ptr_size) ptr = c_loc(this%xx_d) - call device_memcpy(ptr,this%xx_d_d, ptr_size, HOST_TO_DEVICE) + call device_memcpy(ptr,this%xx_d_d, ptr_size, & + HOST_TO_DEVICE, sync=.false.) call device_alloc(this%bb_d_d, ptr_size) ptr = c_loc(this%bb_d) - call device_memcpy(ptr,this%bb_d_d, ptr_size, HOST_TO_DEVICE) + call device_memcpy(ptr,this%bb_d_d, ptr_size, & + HOST_TO_DEVICE, sync=.false.) end if @@ -202,12 +216,63 @@ subroutine projection_free(this) end subroutine projection_free + subroutine projection_pre_solving(this, b, tstep, coef, n, dt_controller, string) + class(projection_t), intent(inout) :: this + integer, intent(inout) :: n + real(kind=rp), intent(inout), dimension(n) :: b + integer, intent(in) :: tstep + class(coef_t), intent(inout) :: coef + type(time_step_controller_t), intent(in) :: dt_controller + character(len=*), optional :: string + + if( tstep .gt. this%activ_step .and. this%L .gt. 0) then + if (dt_controller%if_variable_dt) then + if (dt_controller%dt_last_change .eq. 0) then ! the time step at which dt is changed + call this%clear(n) + else if (dt_controller%dt_last_change .gt. this%activ_step - 1) then + ! activate projection some steps after dt is changed + ! note that dt_last_change start from 0 + call this%project_on(b, coef, n) + if (present(string)) then + call this%log_info(string) + end if + end if + else + call this%project_on(b, coef, n) + if (present(string)) then + call this%log_info(string) + end if + end if + end if + + end subroutine projection_pre_solving + + subroutine projection_post_solving(this, x, Ax, coef, bclst, gs_h, n, tstep, dt_controller) + class(projection_t), intent(inout) :: this + integer, intent(inout) :: n + class(Ax_t), intent(inout) :: Ax + class(coef_t), intent(inout) :: coef + class(bc_list_t), intent(inout) :: bclst + type(gs_t), intent(inout) :: gs_h + real(kind=rp), intent(inout), dimension(n) :: x + integer, intent(in) :: tstep + type(time_step_controller_t), intent(in) :: dt_controller + + if (tstep .gt. this%activ_step .and. this%L .gt. 0) then + if (.not.(dt_controller%if_variable_dt) .or. & + (dt_controller%dt_last_change .gt. this%activ_step - 1)) then + call this%project_back(x, Ax, coef, bclst, gs_h, n) + end if + end if + + end subroutine projection_post_solving + subroutine bcknd_project_on(this, b, coef, n) class(projection_t), intent(inout) :: this integer, intent(inout) :: n - class(coef_t), intent(inout) :: coef - real(kind=rp), intent(inout), dimension(n) :: b - call profiler_start_region('Project on') + class(coef_t), intent(inout) :: coef + real(kind=rp), intent(inout), dimension(n) :: b + call profiler_start_region('Project on', 16) if (NEKO_BCKND_DEVICE .eq. 1) then call device_project_on(this, b, coef, n) else @@ -215,19 +280,18 @@ subroutine bcknd_project_on(this, b, coef, n) end if call profiler_end_region end subroutine bcknd_project_on - + subroutine bcknd_project_back(this,x,Ax,coef, bclst, gs_h, n) class(projection_t) :: this integer, intent(inout) :: n - class(Ax_t), intent(inout) :: Ax - class(coef_t), intent(inout) :: coef + class(Ax_t), intent(inout) :: Ax + class(coef_t), intent(inout) :: coef class(bc_list_t), intent(inout) :: bclst type(gs_t), intent(inout) :: gs_h - real(kind=rp), intent(inout), dimension(n) :: x + real(kind=rp), intent(inout), dimension(n) :: x type(c_ptr) :: x_d - call profiler_start_region('Project back') - this%m = min(this%m+1,this%L) + call profiler_start_region('Project back', 17) if (NEKO_BCKND_DEVICE .eq. 1) then x_d = device_get_ptr(x) @@ -242,7 +306,12 @@ subroutine bcknd_project_back(this,x,Ax,coef, bclst, gs_h, n) else if (this%m.gt.0) call add2(x,this%xbar,n) ! Restore desired solution - this%m = min(this%m+1,this%L) + if (this%m .eq. this%L) then + this%m = 1 + else + this%m = min(this%m+1,this%L) + end if + call copy (this%xx(1,this%m),x,n) ! Update (X,B) end if @@ -253,43 +322,43 @@ subroutine bcknd_project_back(this,x,Ax,coef, bclst, gs_h, n) if (NEKO_BCKND_DEVICE .eq. 1) then call device_proj_ortho(this, this%xx_d, this%bb_d, coef%mult_d, n) else - call cpu_proj_ortho (this,this%xx,this%bb,coef%mult,n) + call cpu_proj_ortho (this,this%xx,this%bb,coef%mult,n) end if call profiler_end_region end subroutine bcknd_project_back - + subroutine cpu_project_on(this, b, coef, n) class(projection_t), intent(inout) :: this integer, intent(inout) :: n - class(coef_t), intent(inout) :: coef - real(kind=rp), intent(inout), dimension(n) :: b + class(coef_t), intent(inout) :: coef + real(kind=rp), intent(inout), dimension(n) :: b integer :: i, j, k, ierr real(kind=rp) :: work(this%L), alpha(this%L) associate(xbar => this%xbar, xx => this%xx, & bb => this%bb) - + if (this%m .le. 0) return - + !First round of CGS call rzero(alpha, this%m) - this%proj_res = glsc3(b,b,coef%mult,n) + this%proj_res = sqrt(glsc3(b,b,coef%mult,n)/coef%volume) this%proj_m = this%m do i = 1, n, NEKO_BLK_SIZE j = min(NEKO_BLK_SIZE, n-i+1) - do k = 1, this%m + do k = 1, this%m alpha(k) = alpha(k) + vlsc3(xx(i,k), coef%mult(i,1,1,1), b(i), j) end do end do - + !First one outside loop to avoid zeroing xbar and bbar call MPI_Allreduce(MPI_IN_PLACE, alpha, this%m, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) - + call rzero(work, this%m) - + do i = 1, n, NEKO_BLK_SIZE j = min(NEKO_BLK_SIZE, n-i+1) call cmult2(xbar(i), xx(i,1), alpha(1), j) @@ -303,10 +372,10 @@ subroutine cpu_project_on(this, b, coef, n) work(k) = work(k) + vlsc3(xx(i,k), coef%mult(i,1,1,1), b(i), j) end do end do - + call MPI_Allreduce(work, alpha, this%m, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) - + do i = 1, n, NEKO_BLK_SIZE j = min(NEKO_BLK_SIZE, n-i+1) do k = 1,this%m @@ -320,8 +389,8 @@ end subroutine cpu_project_on subroutine device_project_on(this, b, coef, n) class(projection_t), intent(inout) :: this integer, intent(inout) :: n - class(coef_t), intent(inout) :: coef - real(kind=rp), intent(inout), dimension(n) :: b + class(coef_t), intent(inout) :: coef + real(kind=rp), intent(inout), dimension(n) :: b real(kind=rp) :: alpha(this%L) type(c_ptr) :: b_d integer :: i @@ -329,12 +398,12 @@ subroutine device_project_on(this, b, coef, n) associate(xbar_d => this%xbar_d, xx_d => this%xx_d, xx_d_d => this%xx_d_d, & bb_d => this%bb_d, bb_d_d => this%bb_d_d, alpha_d => this%alpha_d) - + if (this%m .le. 0) return - this%proj_res = device_glsc3(b_d,b_d,coef%mult_d,n) + this%proj_res = sqrt(device_glsc3(b_d,b_d,coef%mult_d,n)/coef%volume) this%proj_m = this%m if (NEKO_DEVICE_MPI .and. (NEKO_BCKND_OPENCL .ne. 1)) then call device_proj_on(alpha_d, b_d, xx_d_d, bb_d_d, & @@ -346,12 +415,13 @@ subroutine device_project_on(this, b, coef, n) end do else call device_glsc3_many(alpha,b_d,xx_d_d,coef%mult_d,this%m,n) - call device_memcpy(alpha, alpha_d, this%m, HOST_TO_DEVICE) + call device_memcpy(alpha, alpha_d, this%m, & + HOST_TO_DEVICE, sync=.false.) end if call device_rzero(xbar_d, n) if (NEKO_BCKND_OPENCL .eq. 1) then do i = 1, this%m - call device_add2s2(xbar_d, xx_d(i), alpha(i), n) + call device_add2s2(xbar_d, xx_d(i), alpha(i), n) end do call cmult(alpha, -1.0_rp, this%m) else @@ -367,7 +437,8 @@ subroutine device_project_on(this, b, coef, n) else call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n) call device_glsc3_many(alpha,b_d,xx_d_d,coef%mult_d,this%m,n) - call device_memcpy(alpha, alpha_d, this%m, HOST_TO_DEVICE) + call device_memcpy(alpha, alpha_d, this%m, & + HOST_TO_DEVICE, sync=.false.) end if if (NEKO_BCKND_OPENCL .eq. 1) then @@ -382,7 +453,7 @@ subroutine device_project_on(this, b, coef, n) call device_add2s2_many(b_d, bb_d_d, alpha_d, this%m, n) end if end if - + end associate end subroutine device_project_on @@ -398,7 +469,7 @@ subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n) associate(m => this%m, xx_d_d => this%xx_d_d, & bb_d_d => this%bb_d_d, alpha_d => this%alpha_d) - + if(m .le. 0) return if (NEKO_DEVICE_MPI .and. (NEKO_BCKND_OPENCL .ne. 1)) then @@ -418,14 +489,15 @@ subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n) do i = 1, m - 1 call device_add2s2(xx_d(m),xx_d(i),alpha(i), n) call device_add2s2(bb_d(m),bb_d(i),alpha(i),n) - + alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n) end do else - call device_memcpy(alpha, alpha_d, this%m, HOST_TO_DEVICE) + call device_memcpy(alpha, alpha_d, this%m, & + HOST_TO_DEVICE, sync=.false.) call device_add2s2_many(xx_d(m),xx_d_d,alpha_d,m-1,n) call device_add2s2_many(bb_d(m),bb_d_d,alpha_d,m-1,n) - + call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n) end if call cmult(alpha, -1.0_rp,m) @@ -436,22 +508,23 @@ subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n) alpha(i) = device_glsc3(bb_d(m),xx_d(i),w_d,n) end do else - call device_memcpy(alpha, alpha_d, m, HOST_TO_DEVICE) + call device_memcpy(alpha, alpha_d, m, & + HOST_TO_DEVICE, sync=.false.) call device_add2s2_many(xx_d(m),xx_d_d,alpha_d,m-1,n) call device_add2s2_many(bb_d(m),bb_d_d,alpha_d,m-1,n) call device_glsc3_many(alpha,bb_d(m),xx_d_d,w_d,m,n) end if end if - + alpha(m) = device_glsc3(xx_d(m), w_d, bb_d(m), n) alpha(m) = sqrt(alpha(m)) - if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent - scl = 1.0_rp / alpha(m) - call device_cmult(xx_d(m), scl, n) - call device_cmult(bb_d(m), scl, n) + if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent + scl = 1.0_rp / alpha(m) + call device_cmult(xx_d(m), scl, n) + call device_cmult(bb_d(m), scl, n) + - else !New vector is not linearly independent, forget about it if(pe_rank .eq. 0) then call neko_warning('New vector not linearly indepependent!') @@ -462,8 +535,8 @@ subroutine device_proj_ortho(this, xx_d, bb_d, w_d, n) end associate end subroutine device_proj_ortho - - + + subroutine cpu_proj_ortho(this, xx, bb, w, n) type(projection_t) :: this integer, intent(inout) :: n @@ -474,11 +547,11 @@ subroutine cpu_proj_ortho(this, xx, bb, w, n) integer :: i, j, k, h, ierr associate(m => this%m) - - if(m .le. 0) return !No vectors to ortho-normalize + + if(m .le. 0) return !No vectors to ortho-normalize ! AX = B - ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb + ! Calculate dx, db: dx = x-XX^Tb, db=b-BX^Tb call rzero(alpha, m) do i = 1, n, NEKO_BLK_SIZE j = min(NEKO_BLK_SIZE, n-i+1) @@ -487,13 +560,13 @@ subroutine cpu_proj_ortho(this, xx, bb, w, n) + vlsc3(bb(i,k), w(i), xx(i,m), j)) end do end do - + call MPI_Allreduce(MPI_IN_PLACE, alpha, this%m, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) - + nrm = sqrt(alpha(m)) !Calculate A-norm of new vector - - + + do i = 1, n, NEKO_BLK_SIZE j = min(NEKO_BLK_SIZE, n-i+1) do k = 1,m-1 @@ -502,7 +575,7 @@ subroutine cpu_proj_ortho(this, xx, bb, w, n) end do end do call rzero(beta,m) - + do i = 1, n, NEKO_BLK_SIZE j = min(NEKO_BLK_SIZE, n-i+1) do k = 1,m-1 @@ -515,7 +588,7 @@ subroutine cpu_proj_ortho(this, xx, bb, w, n) MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) alpha(m) = 0.0_rp - + do i = 1, n, NEKO_BLK_SIZE j = min(NEKO_BLK_SIZE,n-i+1) do k = 1, m-1 @@ -527,37 +600,19 @@ subroutine cpu_proj_ortho(this, xx, bb, w, n) do k = 1, m-1 alpha(k) = alpha(k) + beta(k) end do - - !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n) + + !alpha(m) = glsc3(xx(1,m), w, bb(1,m), n) call MPI_Allreduce(MPI_IN_PLACE, alpha(m), 1, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) alpha(m) = sqrt(alpha(m)) !dx and db now stored in last column of xx and bb - if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent + if(alpha(m) .gt. this%tol*nrm) then !New vector is linearly independent !Normalize dx and db - scl1 = 1.0_rp / alpha(m) - call cmult(xx(1,m), scl1, n) - call cmult(bb(1,m), scl1, n) - - !We want to throw away the oldest information - !The below propagates newest information to first vector. - !This will make the first vector a scalar - !multiple of x. - do k = m, 2, -1 - h = k - 1 - call givens_rotation(alpha(h), alpha(k), c, s, nrm) - alpha(h) = nrm - do i = 1, n !Apply rotation to xx and bb - scl1 = c*xx(i,h) + s*xx(i,k) - xx(i,k) = -s*xx(i,h) + c*xx(i,k) - xx(i,h) = scl1 - scl2 = c*bb(i,h) + s*bb(i,k) - bb(i,k) = -s*bb(i,h) + c*bb(i,k) - bb(i,h) = scl2 - end do - end do - + scl1 = 1.0_rp / alpha(m) + call cmult(xx(1,m), scl1, n) + call cmult(bb(1,m), scl1, n) + else !New vector is not linearly independent, forget about it k = m !location of rank deficient column if(pe_rank .eq. 0) then @@ -567,40 +622,43 @@ subroutine cpu_proj_ortho(this, xx, bb, w, n) endif end associate - + end subroutine cpu_proj_ortho - - subroutine givens_rotation(a, b, c, s, r) - real(kind=rp), intent(inout) :: a, b, c, s, r - real(kind=rp) :: h, d - - if(b .ne. 0.0_rp) then - h = hypot(a, b) - d = 1.0_rp / h - c = abs(a) * d - s = sign(d, a) * b - r = sign(1.0_rp, a) * h - else - c = 1.0_rp - s = 0.0_rp - r = a - endif - - return - end subroutine givens_rotation subroutine print_proj_info(this,string) class(projection_t) :: this character(len=*) :: string character(len=LOG_SIZE) :: log_buf - - write(log_buf, '(A,A)') 'Projection ', string - call neko_log%message(log_buf) - write(log_buf, '(A,A)') 'Proj. vec.:',' Orig. residual:' - call neko_log%message(log_buf) - write(log_buf, '(I11,3x, E15.7,5x)') this%proj_m, this%proj_res - call neko_log%message(log_buf) + + if (this%proj_m .gt. 0) then + write(log_buf, '(A,A)') 'Projection ', string + call neko_log%message(log_buf) + write(log_buf, '(A,A)') 'Proj. vec.:',' Orig. residual:' + call neko_log%message(log_buf) + write(log_buf, '(I11,3x, E15.7,5x)') this%proj_m, this%proj_res + call neko_log%message(log_buf) + end if end subroutine print_proj_info + subroutine bcknd_clear(this, n) + class(projection_t) :: this + integer, intent(in) :: n + integer :: i + + this%m = 0 + this%proj_m = 0 + + do i = 1, this%L + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_rzero(this%xx_d(i), n) + call device_rzero(this%xx_d(i), n) + else + call rzero(this%xx(1,i),n) + call rzero(this%bb(1,i),n) + end if + end do + + end subroutine bcknd_clear + end module projection diff --git a/src/common/rhs_maker.f90 b/src/common/rhs_maker.f90 index dce8aa36fd8..287649214c2 100644 --- a/src/common/rhs_maker.f90 +++ b/src/common/rhs_maker.f90 @@ -90,11 +90,10 @@ end subroutine rhs_maker_ext end interface abstract interface - subroutine scalar_rhs_maker_ext(temp1, fs_lag, fs_laglag, fs, rho, & + subroutine scalar_rhs_maker_ext(fs_lag, fs_laglag, fs, rho, & ext_coeffs, n) import field_t import rp - type(field_t), intent(inout) :: temp1 type(field_t), intent(inout) :: fs_lag type(field_t), intent(inout) :: fs_laglag real(kind=rp), intent(inout) :: rho, ext_coeffs(4) @@ -111,7 +110,7 @@ subroutine rhs_maker_bdf(ulag, vlag, wlag, bfx, bfy, bfz, & import rp integer, intent(in) :: n, nbd type(field_t), intent(in) :: u, v, w - type(field_series_t), intent(in) :: ulag, vlag, wlag + type(field_series_t), intent(in) :: ulag, vlag, wlag real(kind=rp), intent(inout) :: bfx(n), bfy(n), bfz(n) real(kind=rp), intent(in) :: B(n) real(kind=rp), intent(in) :: dt, rho, bd(4) @@ -119,13 +118,12 @@ end subroutine rhs_maker_bdf end interface abstract interface - subroutine scalar_rhs_maker_bdf(temp1, temp2, s_lag, fs, s, B, rho, dt,& + subroutine scalar_rhs_maker_bdf(s_lag, fs, s, B, rho, dt,& bd, nbd, n) import field_series_t import field_t import rp integer, intent(in) :: n, nbd - type(field_t), intent(inout) :: temp1, temp2 type(field_t), intent(in) :: s type(field_series_t), intent(in) :: s_lag real(kind=rp), intent(inout) :: fs(n) diff --git a/src/common/rhs_maker_fctry.f90 b/src/common/rhs_maker_fctry.f90 index 44428a13d7c..462b5008f8d 100644 --- a/src/common/rhs_maker_fctry.f90 +++ b/src/common/rhs_maker_fctry.f90 @@ -38,6 +38,9 @@ module rhs_maker_fctry use rhs_maker_device use neko_config implicit none + private + + public :: rhs_maker_sumab_fctry, rhs_maker_ext_fctry, rhs_maker_bdf_fctry contains @@ -86,7 +89,7 @@ subroutine rhs_maker_bdf_fctry(makebdf) allocate(rhs_maker_bdf_sx_t::makebdf) else if (NEKO_BCKND_DEVICE .eq. 1) then allocate(rhs_maker_bdf_device_t::makebdf) - else + else allocate(rhs_maker_bdf_cpu_t::makebdf) end if diff --git a/src/common/sampler.f90 b/src/common/sampler.f90 index 3ca495bf077..13cbfcb2ab5 100644 --- a/src/common/sampler.f90 +++ b/src/common/sampler.f90 @@ -30,9 +30,10 @@ ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -!> Defines a sampler +!> Defines a sampler module sampler - use output + use output, only: output_t + use fld_file, only: fld_file_t use comm use logger use utils, only : neko_error @@ -56,7 +57,7 @@ module sampler !> Number of outputs. integer :: n !> Number of entries in the list. - integer :: size + integer :: size !> Final time of the simulation. real(kind=rp) :: T_end contains @@ -109,10 +110,10 @@ subroutine sampler_free(this) class(sampler_t), intent(inout) :: this if (allocated(this%output_list)) then - deallocate(this%output_list) + deallocate(this%output_list) end if if (allocated(this%controllers)) then - deallocate(this%controllers) + deallocate(this%controllers) end if this%n = 0 @@ -130,6 +131,7 @@ subroutine sampler_add(this, out, write_par, write_control) type(time_based_controller_t), allocatable :: tmp_ctrl(:) character(len=LOG_SIZE) :: log_buf integer :: n + class(*), pointer :: ft if (this%n .ge. this%size) then allocate(tmp(this%size * 2)) @@ -152,37 +154,48 @@ subroutine sampler_add(this, out, write_par, write_control) else call this%controllers(n)%init(this%T_end, write_control, write_par) end if - + ! The code below only prints to console call neko_log%section('Adding write output') - call neko_log%message('File name: '// & + call neko_log%message('File name : '// & trim(this%output_list(this%n)%outp%file_%file_type%fname)) - call neko_log%message( 'Write control: '//trim(write_control)) - if (trim(write_control) .eq. 'simulationtime') then - write(log_buf, '(A,ES13.6)') 'Writes per time unit (Freq.): ', & + call neko_log%message('Write control : '//trim(write_control)) + + ! Show the output precision if we are outputting an fld file + select type(ft => out%file_%file_type) + type is (fld_file_t) + if (ft%dp_precision) then + call neko_log%message('Output precision : double') + else + call neko_log%message('Output precision : single') + end if + end select + + if (trim(write_control) .eq. 'simulationtime') then + write(log_buf, '(A,ES13.6)') 'Writes per time unit (Freq.): ', & this%controllers(n)%frequency - call neko_log%message(log_buf) - write(log_buf, '(A,ES13.6)') 'Time between writes: ', & + call neko_log%message(log_buf) + write(log_buf, '(A,ES13.6)') 'Time between writes: ', & this%controllers(n)%time_interval - call neko_log%message(log_buf) + call neko_log%message(log_buf) else if (trim(write_control) .eq. 'nsamples') then - write(log_buf, '(A,I13)') 'Total samples: ', int(write_par) - call neko_log%message(log_buf) - write(log_buf, '(A,ES13.6)') 'Writes per time unit (Freq.): ', & + write(log_buf, '(A,I13)') 'Total samples: ', int(write_par) + call neko_log%message(log_buf) + write(log_buf, '(A,ES13.6)') 'Writes per time unit (Freq.): ', & this%controllers(n)%frequency - call neko_log%message(log_buf) - write(log_buf, '(A,ES13.6)') 'Time between writes: ', & + call neko_log%message(log_buf) + write(log_buf, '(A,ES13.6)') 'Time between writes: ', & this%controllers(n)%time_interval - call neko_log%message(log_buf) - else if (trim(write_control) .eq. 'tsteps') then - write(log_buf, '(A,I13)') 'Time step interval: ', int(write_par) - call neko_log%message(log_buf) + call neko_log%message(log_buf) + else if (trim(write_control) .eq. 'tsteps') then + write(log_buf, '(A,I13)') 'Time step interval: ', int(write_par) + call neko_log%message(log_buf) else if (trim(write_control) .eq. 'org') then - write(log_buf, '(A)') & + write(log_buf, '(A)') & 'Write control not set, defaulting to first output settings' - call neko_log%message(log_buf) + call neko_log%message(log_buf) end if - + call neko_log%end_section() end subroutine sampler_add @@ -212,13 +225,13 @@ subroutine sampler_sample(this, t, tstep, ifforce) write_output = .false. ! Determine if at least one output needs to be written ! We should not need this extra select block, and it works great - ! without it for GNU, Intel and NEC, but breaks horribly on Cray - ! (>11.0.x) when using high opt. levels. + ! without it for GNU, Intel and NEC, but breaks horribly on Cray + ! (>11.0.x) when using high opt. levels. select type (samp => this) type is (sampler_t) do i = 1, samp%n if (this%controllers(i)%check(t, tstep, force)) then - write_output = .true. + write_output = .true. exit end if end do @@ -230,16 +243,16 @@ subroutine sampler_sample(this, t, tstep, ifforce) ! Loop through the outputs and write if necessary. ! We should not need this extra select block, and it works great - ! without it for GNU, Intel and NEC, but breaks horribly on Cray - ! (>11.0.x) when using high opt. levels. + ! without it for GNU, Intel and NEC, but breaks horribly on Cray + ! (>11.0.x) when using high opt. levels. select type (samp => this) type is (sampler_t) do i = 1, this%n if (this%controllers(i)%check(t, tstep, force)) then - call neko_log%message('File name: '// & + call neko_log%message('File name : '// & trim(samp%output_list(i)%outp%file_%file_type%fname)) - write(log_buf, '(A,I6)') 'Output number:', & + write(log_buf, '(A,I6)') 'Output number :', & int(this%controllers(i)%nexecutions) call neko_log%message(log_buf) @@ -251,7 +264,7 @@ subroutine sampler_sample(this, t, tstep, ifforce) class default call neko_error('Invalid sampler output list') end select - + call MPI_Barrier(NEKO_COMM, ierr) sample_end_time = MPI_WTIME() @@ -281,9 +294,9 @@ subroutine sampler_set_counter(this, t) call this%output_list(i)%outp%set_start_counter(this%controllers(i)%nexecutions) end if end do - + end subroutine sampler_set_counter - + !> Set sampling counter (after restart) explicitly subroutine sampler_set_sample_count(this, sample_number) class(sampler_t), intent(inout) :: this @@ -295,8 +308,8 @@ subroutine sampler_set_sample_count(this, sample_number) call this%output_list(i)%outp%set_counter(this%controllers(i)%nexecutions) call this%output_list(i)%outp%set_start_counter(this%controllers(i)%nexecutions) end do - + end subroutine sampler_set_sample_count - - + + end module sampler diff --git a/src/common/signal.f90 b/src/common/signal.f90 index fdc3a3f6c8f..29358e24e1e 100644 --- a/src/common/signal.f90 +++ b/src/common/signal.f90 @@ -49,7 +49,7 @@ integer (c_int8_t) function sighdl_usr() & use, intrinsic :: iso_c_binding end function sighdl_usr end interface - + interface integer (c_int) function sighdl_set_timeout(sec) & bind(c, name='sighdl_set_timeout') @@ -58,7 +58,7 @@ integer (c_int) function sighdl_set_timeout(sec) & integer(c_int) :: sec end function sighdl_set_timeout end interface - + interface integer (c_int) function sighdl_trap_cpulimit() & bind(c, name='sighdl_trap_cpulimit') @@ -74,7 +74,7 @@ end function sighdl_trap_usr end interface contains - + !> Check if any timeout has occurred (either SIGXCPU or SIGALRM) function signal_timeout() result(timeout) logical :: timeout @@ -84,7 +84,7 @@ function signal_timeout() result(timeout) else timeout = .false. end if - + end function signal_timeout !> Check if a user signal has been raised @@ -98,15 +98,15 @@ function signal_usr(usr) result(raised) end if usr12 = sighdl_usr() - + if (bge(usr12, usr)) then raised = .true. else raised = .false. end if - + end function signal_usr - + !> Set a timeout after @a seconds subroutine signal_set_timeout(sec) integer(kind=c_int) :: sec @@ -114,9 +114,9 @@ subroutine signal_set_timeout(sec) if (sighdl_set_timeout(sec) .lt. 0) then call neko_error('sighdl failed to set SIGALRM') end if - + end subroutine signal_set_timeout - + !> Initialize signal handler to trap SIGXCPU subroutine signal_trap_cpulimit() logical, save :: initialized = .false. @@ -127,7 +127,7 @@ subroutine signal_trap_cpulimit() end if initialized = .true. end if - + end subroutine signal_trap_cpulimit !> Initialize signal handler to trap SIGUSR1 and SIGUSR2 @@ -140,7 +140,7 @@ subroutine signal_trap_usr() end if initialized = .true. end if - + end subroutine signal_trap_usr - + end module signal diff --git a/src/common/statistics.f90 b/src/common/statistics.f90 index 4b5c8fde555..074c77ade6f 100644 --- a/src/common/statistics.f90 +++ b/src/common/statistics.f90 @@ -67,7 +67,7 @@ subroutine stats_init(this, T_begin, samp_interval, size) integer, intent(in) :: samp_interval integer, intent(inout), optional ::size integer :: n, i - + call this%free() if (present(size)) then @@ -87,7 +87,7 @@ subroutine stats_init(this, T_begin, samp_interval, size) this%T_begin = T_begin this%samp_interval = samp_interval this%t_diff = 0.0 - + end subroutine stats_init !> Deallocate @@ -99,7 +99,7 @@ subroutine stats_free(this) end if this%n = 0 - this%size = 0 + this%size = 0 end subroutine stats_free !> Add a statistic quantitiy @a quant to the backend @@ -126,7 +126,7 @@ subroutine stats_eval(this, t, dt, tstep) real(kind=rp), intent(in) :: dt integer, intent(in) :: tstep integer :: i, ierr - character(len=LOG_SIZE) :: log_buf + character(len=LOG_SIZE) :: log_buf real(kind=rp) :: sample_start_time, sample_end_time real(kind=dp) :: sample_time diff --git a/src/common/stats_quant.f90 b/src/common/stats_quant.f90 index 6705cd1b55b..750321a68b6 100644 --- a/src/common/stats_quant.f90 +++ b/src/common/stats_quant.f90 @@ -51,5 +51,5 @@ subroutine stats_quant_update(this, k) real(kind=rp), intent(in) :: k end subroutine stats_quant_update end interface - + end module stats_quant diff --git a/src/common/structs.f90 b/src/common/structs.f90 index 97c03c4e205..511d75b0d79 100644 --- a/src/common/structs.f90 +++ b/src/common/structs.f90 @@ -3,11 +3,11 @@ module structs use num_types implicit none private - + type, public :: struct_curve_t - real(kind=dp) :: curve_data(5,12) - integer :: curve_type(12) - integer :: el_idx + real(kind=dp) :: curve_data(5,12) + integer :: curve_type(12) + integer :: el_idx end type struct_curve_t !> Pointer to array diff --git a/src/common/system.f90 b/src/common/system.f90 index 117c80ddd3b..4de8c37d4b5 100644 --- a/src/common/system.f90 +++ b/src/common/system.f90 @@ -51,21 +51,21 @@ end subroutine system_cpuid public :: system_cpuid, system_cpu_name - contains +contains - !> Retrieve the system's CPU name (type) - !! @param name Stores the retrieved name. - subroutine system_cpu_name(name) - character(len=*), intent(inout) :: name - character(kind=c_char, len=80), target :: c_name - integer :: end_pos + !> Retrieve the system's CPU name (type) + !! @param name Stores the retrieved name. + subroutine system_cpu_name(name) + character(len=*), intent(inout) :: name + character(kind=c_char, len=80), target :: c_name + integer :: end_pos - call system_cpuid(c_loc(c_name), 80) + call system_cpuid(c_loc(c_name), 80) + + end_pos = scan(c_name, C_NULL_CHAR) + if(end_pos .ge. 2) then + name(1:end_pos-1) = c_name(1:end_pos-1) + end if + end subroutine system_cpu_name - end_pos = scan(c_name, C_NULL_CHAR) - if(end_pos .ge. 2) then - name(1:end_pos-1) = c_name(1:end_pos-1) - end if - end subroutine system_cpu_name - end module system diff --git a/src/common/time_based_controller.f90 b/src/common/time_based_controller.f90 index f35072489b1..f7fb1d1fed7 100644 --- a/src/common/time_based_controller.f90 +++ b/src/common/time_based_controller.f90 @@ -57,16 +57,21 @@ module time_based_controller integer :: nexecutions = 0 !> Whether to never output. logical :: never = .false. + !> Control mode defining the meaning of `control_value`. + !> Can be `simulationtime`, `tsteps`, `nsamples` or `never`. + character(len=:), allocatable :: control_mode + !> Defines the frequency of writes. + real(kind=rp) :: control_value contains - !> Constructor. + !> Constructor. procedure, pass(this) :: init => time_based_controller_init - !> Check if the execution should be performed. + !> Check if the execution should be performed. procedure, pass(this) :: check => time_based_controller_check - !> Increment `nexectutions`. + !> Increment `nexectutions`. procedure, pass(this) :: register_execution => & time_based_controller_register_execution - !> Set the counter based on a time (for restarts) + !> Set the counter based on a time (for restarts) procedure, pass(this) :: set_counter => & time_based_controller_set_counter @@ -77,7 +82,7 @@ module time_based_controller end interface assignment(=) contains - + !> Constructor. !! @param end_time The final simulation time. !! @param control_mode The way to interpret the `control_value` parameter. @@ -90,29 +95,31 @@ subroutine time_based_controller_init(this, end_time, control_mode, & real(kind=rp), intent(in) :: control_value this%end_time = end_time + this%control_mode = control_mode + this%control_value = control_value if (trim(control_mode) .eq. 'simulationtime') then - this%time_interval = control_value - this%frequency = 1/this%time_interval - this%nsteps = 0 + this%time_interval = control_value + this%frequency = 1/this%time_interval + this%nsteps = 0 else if (trim(control_mode) .eq. 'nsamples') then - if (control_value .le. 0) then - call neko_error("nsamples must be positive") - end if - - this%frequency = control_value / end_time - this%time_interval = 1.0_rp / this%frequency - this%nsteps = 0 - else if (trim(control_mode) .eq. 'tsteps') then - this%nsteps = control_value - ! if the timestep will be variable, we cannot compute these. - this%frequency = 0 - this%time_interval = 0 - else if (trim(control_mode) .eq. 'never') then - this%never = .true. + if (control_value .le. 0) then + call neko_error("nsamples must be positive") + end if + + this%frequency = control_value / end_time + this%time_interval = 1.0_rp / this%frequency + this%nsteps = 0 + else if (trim(control_mode) .eq. 'tsteps') then + this%nsteps = control_value + ! if the timestep will be variable, we cannot compute these. + this%frequency = 0 + this%time_interval = 0 + else if (trim(control_mode) .eq. 'never') then + this%never = .true. else - call neko_error("The control parameter must be simulationtime, nsamples& - & tsteps, or never, but received "//trim(control_mode)) + call neko_error("The control parameter must be simulationtime, nsamples& + & tsteps, or never, but received "//trim(control_mode)) end if end subroutine time_based_controller_init @@ -139,18 +146,18 @@ function time_based_controller_check(this, t, tstep, force) result(check) check = .false. if (ifforce) then - check = .true. + check = .true. else if (this%never) then - check = .false. + check = .false. else if ( (this%nsteps .eq. 0) .and. & (t .ge. this%nexecutions * this%time_interval) ) then - check = .true. + check = .true. else if (this%nsteps .gt. 0) then if (mod(tstep, this%nsteps) .eq. 0) then - check = .true. + check = .true. end if end if - end function + end function time_based_controller_check !> Assignment operator. Simply copies attribute values. !! @param ctrl1 Left-hand side. diff --git a/src/common/time_interpolator.f90 b/src/common/time_interpolator.f90 index 8f73c56632d..5851ce53375 100644 --- a/src/common/time_interpolator.f90 +++ b/src/common/time_interpolator.f90 @@ -76,7 +76,7 @@ end subroutine time_interpolator_init !> Destructor subroutine time_interpolator_free(this) class(time_interpolator_t), intent(inout) :: this - + end subroutine time_interpolator_free !> Interpolate a field at time t from fields at time t-dt and t+dt @@ -88,26 +88,26 @@ end subroutine time_interpolator_free !! @param f_future time in future for interpolation subroutine time_interpolator_interpolate(this, t, f, t_past, f_past, t_future, f_future) class(time_interpolator_t), intent(inout) :: this - real(kind=rp), intent(inout) :: t, t_past, t_future + real(kind=rp), intent(inout) :: t, t_past, t_future type(field_t), intent(inout) :: f, f_past, f_future real(kind=rp) :: w_past, w_future !Weights for the interpolation integer :: n if (this%order .eq. 2) then - n = f%dof%size() - w_past = ( t_future - t ) / ( t_future - t_past ) - w_future = ( t - t_past ) / ( t_future - t_past ) - - if (NEKO_BCKND_DEVICE .eq. 1) then - call device_add3s2(f%x_d, f_past%x_d, f_future%x_d, & - w_past, w_future, n) - else - call add3s2(f%x, f_past%x, f_future%x, w_past, w_future, n) - end if - + n = f%dof%size() + w_past = ( t_future - t ) / ( t_future - t_past ) + w_future = ( t - t_past ) / ( t_future - t_past ) + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_add3s2(f%x_d, f_past%x_d, f_future%x_d, & + w_past, w_future, n) + else + call add3s2(f%x, f_past%x, f_future%x, w_past, w_future, n) + end if + else - call neko_error("Time interpolation of required order is not implemented") + call neko_error("Time interpolation of required order is not implemented") end if end subroutine time_interpolator_interpolate diff --git a/src/common/time_step_controller.f90 b/src/common/time_step_controller.f90 new file mode 100644 index 00000000000..0737ef21041 --- /dev/null +++ b/src/common/time_step_controller.f90 @@ -0,0 +1,149 @@ +! Copyright (c) 2022, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Implements type time_step_controller. +module time_step_controller + use num_types + use logger + use json_module, only : json_file + use json_utils, only : json_get_or_default + implicit none + private + + !> Provides a tool to set time step dt + type, public :: time_step_controller_t + !> Components recording time stepping info + logical :: if_variable_dt + real(kind=rp) :: set_cfl + real(kind=rp) :: max_dt + integer :: max_update_frequency + integer :: dt_last_change + real(kind=rp) :: alpha !coefficient of running average + real(kind=rp) :: max_dt_increase_factor, min_dt_decrease_factor + contains + !> Initialize object. + procedure, pass(this) :: init => time_step_controller_init + !> Set time stepping + procedure, pass(this) :: set_dt => time_step_controller_set_dt + + end type time_step_controller_t + +contains + + !> Constructor + !! @param order order of the interpolation + subroutine time_step_controller_init(this, params) + class(time_step_controller_t), intent(inout) :: this + type(json_file), intent(inout) :: params + + this%dt_last_change = 0 + call json_get_or_default(params, 'case.variable_timestep',& + this%if_variable_dt, .false.) + call json_get_or_default(params, 'case.target_cfl',& + this%set_cfl, 0.4_rp) + call json_get_or_default(params, 'case.max_timestep',& + this%max_dt, huge(0.0_rp)) + call json_get_or_default(params, 'case.cfl_max_update_frequency',& + this%max_update_frequency, 0) + call json_get_or_default(params, 'case.cfl_running_avg_coeff',& + this%alpha, 0.5_rp) + call json_get_or_default(params, 'case.max_dt_increase_factor',& + this%max_dt_increase_factor, 1.2_rp) + call json_get_or_default(params, 'case.min_dt_decrease_factor',& + this%min_dt_decrease_factor, 0.5_rp) + + end subroutine time_step_controller_init + + !> Set new dt based on cfl if requested + !! @param dt time step in case_t. + !! @param cfl courant number of current iteration. + !! @param cfl_avrg average Courant number. + !! @param tstep the current time step. + !! @Algorithm: + !! 1. Set the first time step such that cfl is the set one; + !! 2. During time-stepping, adjust dt when cfl_avrg is offset by 20%. + subroutine time_step_controller_set_dt(this, dt, cfl, cfl_avrg, tstep) + implicit none + class(time_step_controller_t), intent(inout) :: this + real(kind=rp), intent(inout) :: dt + real(kind=rp), intent(in) :: cfl + real(kind=rp), intent(inout) :: cfl_avrg + real(kind=rp) :: dt_old, scaling_factor + character(len=LOG_SIZE) :: log_buf + integer, intent(in):: tstep + + if (this%if_variable_dt .eqv. .true.) then + if (tstep .eq. 1) then + ! set the first dt for desired cfl + dt = min(this%set_cfl/cfl*dt, this%max_dt) + else + ! Calculate the average of cfl over the desired interval + cfl_avrg = this%alpha * cfl + (1-this%alpha) * cfl_avrg + + if (abs(cfl_avrg - this%set_cfl) .ge. 0.2*this%set_cfl .and. & + this%dt_last_change .ge. this%max_update_frequency) then + + if (this%set_cfl/cfl .ge. 1) then + ! increase of time step + scaling_factor = min(this%max_dt_increase_factor, this%set_cfl/cfl) + else + ! reduction of time step + scaling_factor = max(this%min_dt_decrease_factor, this%set_cfl/cfl) + end if + + dt_old = dt + dt = scaling_factor * dt_old + dt = min(dt, this%max_dt) + + write(log_buf, '(A,E15.7,1x,A,E15.7)') 'Avrg CFL:', cfl_avrg, & + 'set_cfl:', this%set_cfl + call neko_log%message(log_buf) + + write(log_buf, '(A,E15.7,1x,A,E15.7)') 'old dt:', dt_old, & + 'new dt:', dt + call neko_log%message(log_buf) + + this%dt_last_change = 0 + + else + this%dt_last_change = this%dt_last_change + 1 + end if + end if + + end if + + end subroutine time_step_controller_set_dt + + + + +end module time_step_controller diff --git a/src/common/user_intf.f90 b/src/common/user_intf.f90 index d095a66c59f..ce8ee27111c 100644 --- a/src/common/user_intf.f90 +++ b/src/common/user_intf.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2020-2023, The Neko Authors +! Copyright (c) 2020-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -33,14 +33,18 @@ !> Interfaces for user interaction with NEKO module user_intf use field + use field_list, only : field_list_t use fluid_user_source_term - use source_scalar + use scalar_user_source_term use coefs + use bc, only: bc_list_t use mesh use usr_inflow use usr_scalar + use field_dirichlet, only: field_dirichlet_update use num_types use json_module, only : json_file + use utils, only : neko_error, neko_warning implicit none private @@ -57,6 +61,16 @@ subroutine useric(u, v, w, p, params) end subroutine useric end interface + !> Abstract interface for user defined scalar initial conditions + abstract interface + subroutine useric_scalar(s, params) + import field_t + import json_file + type(field_t), intent(inout) :: s + type(json_file), intent(inout) :: params + end subroutine useric_scalar + end interface + !> Abstract interface for initilialization of modules abstract interface subroutine user_initialize_modules(t, u, v, w, p, coef, params) @@ -131,15 +145,17 @@ end subroutine user_material_properties type, public :: user_t procedure(useric), nopass, pointer :: fluid_user_ic => null() + procedure(useric_scalar), nopass, pointer :: scalar_user_ic => null() procedure(user_initialize_modules), nopass, pointer :: user_init_modules => null() procedure(usermsh), nopass, pointer :: user_mesh_setup => null() procedure(usercheck), nopass, pointer :: user_check => null() procedure(user_final_modules), nopass, pointer :: user_finalize_modules => null() procedure(fluid_source_compute_pointwise), nopass, pointer :: fluid_user_f => null() procedure(fluid_source_compute_vector), nopass, pointer :: fluid_user_f_vector => null() - procedure(source_scalar_term_pw), nopass, pointer :: scalar_user_f => null() - procedure(source_scalar_term), nopass, pointer :: scalar_user_f_vector => null() + procedure(scalar_source_compute_pointwise), nopass, pointer :: scalar_user_f => null() + procedure(scalar_source_compute_vector), nopass, pointer :: scalar_user_f_vector => null() procedure(usr_inflow_eval), nopass, pointer :: fluid_user_if => null() + procedure(field_dirichlet_update), nopass, pointer :: user_dirichlet_update => null() procedure(usr_scalar_bc_eval), nopass, pointer :: scalar_user_bc => null() !> Routine to set material properties procedure(user_material_properties), nopass, pointer :: material_properties => null() @@ -147,10 +163,10 @@ end subroutine user_material_properties procedure, pass(u) :: init => user_intf_init end type user_t - public :: useric, user_initialize_modules, usermsh, & + public :: useric, useric_scalar, user_initialize_modules, usermsh, & dummy_user_material_properties, user_material_properties contains - + !> User interface initialization subroutine user_intf_init(u) class(user_t), intent(inout) :: u @@ -159,10 +175,14 @@ subroutine user_intf_init(u) u%fluid_user_ic => dummy_user_ic end if + if (.not. associated(u%scalar_user_ic)) then + u%scalar_user_ic => dummy_user_ic_scalar + end if + if (.not. associated(u%fluid_user_f)) then u%fluid_user_f => dummy_user_f end if - + if (.not. associated(u%fluid_user_f_vector)) then u%fluid_user_f_vector => dummy_user_f_vector end if @@ -170,7 +190,7 @@ subroutine user_intf_init(u) if (.not. associated(u%scalar_user_f)) then u%scalar_user_f => dummy_scalar_user_f end if - + if (.not. associated(u%scalar_user_f_vector)) then u%scalar_user_f_vector => dummy_user_scalar_f_vector end if @@ -178,6 +198,10 @@ subroutine user_intf_init(u) if (.not. associated(u%scalar_user_bc)) then u%scalar_user_bc => dummy_scalar_user_bc end if + + if (.not. associated(u%user_dirichlet_update)) then + u%user_dirichlet_update => dirichlet_do_nothing + end if if (.not. associated(u%user_mesh_setup)) then u%user_mesh_setup => dummy_user_mesh_setup @@ -200,7 +224,7 @@ subroutine user_intf_init(u) end if end subroutine user_intf_init - + ! ! Below is the dummy user interface ! when running in pure turboNEKO mode @@ -213,14 +237,23 @@ subroutine dummy_user_ic(u, v, w, p, params) type(field_t), intent(inout) :: w type(field_t), intent(inout) :: p type(json_file), intent(inout) :: params - call neko_error('Dummy user defined initial condition set') + call neko_error('Dummy user defined initial condition set') end subroutine dummy_user_ic + !> Dummy user initial condition for scalar field + !! @param s Scalar field. + !! @param params JSON parameters. + subroutine dummy_user_ic_scalar(s, params) + type(field_t), intent(inout) :: s + type(json_file), intent(inout) :: params + call neko_error('Dummy user defined scalar initial condition set') + end subroutine dummy_user_ic_scalar + !> Dummy user (fluid) forcing subroutine dummy_user_f_vector(f, t) class(fluid_user_source_term_t), intent(inout) :: f real(kind=rp), intent(in) :: t - call neko_error('Dummy user defined vector valued forcing set') + call neko_error('Dummy user defined vector valued forcing set') end subroutine dummy_user_f_vector !> Dummy user (fluid) forcing @@ -233,14 +266,14 @@ subroutine dummy_user_f(u, v, w, j, k, l, e, t) integer, intent(in) :: l integer, intent(in) :: e real(kind=rp), intent(in) :: t - call neko_error('Dummy user defined forcing set') + call neko_error('Dummy user defined forcing set') end subroutine dummy_user_f !> Dummy user (scalar) forcing subroutine dummy_user_scalar_f_vector(f, t) - class(source_scalar_t), intent(inout) :: f + class(scalar_user_source_term_t), intent(inout) :: f real(kind=rp), intent(in) :: t - call neko_error('Dummy user defined vector valued forcing set') + call neko_error('Dummy user defined vector valued forcing set') end subroutine dummy_user_scalar_f_vector !> Dummy user (scalar) forcing @@ -251,9 +284,9 @@ subroutine dummy_scalar_user_f(s, j, k, l, e, t) integer, intent(in) :: l integer, intent(in) :: e real(kind=rp), intent(in) :: t - call neko_error('Dummy user defined forcing set') + call neko_error('Dummy user defined forcing set') end subroutine dummy_scalar_user_f - + !> Dummy user boundary condition for scalar subroutine dummy_scalar_user_bc(s, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep) real(kind=rp), intent(inout) :: s @@ -269,17 +302,17 @@ subroutine dummy_scalar_user_bc(s, x, y, z, nx, ny, nz, ix, iy, iz, ie, t, tstep integer, intent(in) :: ie real(kind=rp), intent(in) :: t integer, intent(in) :: tstep - call neko_warning('Dummy scalar user bc set, applied on all non-labeled zones') + call neko_warning('Dummy scalar user bc set, applied on all non-labeled zones') end subroutine dummy_scalar_user_bc - + !> Dummy user mesh apply subroutine dummy_user_mesh_setup(msh) type(mesh_t), intent(inout) :: msh end subroutine dummy_user_mesh_setup - + !> Dummy user check subroutine dummy_user_check(t, tstep, u, v, w, p, coef, params) - real(kind=rp), intent(in) :: t + real(kind=rp), intent(in) :: t integer, intent(in) :: tstep type(field_t), intent(inout) :: u type(field_t), intent(inout) :: v @@ -304,6 +337,16 @@ subroutine dummy_user_final_no_modules(t, params) type(json_file), intent(inout) :: params end subroutine dummy_user_final_no_modules + subroutine dirichlet_do_nothing(dirichlet_field_list, dirichlet_bc_list, & + coef, t, tstep, which_solver) + type(field_list_t), intent(inout) :: dirichlet_field_list + type(bc_list_t), intent(inout) :: dirichlet_bc_list + type(coef_t), intent(inout) :: coef + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + character(len=*), intent(in) :: which_solver + end subroutine dirichlet_do_nothing + subroutine dummy_user_material_properties(t, tstep, rho, mu, cp, lambda,& params) real(kind=rp), intent(in) :: t diff --git a/src/common/utils.f90 b/src/common/utils.f90 index 35fb782ab23..1a4cc6eab90 100644 --- a/src/common/utils.f90 +++ b/src/common/utils.f90 @@ -40,23 +40,23 @@ module utils interface neko_error module procedure neko_error_plain, neko_error_msg end interface neko_error - + contains - + !> Find position (in the string) of a filename's suffix pure function filename_suffix_pos(fname) result(suffix_pos) character(len=*), intent(in) :: fname integer :: suffix_pos - suffix_pos = scan(trim(fname), '.', back=.true.) + suffix_pos = scan(trim(fname), '.', back=.true.) end function filename_suffix_pos !> Find position (in the string) of a filename's trailing slash pure function filename_tslash_pos(fname) result(tslash_pos) character(len=*), intent(in) :: fname integer :: tslash_pos - tslash_pos = scan(trim(fname), '/', back=.true.) + tslash_pos = scan(trim(fname), '/', back=.true.) end function filename_tslash_pos - + !> Extract a filename's suffix subroutine filename_suffix(fname, suffix) character(len=*) :: fname @@ -76,12 +76,53 @@ subroutine filename_chsuffix(fname, new_fname, new_suffix) end subroutine filename_chsuffix + !> Split a string based on delimiter (tokenizer) + !! OBS: very hacky, this should really be improved, it is rather embarrasing code. + function split_string(string, delimiter) result(split_str) + character(len=*) :: string + character(len=*) :: delimiter + character(len=100), allocatable :: split_str(:) + integer :: length, i, i2,offset, j + i = 0 + offset = 1 + length = 1 + if (len(trim(string)) .eq. 0) then + allocate(split_str(1)) + split_str(1) = trim(string) + return + end if + do while( .true.) + i = scan(string(offset:), delimiter, back=.false.) + if (i .eq. 0) exit + length = length + 1 + offset = offset + i + end do + + allocate(split_str(length)) + i = 0 + j = 1 + offset=1 + do while( .true.) + i2 = scan(trim(string(offset:)), delimiter, back=.false.) + if (i2 .eq. 0) then + split_str(j) = trim(string(offset:)) + exit + end if + split_str(j) = trim(string(offset:offset+i2-2)) + offset = offset+i2 + j = j + 1 + end do + end function split_string + + + + !> Compute the address of a (i,j,k,l) array !! with sizes (1:lx, 1:ly, 1:lz, :) pure function linear_index(i,j,k,l,lx,ly,lz) result(index) integer, intent(in) :: i, j, k, l, lx, ly, lz integer :: index - + index = (i + lx * ((j - 1) + ly * ((k - 1) + lz * ((l - 1))))) end function linear_index @@ -104,11 +145,11 @@ pure function index_is_on_facet(i, j, k, lx, ly, lz, facet) result(is_on) case(6) if (k .eq. lz) is_on = .true. end select - + end function index_is_on_facet - - + + !> Compute (i,j,k,l) array given linear index !! with sizes (1:lx, 1:ly, 1:lz, :) pure function nonlinear_index(linear_index,lx,ly,lz) result(index) @@ -116,7 +157,7 @@ pure function nonlinear_index(linear_index,lx,ly,lz) result(index) integer :: index(4) integer :: lin_idx lin_idx = linear_index -1 - index(4) = lin_idx/(lx*ly*lz) + index(4) = lin_idx/(lx*ly*lz) index(3) = (lin_idx-(lx*ly*lz)*index(4))/(lx*ly) index(2) = (lin_idx-(lx*ly*lz)*index(4)-(lx*ly)*index(3))/lx index(1) = (lin_idx-(lx*ly*lz)*index(4)-(lx*ly)*index(3)-lx*index(2)) @@ -150,5 +191,5 @@ subroutine neko_warning(warning_msg) character(len=*) :: warning_msg write(*,*) '*** WARNING: ', warning_msg,' ***' end subroutine neko_warning - + end module utils diff --git a/src/device/cuda/nvtx.F90 b/src/device/cuda/nvtx.F90 index 32d7bac9bce..2fac8243211 100644 --- a/src/device/cuda/nvtx.F90 +++ b/src/device/cuda/nvtx.F90 @@ -1,42 +1,85 @@ !> Interface to NVTX +!! Based on https://github.com/maxcuda/NVTX_example module nvtx use, intrinsic :: iso_c_binding implicit none private integer, parameter :: NVTX_MAX_LEN = 256 + integer, parameter :: color(24) = [int(Z'00A6CEE3'), int(Z'001F78B4'), & + int(Z'00B2DF8A'), int(Z'0033A02C'), & + int(Z'00FB9A99'), int(Z'00E31A1C'), & + int(Z'00FDBF6F'), int(Z'00FF7F00'), & + int(Z'00CAB2D6'), int(Z'006A3D9A'), & + int(Z'00FFFF99'), int(Z'00B15928'), & + int(Z'008DD3C7'), int(Z'00FFFFB3'), & + int(Z'00BEBADA'), int(Z'00FB8072'), & + int(Z'0080B1D3'), int(Z'00FDB462'), & + int(Z'00B3DE69'), int(Z'00FCCDE5'), & + int(Z'00D9D9D9'), int(Z'00BC89BD'), & + int(Z'00CCEBC5'), int(Z'00FFED6F')] #ifdef HAVE_NVTX + + type, bind(c) :: nvtxEventAttributes + integer(c_int16_t) :: version = 1 + integer(c_int16_t) :: size = 48 + integer(c_int32_t) :: category = 0 + integer(c_int32_t) :: colortype = 1 + integer(c_int32_t) :: color + integer(c_int32_t) :: payloadtype = 0 + integer(c_int32_t) :: reserved0 + integer(c_int64_t) :: payload + integer(c_int) :: messagetype = 1 + type(c_ptr) :: message + end type nvtxEventAttributes + interface nvtxRangePushA subroutine nvtxRangePushA(name) bind(C, name='nvtxRangePushA') use iso_c_binding character(kind=c_char) :: name(256) end subroutine nvtxRangePushA end interface nvtxRangePushA - + + interface nvtxRangePushEx + subroutine nvtxRangePushEx(event) bind(C, name='nvtxRangePushEx') + use iso_c_binding + import :: nvtxEventAttributes + type(nvtxEventAttributes) :: event + end subroutine nvtxRangePushEx + end interface nvtxRangePushEx + interface nvtxRangePop subroutine nvtxRangePop() bind(C, name='nvtxRangePop') end subroutine nvtxRangePop end interface nvtxRangePop public :: nvtxStartRange, nvtxRangePushA, nvtxRangePop - + contains - - subroutine nvtxStartRange(name) + + subroutine nvtxStartRange(name, region_id) character(kind=c_char,len=*) :: name - character :: c_name(NVTX_MAX_LEN) - integer:: i, str_len - - str_len = len(trim(name)) + integer, optional :: region_id + type(nvtxEventAttributes) :: event + character, target :: c_name(NVTX_MAX_LEN) + integer :: i, str_len + + str_len = len(trim(name)) do i = 1, len(trim(name)) c_name(i) = name(i:i) end do c_name(str_len+1) = C_NULL_CHAR - - call nvtxRangePushA(c_name) + + if (present(region_id)) then + event%color = color(mod(region_id, 24) + 1) + event%message = c_loc(c_name) + call nvtxRangePushEx(event) + else + call nvtxRangePushA(c_name) + end if end subroutine nvtxStartRange - + #endif end module nvtx diff --git a/src/device/cuda_intf.F90 b/src/device/cuda_intf.F90 index 8b86d53bc33..37549d2a84b 100644 --- a/src/device/cuda_intf.F90 +++ b/src/device/cuda_intf.F90 @@ -50,22 +50,22 @@ module cuda_intf !> Low priority stream setting integer :: STRM_LOW_PRIO - + !> Enum @a cudaError enum, bind(c) - enumerator :: cudaSuccess = 0 - enumerator :: cudaErrorInvalidValue = 1 - enumerator :: cudaErrorMemoryAllocation = 2 - enumerator :: cudaErrorInitializationError = 3 + enumerator :: cudaSuccess = 0 + enumerator :: cudaErrorInvalidValue = 1 + enumerator :: cudaErrorMemoryAllocation = 2 + enumerator :: cudaErrorInitializationError = 3 end enum !> Enum @a cudaMemcpyKind enum, bind(c) - enumerator :: cudaMemcpyHostToHost = 0 - enumerator :: cudaMemcpyHostToDevice = 1 - enumerator :: cudaMemcpyDeviceToHost = 2 - enumerator :: cudaMemcpyDevicetoDevice = 3 - enumerator :: cudaMemcpyDefault = 4 + enumerator :: cudaMemcpyHostToHost = 0 + enumerator :: cudaMemcpyHostToDevice = 1 + enumerator :: cudaMemcpyDeviceToHost = 2 + enumerator :: cudaMemcpyDevicetoDevice = 3 + enumerator :: cudaMemcpyDefault = 4 end enum interface @@ -86,7 +86,7 @@ integer (c_int) function cudaFree(ptr_d) & type(c_ptr), value :: ptr_d end function cudaFree end interface - + interface integer (c_int) function cudaMemcpy(ptr_dst, ptr_src, s, dir) & bind(c, name='cudaMemcpy') @@ -108,7 +108,7 @@ integer (c_int) function cudaMemcpyAsync(ptr_dst, ptr_src, s, dir, stream) & integer(c_int), value :: dir end function cudaMemcpyAsync end interface - + interface integer (c_int) function cudaDeviceSynchronize() & bind(c, name='cudaDeviceSynchronize') @@ -155,7 +155,7 @@ integer (c_int) function cudaStreamCreateWithPriority(stream, flags, prio) & integer(c_int), value :: flags, prio end function cudaStreamCreateWithPriority end interface - + interface integer (c_int) function cudaStreamDestroy(steam) & bind(c, name='cudaStreamDestroy') @@ -165,7 +165,7 @@ integer (c_int) function cudaStreamDestroy(steam) & end function cudaStreamDestroy end interface - interface + interface integer (c_int) function cudaStreamSynchronize(stream) & bind(c, name='cudaStreamSynchronize') use, intrinsic :: iso_c_binding @@ -174,7 +174,7 @@ integer (c_int) function cudaStreamSynchronize(stream) & end function cudaStreamSynchronize end interface - interface + interface integer (c_int) function cudaStreamWaitEvent(stream, event, flags) & bind(c, name='cudaStreamWaitEvent') use, intrinsic :: iso_c_binding @@ -192,7 +192,7 @@ integer (c_int) function cudaDeviceGetStreamPriorityRange(low_prio, high_prio) & integer(c_int) :: low_prio, high_prio end function cudaDeviceGetStreamPriorityRange end interface - + interface integer (c_int) function cudaProfilerStart() & bind(c, name='cudaProfilerStart') @@ -226,7 +226,7 @@ integer (c_int) function cudaEventDestroy(event) & type(c_ptr), value :: event end function cudaEventDestroy end interface - + interface integer (c_int) function cudaEventCreateWithFlags(event, flags) & bind(c, name='cudaEventCreateWithFlags') @@ -246,7 +246,7 @@ integer (c_int) function cudaEventRecord(event, stream) & end function cudaEventRecord end interface - interface + interface integer (c_int) function cudaEventSynchronize(event) & bind(c, name='cudaEventSynchronize') use, intrinsic :: iso_c_binding @@ -272,7 +272,7 @@ integer (c_int) function cudaSetDevice(device) & integer(c_int), value :: device end function cudaSetDevice end interface - + contains subroutine cuda_init @@ -297,10 +297,10 @@ subroutine cuda_init end if !$omp end parallel end if - + if (cudaDeviceGetStreamPriorityRange(STRM_LOW_PRIO, STRM_HIGH_PRIO) & .ne. cudaSuccess) then - call neko_error('Error retrieving stream priority range') + call neko_error('Error retrieving stream priority range') end if if (cudaStreamCreateWithPriority(glb_cmd_queue, 1, STRM_HIGH_PRIO) & @@ -323,7 +323,7 @@ subroutine cuda_finalize call neko_error('Error destroying aux stream') end if end subroutine cuda_finalize - + subroutine cuda_device_name(name) character(len=*), intent(inout) :: name character(kind=c_char, len=8192), target :: prop @@ -337,17 +337,17 @@ subroutine cuda_device_name(name) ! ! This will of course break if sizeof(cudaDeviceProp) > 8192 ! - + if (cudaGetDeviceProperties(c_loc(prop), 0) .ne. cudaSuccess) then call neko_error('Failed to query device') end if - + end_pos = scan(prop(1:256), C_NULL_CHAR) if(end_pos .ge. 2) then name(1:end_pos-1) = prop(1:end_pos-1) endif end subroutine cuda_device_name - + #endif - + end module cuda_intf diff --git a/src/device/device.F90 b/src/device/device.F90 index e8e7d161e19..89bf48cf7a0 100644 --- a/src/device/device.F90 +++ b/src/device/device.F90 @@ -38,7 +38,7 @@ module device use hip_intf use htable, only : htable_cptr_t, h_cptr_t use utils, only : neko_error - use dummy_device + use dummy_device use opencl_prgm_lib use, intrinsic :: iso_c_binding implicit none @@ -76,7 +76,7 @@ module device module procedure device_deassociate_r1, device_deassociate_r2, & device_deassociate_r3, device_deassociate_r4 end interface device_deassociate - + !> Return the device pointer for an associated Fortran array interface device_get_ptr module procedure device_get_ptr_r1, device_get_ptr_r2, & @@ -87,7 +87,7 @@ module device interface device_sync module procedure device_sync_device, device_sync_stream end interface device_sync - + !> Table of host to device address mappings type(htable_cptr_t), private :: device_addrtbl @@ -100,7 +100,7 @@ module device device_stream_wait_event private :: device_memcpy_common - + contains subroutine device_init @@ -115,7 +115,7 @@ subroutine device_init call opencl_init #endif -#endif +#endif end subroutine device_init subroutine device_finalize @@ -145,7 +145,7 @@ subroutine device_name(name) call opencl_device_name(name) #endif end subroutine device_name - + !> Allocate memory on the device subroutine device_alloc(x_d, s) type(c_ptr), intent(inout) :: x_d @@ -192,21 +192,10 @@ subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm) class(*), intent(inout), target :: x(:) type(c_ptr), intent(inout) :: x_d integer, intent(in), value :: dir - logical, optional :: sync + logical :: sync type(c_ptr), optional :: strm type(c_ptr) :: ptr_h, copy_stream integer(c_size_t) :: s - logical :: sync_device - - if (present(sync)) then - sync_device = sync - else -#if defined(HAVE_CUDA) || defined (HAVE_HIP) - sync_device = .false. -#else - sync_device = .true. -#endif - end if if (present(strm)) then copy_stream = strm @@ -218,7 +207,7 @@ subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm) type is (integer) s = n * 4 ptr_h = c_loc(x) - type is (integer(i8)) + type is (integer(i8)) s = n * 8 ptr_h = c_loc(x) type is (real) @@ -231,8 +220,8 @@ subroutine device_memcpy_r1(x, x_d, n, dir, sync, strm) call neko_error('Unknown Fortran type') end select - call device_memcpy_common(ptr_h, x_d, s, dir, sync_device, copy_stream) - + call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream) + end subroutine device_memcpy_r1 !> Copy data between host and device (rank 2 arrays) @@ -241,21 +230,10 @@ subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm) class(*), intent(inout), target :: x(:,:) type(c_ptr), intent(inout) :: x_d integer, intent(in), value :: dir - logical, optional :: sync + logical :: sync type(c_ptr), optional :: strm type(c_ptr) :: ptr_h, copy_stream integer(c_size_t) :: s - logical :: sync_device - - if (present(sync)) then - sync_device = sync - else -#if defined(HAVE_CUDA) || defined (HAVE_HIP) - sync_device = .false. -#else - sync_device = .true. -#endif - end if if (present(strm)) then copy_stream = strm @@ -267,7 +245,7 @@ subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm) type is (integer) s = n * 4 ptr_h = c_loc(x) - type is (integer(i8)) + type is (integer(i8)) s = n * 8 ptr_h = c_loc(x) type is (real) @@ -280,8 +258,8 @@ subroutine device_memcpy_r2(x, x_d, n, dir, sync, strm) call neko_error('Unknown Fortran type') end select - call device_memcpy_common(ptr_h, x_d, s, dir, sync_device, copy_stream) - + call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream) + end subroutine device_memcpy_r2 !> Copy data between host and device (rank 3 arrays) @@ -290,33 +268,22 @@ subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm) class(*), intent(inout), target :: x(:,:,:) type(c_ptr), intent(inout) :: x_d integer, intent(in), value :: dir - logical, optional :: sync + logical :: sync type(c_ptr), optional :: strm type(c_ptr) :: ptr_h, copy_stream integer(c_size_t) :: s - logical :: sync_device - - if (present(sync)) then - sync_device = sync - else -#if defined(HAVE_CUDA) || defined (HAVE_HIP) - sync_device = .false. -#else - sync_device = .true. -#endif - end if if (present(strm)) then copy_stream = strm else copy_stream = glb_cmd_queue end if - + select type(x) type is (integer) s = n * 4 ptr_h = c_loc(x) - type is (integer(i8)) + type is (integer(i8)) s = n * 8 ptr_h = c_loc(x) type is (real) @@ -329,8 +296,8 @@ subroutine device_memcpy_r3(x, x_d, n, dir, sync, strm) call neko_error('Unknown Fortran type') end select - call device_memcpy_common(ptr_h, x_d, s, dir, sync_device, copy_stream) - + call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream) + end subroutine device_memcpy_r3 !> Copy data between host and device (rank 4 arrays) @@ -339,33 +306,22 @@ subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm) class(*), intent(inout), target :: x(:,:,:,:) type(c_ptr), intent(inout) :: x_d integer, intent(in), value :: dir - logical, optional :: sync + logical :: sync type(c_ptr), optional :: strm type(c_ptr) :: ptr_h, copy_stream - integer(c_size_t) :: s - logical :: sync_device - - if (present(sync)) then - sync_device = sync - else -#if defined(HAVE_CUDA) || defined (HAVE_HIP) - sync_device = .false. -#else - sync_device = .true. -#endif - end if + integer(c_size_t) :: s if (present(strm)) then copy_stream = strm else copy_stream = glb_cmd_queue end if - + select type(x) type is (integer) s = n * 4 ptr_h = c_loc(x) - type is (integer(i8)) + type is (integer(i8)) s = n * 8 ptr_h = c_loc(x) type is (real) @@ -378,8 +334,8 @@ subroutine device_memcpy_r4(x, x_d, n, dir, sync, strm) call neko_error('Unknown Fortran type') end select - call device_memcpy_common(ptr_h, x_d, s, dir, sync_device, copy_stream) - + call device_memcpy_common(ptr_h, x_d, s, dir, sync, copy_stream) + end subroutine device_memcpy_r4 !> Copy data between host and device (or device and device) (c-pointers) @@ -398,11 +354,7 @@ subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm) if (present(sync)) then sync_device = sync else -#if defined(HAVE_CUDA) || defined (HAVE_HIP) sync_device = .false. -#else - sync_device = .true. -#endif end if if (present(strm)) then @@ -412,9 +364,9 @@ subroutine device_memcpy_cptr(dst, src, s, dir, sync, strm) end if call device_memcpy_common(dst, src, s, dir, sync_device, copy_stream) - + end subroutine device_memcpy_cptr - + !> Copy data between host and device !! @note For device to device copies, @a ptr_h is assumed !! to be the dst device pointer @@ -431,12 +383,12 @@ subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream) hipMemcpyHostToDevice, stream) .ne. hipSuccess) then call neko_error('Device memcpy async (host-to-device) failed') end if - else if (dir .eq. DEVICE_TO_HOST) then + else if (dir .eq. DEVICE_TO_HOST) then if (hipMemcpyAsync(ptr_h, x_d, s, & hipMemcpyDeviceToHost, stream) .ne. hipSuccess) then call neko_error('Device memcpy async (device-to-host) failed') end if - else if (dir .eq. DEVICE_TO_DEVICE) then + else if (dir .eq. DEVICE_TO_DEVICE) then if (hipMemcpyAsync(ptr_h, x_d, s, & hipMemcpyDeviceToDevice, stream) .ne. hipSuccess) then call neko_error('Device memcpy async (device-to-device) failed') @@ -453,12 +405,12 @@ subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream) cudaMemcpyHostToDevice, stream) .ne. cudaSuccess) then call neko_error('Device memcpy async (host-to-device) failed') end if - else if (dir .eq. DEVICE_TO_HOST) then + else if (dir .eq. DEVICE_TO_HOST) then if (cudaMemcpyAsync(ptr_h, x_d, s, & cudaMemcpyDeviceToHost, stream) .ne. cudaSuccess) then call neko_error('Device memcpy async (device-to-host) failed') end if - else if (dir .eq. DEVICE_TO_DEVICE) then + else if (dir .eq. DEVICE_TO_DEVICE) then if (cudaMemcpyAsync(ptr_h, x_d, s, & cudaMemcpyDeviceToDevice, stream) .ne. cudaSuccess) then call neko_error('Device memcpy async (device-to-device) failed') @@ -479,7 +431,7 @@ subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream) else if (dir .eq. DEVICE_TO_HOST) then if (clEnqueueReadBuffer(glb_cmd_queue, x_d, CL_TRUE, 0_i8, s, ptr_h, & 0, C_NULL_PTR, C_NULL_PTR) .ne. CL_SUCCESS) then - call neko_error('Device memcpy (host-to-device) failed') + call neko_error('Device memcpy (device-to-host) failed') end if else if (dir .eq. DEVICE_TO_DEVICE) then if (clEnqueueCopyBuffer(glb_cmd_queue, x_d, ptr_h, 0_i8, 0_i8, s, & @@ -498,7 +450,7 @@ subroutine device_memcpy_common(ptr_h, x_d, s, dir, sync_device, stream) else if (dir .eq. DEVICE_TO_HOST) then if (clEnqueueReadBuffer(glb_cmd_queue, x_d, CL_FALSE, 0_i8, s, ptr_h, & 0, C_NULL_PTR, C_NULL_PTR) .ne. CL_SUCCESS) then - call neko_error('Device memcpy (host-to-device) failed') + call neko_error('Device memcpy (device-to-host) failed') end if else if (dir .eq. DEVICE_TO_DEVICE) then if (clEnqueueCopyBuffer(glb_cmd_queue, x_d, ptr_h, 0_i8, 0_i8, s, & @@ -521,7 +473,7 @@ subroutine device_associate_r1(x, x_d) select type(x) type is (integer) htbl_ptr_h%ptr = c_loc(x) - type is (integer(i8)) + type is (integer(i8)) htbl_ptr_h%ptr = c_loc(x) type is (real) htbl_ptr_h%ptr = c_loc(x) @@ -532,7 +484,7 @@ subroutine device_associate_r1(x, x_d) end select htbl_ptr_d%ptr = x_d - + call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d) end subroutine device_associate_r1 @@ -546,7 +498,7 @@ subroutine device_associate_r2(x, x_d) select type(x) type is (integer) htbl_ptr_h%ptr = c_loc(x) - type is (integer(i8)) + type is (integer(i8)) htbl_ptr_h%ptr = c_loc(x) type is (real) htbl_ptr_h%ptr = c_loc(x) @@ -557,7 +509,7 @@ subroutine device_associate_r2(x, x_d) end select htbl_ptr_d%ptr = x_d - + call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d) end subroutine device_associate_r2 @@ -571,7 +523,7 @@ subroutine device_associate_r3(x, x_d) select type(x) type is (integer) htbl_ptr_h%ptr = c_loc(x) - type is (integer(i8)) + type is (integer(i8)) htbl_ptr_h%ptr = c_loc(x) type is (real) htbl_ptr_h%ptr = c_loc(x) @@ -582,7 +534,7 @@ subroutine device_associate_r3(x, x_d) end select htbl_ptr_d%ptr = x_d - + call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d) end subroutine device_associate_r3 @@ -596,7 +548,7 @@ subroutine device_associate_r4(x, x_d) select type(x) type is (integer) htbl_ptr_h%ptr = c_loc(x) - type is (integer(i8)) + type is (integer(i8)) htbl_ptr_h%ptr = c_loc(x) type is (real) htbl_ptr_h%ptr = c_loc(x) @@ -607,7 +559,7 @@ subroutine device_associate_r4(x, x_d) end select htbl_ptr_d%ptr = x_d - + call device_addrtbl%set(htbl_ptr_h, htbl_ptr_d) end subroutine device_associate_r4 @@ -620,7 +572,7 @@ subroutine device_deassociate_r1(x) select type(x) type is (integer) htbl_ptr_h%ptr = c_loc(x) - type is (integer(i8)) + type is (integer(i8)) htbl_ptr_h%ptr = c_loc(x) type is (real) htbl_ptr_h%ptr = c_loc(x) @@ -644,7 +596,7 @@ subroutine device_deassociate_r2(x) select type(x) type is (integer) htbl_ptr_h%ptr = c_loc(x) - type is (integer(i8)) + type is (integer(i8)) htbl_ptr_h%ptr = c_loc(x) type is (real) htbl_ptr_h%ptr = c_loc(x) @@ -653,7 +605,7 @@ subroutine device_deassociate_r2(x) class default call neko_error('Unknown Fortran type') end select - + if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then call device_addrtbl%remove(htbl_ptr_h) end if @@ -668,7 +620,7 @@ subroutine device_deassociate_r3(x) select type(x) type is (integer) htbl_ptr_h%ptr = c_loc(x) - type is (integer(i8)) + type is (integer(i8)) htbl_ptr_h%ptr = c_loc(x) type is (real) htbl_ptr_h%ptr = c_loc(x) @@ -692,7 +644,7 @@ subroutine device_deassociate_r4(x) select type(x) type is (integer) htbl_ptr_h%ptr = c_loc(x) - type is (integer(i8)) + type is (integer(i8)) htbl_ptr_h%ptr = c_loc(x) type is (real) htbl_ptr_h%ptr = c_loc(x) @@ -707,7 +659,7 @@ subroutine device_deassociate_r4(x) end if end subroutine device_deassociate_r4 - + !> Map a Fortran rank 1 array to a device (allocate and associate) subroutine device_map_r1(x, x_d, n) integer, intent(in) :: n @@ -846,9 +798,9 @@ function device_associated_r1(x) result(assoc) if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then assoc = .true. else - assoc = .false. + assoc = .false. end if - + end function device_associated_r1 !> Check if a Fortran rank 2 array is assoicated with a device pointer @@ -873,9 +825,9 @@ function device_associated_r2(x) result(assoc) if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then assoc = .true. else - assoc = .false. + assoc = .false. end if - + end function device_associated_r2 !> Check if a Fortran rank 3 array is assoicated with a device pointer @@ -900,9 +852,9 @@ function device_associated_r3(x) result(assoc) if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then assoc = .true. else - assoc = .false. + assoc = .false. end if - + end function device_associated_r3 !> Check if a Fortran rank 4 array is assoicated with a device pointer @@ -927,9 +879,9 @@ function device_associated_r4(x) result(assoc) if (device_addrtbl%get(htbl_ptr_h, htbl_ptr_d) .eq. 0) then assoc = .true. else - assoc = .false. + assoc = .false. end if - + end function device_associated_r4 !> Return the device pointer for an associated Fortran rank 1 array @@ -1043,7 +995,7 @@ function device_get_ptr_r4(x) call neko_error('Array not associated with device') end if end function device_get_ptr_r4 - + !> Synchronize the device subroutine device_sync_device() #ifdef HAVE_HIP @@ -1108,7 +1060,7 @@ subroutine device_stream_create(stream, flags) stream = clCreateCommandQueue(glb_ctx, glb_device_id, 0_i8, ierr) if (ierr .ne. CL_SUCCESS) then call neko_error('Error during stream create') - end if + end if #endif end subroutine device_stream_create @@ -1141,7 +1093,7 @@ subroutine device_stream_destroy(stream) call neko_error('Error during stream destroy') end if #elif HAVE_OPENCL - if (clReleaseCommandQueue(stream) .eq. CL_SUCCESS) then + if (clReleaseCommandQueue(stream) .ne. CL_SUCCESS) then call neko_error('Error during stream destroy') end if #endif @@ -1166,7 +1118,7 @@ subroutine device_stream_wait_event(stream, event, flags) ! end if #endif end subroutine device_stream_wait_event - + !> Start device profiling subroutine device_profiler_start() #if HAVE_CUDA @@ -1175,7 +1127,7 @@ subroutine device_profiler_start() end if #endif end subroutine device_profiler_start - + !> Stop device profiling subroutine device_profiler_stop() #if HAVE_CUDA @@ -1232,7 +1184,7 @@ subroutine device_event_destroy(event) end if #endif end subroutine device_event_destroy - + !> Record a device event subroutine device_event_record(event, stream) type(c_ptr), intent(in) :: event @@ -1264,11 +1216,11 @@ subroutine device_event_sync(event) call neko_error('Error during event sync') end if #elif HAVE_OPENCL - if (clWaitForEvents(1, event) .eq. CL_SUCCESS) then + if (clWaitForEvents(1, event) .ne. CL_SUCCESS) then call neko_error('Error during event sync') end if #endif end subroutine device_event_sync - + end module device diff --git a/src/device/device_config.h.in b/src/device/device_config.h.in index d38ccce302d..f829d6517dd 100644 --- a/src/device/device_config.h.in +++ b/src/device/device_config.h.in @@ -1,3 +1,6 @@ +#ifndef __DEVICE_DEVICE_CONFIG__ +#define __DEVICE_DEVICE_CONFIG__ + /** * Parameters and options for device backends */ @@ -11,3 +14,5 @@ typedef @NEKO_DEV_REAL_TYPE@ real; extern void *glb_ctx; extern void *glb_cmd_queue; extern void *glb_device_id; + +#endif // __DEVICE_DEVICE_CONFIG__ \ No newline at end of file diff --git a/src/device/dummy_device.F90 b/src/device/dummy_device.F90 index 07bc2d0e82b..babbfb9f99f 100644 --- a/src/device/dummy_device.F90 +++ b/src/device/dummy_device.F90 @@ -45,5 +45,5 @@ module dummy_device type(c_ptr), public, bind(c) :: aux_cmd_queue = C_NULL_PTR #endif - + end module dummy_device diff --git a/src/device/hip/roctx.F90 b/src/device/hip/roctx.F90 index 24914ea740d..cc475eb492b 100644 --- a/src/device/hip/roctx.F90 +++ b/src/device/hip/roctx.F90 @@ -3,7 +3,7 @@ module roctx use, intrinsic :: iso_c_binding implicit none private - + integer, parameter :: ROCTX_MAX_LEN = 256 #ifdef HAVE_ROCTX @@ -13,30 +13,30 @@ subroutine roctxRangePushA(name) bind(C, name='roctxRangePushA') character(kind=c_char) :: name(256) end subroutine roctxRangePushA end interface roctxRangePushA - + interface roctxRangePop subroutine roctxRangePop() bind(C, name='roctxRangePop') end subroutine roctxRangePop end interface roctxRangePop public :: roctxStartRange, roctxRangePushA, roctxRangePop - + contains - + subroutine roctxStartRange(name) character(kind=c_char,len=*) :: name character :: c_name(ROCTX_MAX_LEN) integer:: i, str_len - - str_len = len(trim(name)) + + str_len = len(trim(name)) do i = 1, len(trim(name)) c_name(i) = name(i:i) end do c_name(str_len+1) = C_NULL_CHAR - + call roctxRangePushA(c_name) end subroutine roctxStartRange - + #endif end module roctx diff --git a/src/device/hip_intf.F90 b/src/device/hip_intf.F90 index 87a3c8ddb97..bbddaaeff19 100644 --- a/src/device/hip_intf.F90 +++ b/src/device/hip_intf.F90 @@ -49,43 +49,43 @@ module hip_intf !> Low priority stream setting integer :: STRM_LOW_PRIO - + !> Enum @a hipError_t enum, bind(c) - enumerator :: hipSuccess = 0 - enumerator :: hipErrorInvalidContext = 1 - enumerator :: hipErrorInvalidKernelFile = 2 - enumerator :: hipErrorMemoryAllocation = 3 - enumerator :: hipErrorInitializationError = 4 - enumerator :: hipErrorLaunchFailure = 5 - enumerator :: hipErrorLaunchOutOfResources = 6 - enumerator :: hipErrorInvalidDevice = 7 - enumerator :: hipErrorInvalidValue = 8 - enumerator :: hipErrorInvalidDevicePointer = 9 - enumerator :: hipErrorInvalidMemcpyDirection = 10 - enumerator :: hipErrorUnknown = 11 - enumerator :: hipErrorInvalidResourceHandle = 12 - enumerator :: hipErrorNotReady = 13 - enumerator :: hipErrorNoDevice = 14 - enumerator :: hipErrorPeerAccessAlreadyEnabled = 15 - enumerator :: hipErrorPeerAccessNotEnabled = 16 - enumerator :: hipErrorRuntimeMemory = 17 - enumerator :: hipErrorRuntimeOther = 18 - enumerator :: hipErrorHostMemoryAlreadyRegistered = 19 - enumerator :: hipErrorHostMemoryNotRegistered = 20 - enumerator :: hipErrorMapBufferObjectFailed = 21 - enumerator :: hipErrorTbd = 22 + enumerator :: hipSuccess = 0 + enumerator :: hipErrorInvalidContext = 1 + enumerator :: hipErrorInvalidKernelFile = 2 + enumerator :: hipErrorMemoryAllocation = 3 + enumerator :: hipErrorInitializationError = 4 + enumerator :: hipErrorLaunchFailure = 5 + enumerator :: hipErrorLaunchOutOfResources = 6 + enumerator :: hipErrorInvalidDevice = 7 + enumerator :: hipErrorInvalidValue = 8 + enumerator :: hipErrorInvalidDevicePointer = 9 + enumerator :: hipErrorInvalidMemcpyDirection = 10 + enumerator :: hipErrorUnknown = 11 + enumerator :: hipErrorInvalidResourceHandle = 12 + enumerator :: hipErrorNotReady = 13 + enumerator :: hipErrorNoDevice = 14 + enumerator :: hipErrorPeerAccessAlreadyEnabled = 15 + enumerator :: hipErrorPeerAccessNotEnabled = 16 + enumerator :: hipErrorRuntimeMemory = 17 + enumerator :: hipErrorRuntimeOther = 18 + enumerator :: hipErrorHostMemoryAlreadyRegistered = 19 + enumerator :: hipErrorHostMemoryNotRegistered = 20 + enumerator :: hipErrorMapBufferObjectFailed = 21 + enumerator :: hipErrorTbd = 22 end enum - + !> Enum @a hipMemcpyKind enum, bind(c) - enumerator :: hipMemcpyHostToHost = 0 - enumerator :: hipMemcpyHostToDevice = 1 - enumerator :: hipMemcpyDeviceToHost = 2 - enumerator :: hipMemcpyDevicetoDevice = 3 - enumerator :: hipMemcpyDefault = 4 + enumerator :: hipMemcpyHostToHost = 0 + enumerator :: hipMemcpyHostToDevice = 1 + enumerator :: hipMemcpyDeviceToHost = 2 + enumerator :: hipMemcpyDevicetoDevice = 3 + enumerator :: hipMemcpyDefault = 4 end enum - + interface integer (c_int) function hipMalloc(ptr_d, s) & bind(c, name='hipMalloc') @@ -104,7 +104,7 @@ integer (c_int) function hipFree(ptr_d) & type(c_ptr), value :: ptr_d end function hipFree end interface - + interface integer (c_int) function hipMemcpy(ptr_dst, ptr_src, s, dir) & bind(c, name='hipMemcpy') @@ -126,7 +126,7 @@ integer (c_int) function hipMemcpyAsync(ptr_dst, ptr_src, s, dir, stream) & integer(c_int), value :: dir end function hipMemcpyAsync end interface - + interface integer (c_int) function hipDeviceSynchronize() & bind(c, name='hipDeviceSynchronize') @@ -184,7 +184,7 @@ integer (c_int) function hipStreamDestroy(steam) & end function hipStreamDestroy end interface - interface + interface integer (c_int) function hipStreamSynchronize(stream) & bind(c, name='hipStreamSynchronize') use, intrinsic :: iso_c_binding @@ -192,8 +192,8 @@ integer (c_int) function hipStreamSynchronize(stream) & type(c_ptr), value :: stream end function hipStreamSynchronize end interface - - interface + + interface integer (c_int) function hipStreamWaitEvent(stream, event, flags) & bind(c, name='hipStreamWaitEvent') use, intrinsic :: iso_c_binding @@ -211,7 +211,7 @@ integer (c_int) function hipDeviceGetStreamPriorityRange(low_prio, high_prio) & integer(c_int) :: low_prio, high_prio end function hipDeviceGetStreamPriorityRange end interface - + interface integer (c_int) function hipEventCreate(event) & bind(c, name='hipEventCreate') @@ -249,7 +249,7 @@ integer (c_int) function hipEventRecord(event, stream) & end function hipEventRecord end interface - interface + interface integer (c_int) function hipEventSynchronize(event) & bind(c, name='hipEventSynchronize') use, intrinsic :: iso_c_binding @@ -257,13 +257,13 @@ integer (c_int) function hipEventSynchronize(event) & type(c_ptr), value :: event end function hipEventSynchronize end interface - + contains subroutine hip_init if (hipDeviceGetStreamPriorityRange(STRM_LOW_PRIO, STRM_HIGH_PRIO) & - .ne. hipSuccess) then + .ne. hipSuccess) then call neko_error('Error retrieving stream priority range') end if @@ -287,12 +287,12 @@ subroutine hip_finalize call neko_error('Error destroying aux stream') end if end subroutine hip_finalize - + subroutine hip_device_name(name) character(len=*), intent(inout) :: name character(kind=c_char, len=1024), target :: c_name integer :: end_pos - + if (hipDeviceGetName(c_loc(c_name), 1024, 0) .ne. hipSuccess) then call neko_error('Failed to query device') end if @@ -305,5 +305,5 @@ subroutine hip_device_name(name) end subroutine hip_device_name #endif - + end module hip_intf diff --git a/src/device/opencl/check.c b/src/device/opencl/check.c index 36817a863db..a50bad33e5d 100644 --- a/src/device/opencl/check.c +++ b/src/device/opencl/check.c @@ -40,6 +40,7 @@ #include #include +#include #include #define CL_ERR_STR(err) case err: return #err diff --git a/src/device/opencl/jit.c b/src/device/opencl/jit.c index eab32c27864..d6930448029 100644 --- a/src/device/opencl/jit.c +++ b/src/device/opencl/jit.c @@ -39,6 +39,7 @@ #endif #include +#include #include #include diff --git a/src/device/opencl/jit.h b/src/device/opencl/jit.h index b31ad84a28f..50d19c0fabc 100644 --- a/src/device/opencl/jit.h +++ b/src/device/opencl/jit.h @@ -1,3 +1,6 @@ +#ifndef __CL_JIT_H +#define __CL_JIT_H + #ifdef __APPLE__ #include #else @@ -5,3 +8,5 @@ #endif void opencl_kernel_jit(const char *kernel, cl_program *program); + +#endif diff --git a/src/device/opencl/prgm_lib.F90 b/src/device/opencl/prgm_lib.F90 index 60bb1ed2626..be4fe2953fa 100644 --- a/src/device/opencl/prgm_lib.F90 +++ b/src/device/opencl/prgm_lib.F90 @@ -39,7 +39,7 @@ module opencl_prgm_lib !> Device onvective kernels type(c_ptr), bind(c) :: conv1_program = C_NULL_PTR - + !> Device CFL kernels type(c_ptr), bind(c) :: cfl_program = C_NULL_PTR @@ -253,16 +253,16 @@ subroutine opencl_prgm_lib_release end if scalar_residual_program = C_NULL_PTR end if - + if (c_associated(lambda2_program)) then if(clReleaseProgram(lambda2_program) .ne. CL_SUCCESS) then call neko_error('Failed to release program') end if lambda2_program = C_NULL_PTR end if - + end subroutine opencl_prgm_lib_release #endif - + end module opencl_prgm_lib diff --git a/src/device/opencl_intf.F90 b/src/device/opencl_intf.F90 index 156a7fe2074..677523a23a2 100644 --- a/src/device/opencl_intf.F90 +++ b/src/device/opencl_intf.F90 @@ -36,7 +36,7 @@ module opencl_intf use utils, only : neko_error use, intrinsic :: iso_c_binding implicit none - + #ifdef HAVE_OPENCL !> Global OpenCL command queue @@ -50,57 +50,57 @@ module opencl_intf !> Global OpenCL device_id type(c_ptr), bind(c), target :: glb_device_id = C_NULL_PTR - + !> Enum Error Codes enum, bind(c) - enumerator :: CL_SUCCESS = 0 - enumerator :: CL_DEVICE_NOT_FOUND = -1 - enumerator :: CL_DEVICE_NOT_AVAILABLE = -2 - enumerator :: CL_COMPILER_NOT_AVAILABLE = -3 - enumerator :: CL_MEM_OBJECT_ALLOCATION_FAILURE = -4 - enumerator :: CL_OUT_OF_RESOURCES = -5 - enumerator :: CL_OUT_OF_HOST_MEMORY = -6 - enumerator :: CL_PROFILING_INFO_NOT_AVAILABLE = -7 - enumerator :: CL_MEM_COPY_OVERLAP = -8 - enumerator :: CL_IMAGE_FORMAT_MISMATCH = -9 - enumerator :: CL_IMAGE_FORMAT_NOT_SUPPORTED = -10 - enumerator :: CL_BUILD_PROGRAM_FAILURE = -11 - enumerator :: CL_MAP_FAILURE = -12 + enumerator :: CL_SUCCESS = 0 + enumerator :: CL_DEVICE_NOT_FOUND = -1 + enumerator :: CL_DEVICE_NOT_AVAILABLE = -2 + enumerator :: CL_COMPILER_NOT_AVAILABLE = -3 + enumerator :: CL_MEM_OBJECT_ALLOCATION_FAILURE = -4 + enumerator :: CL_OUT_OF_RESOURCES = -5 + enumerator :: CL_OUT_OF_HOST_MEMORY = -6 + enumerator :: CL_PROFILING_INFO_NOT_AVAILABLE = -7 + enumerator :: CL_MEM_COPY_OVERLAP = -8 + enumerator :: CL_IMAGE_FORMAT_MISMATCH = -9 + enumerator :: CL_IMAGE_FORMAT_NOT_SUPPORTED = -10 + enumerator :: CL_BUILD_PROGRAM_FAILURE = -11 + enumerator :: CL_MAP_FAILURE = -12 end enum !> Enum mem flags enum, bind(c) - enumerator :: CL_MEM_READ_WRITE = 1 - enumerator :: CL_MEM_WRITE_ONLY = 2 - enumerator :: CL_MEM_READ_ONLY = 4 - enumerator :: CL_MEM_USE_HOST_PTR = 8 - enumerator :: CL_MEM_ALLOC_HOST_PTR = 16 - enumerator :: CL_MEM_HOST_WRITE_ONLY = 128 - enumerator :: CL_MEM_HOST_READ_ONLY = 256 - enumerator :: CL_MEM_HOST_NO_ACCESS = 512 + enumerator :: CL_MEM_READ_WRITE = 1 + enumerator :: CL_MEM_WRITE_ONLY = 2 + enumerator :: CL_MEM_READ_ONLY = 4 + enumerator :: CL_MEM_USE_HOST_PTR = 8 + enumerator :: CL_MEM_ALLOC_HOST_PTR = 16 + enumerator :: CL_MEM_HOST_WRITE_ONLY = 128 + enumerator :: CL_MEM_HOST_READ_ONLY = 256 + enumerator :: CL_MEM_HOST_NO_ACCESS = 512 end enum !> Enum event flags enum, bind(c) - enumerator :: CL_COMPLETE = 0 - enumerator :: CL_RUNNING = 1 - enumerator :: CL_SUBMITTED = 2 - enumerator :: CL_QUEUED = 3 + enumerator :: CL_COMPLETE = 0 + enumerator :: CL_RUNNING = 1 + enumerator :: CL_SUBMITTED = 2 + enumerator :: CL_QUEUED = 3 end enum !> Enum boolean enum, bind(c) - enumerator :: CL_FALSE = 0 - enumerator :: CL_TRUE = 1 + enumerator :: CL_FALSE = 0 + enumerator :: CL_TRUE = 1 end enum enum, bind(c) - enumerator :: CL_CONTEXT_PLATFORM = int(Z'1084') + enumerator :: CL_CONTEXT_PLATFORM = int(Z'1084') end enum !> Enum device info enum, bind(c) - enumerator :: CL_DEVICE_NAME = 4139 + enumerator :: CL_DEVICE_NAME = 4139 end enum !> Device types @@ -110,10 +110,10 @@ module opencl_intf integer(c_int64_t), parameter :: CL_DEVICE_TYPE_ACCELERATOR = 8 integer(c_int64_t), parameter :: CL_DEVICE_TYPE_CUSTOM = 16 integer(c_int64_t), parameter :: CL_DEVICE_TYPE_ALL = int(Z'FFFFFFFF', i8) - + interface integer (c_int) function clGetPlatformIDs(num_entries, & - platforms, num_platforms) bind(c, name='clGetPlatformIDs') + platforms, num_platforms) bind(c, name='clGetPlatformIDs') use, intrinsic :: iso_c_binding implicit none integer(c_int), value :: num_entries @@ -253,7 +253,7 @@ end function clEnqueueMarkerWithWaitList interface integer (c_int) function clEnqueueMarker(cmd_queue, event) & - bind(c, name='clEnqueueMarker') + bind(c, name='clEnqueueMarker') use, intrinsic :: iso_c_binding implicit none type(c_ptr), value :: cmd_queue @@ -291,7 +291,7 @@ integer (c_int) function clSetUserEventStatus(event, status) & integer(c_int), value :: status end function clSetUserEventStatus end interface - + interface integer (c_int) function clGetDeviceInfo(device, param_name, & param_value_size, param_value, param_value_size_ret) & @@ -305,7 +305,7 @@ integer (c_int) function clGetDeviceInfo(device, param_name, & type(c_ptr), value :: param_value_size_ret end function clGetDeviceInfo end interface - + interface integer (c_int) function clReleaseContext(context) & bind(c, name='clReleaseContext') @@ -314,7 +314,7 @@ integer (c_int) function clReleaseContext(context) & type(c_ptr), value :: context end function clReleaseContext end interface - + interface integer (c_int) function clReleaseCommandQueue(queue) & bind(c, name='clReleaseCommandQueue') @@ -323,7 +323,7 @@ integer (c_int) function clReleaseCommandQueue(queue) & type(c_ptr), value :: queue end function clReleaseCommandQueue end interface - + interface integer (c_int) function clReleaseDevice(device) & bind(c, name='clReleaseDevice') @@ -341,7 +341,7 @@ integer (c_int) function clReleaseProgram(prgm) & type(c_ptr), value :: prgm end function clReleaseProgram end interface - + interface integer (c_int) function clReleaseMemObject(ptr_d) & bind(c, name='clReleaseMemObject') @@ -369,7 +369,7 @@ integer (c_int) function clFlush(cmd_queue) & end function clFlush end interface - interface + interface integer (c_int) function clFinish(cmd_queue) & bind(c, name='clFinish') use, intrinsic :: iso_c_binding @@ -377,7 +377,7 @@ integer (c_int) function clFinish(cmd_queue) & type(c_ptr), value :: cmd_queue end function clFinish end interface - + contains subroutine opencl_init @@ -415,7 +415,7 @@ subroutine opencl_init call neko_error('Faield to release command queue') end if end if - + glb_cmd_queue = clCreateCommandQueue(glb_ctx, glb_device_id, queue_props, ierr) if (ierr .ne. CL_SUCCESS) then @@ -448,7 +448,7 @@ subroutine opencl_finalize call neko_error('Faield to release device') end if end if - + end subroutine opencl_finalize subroutine opencl_device_name(name) @@ -460,11 +460,11 @@ subroutine opencl_device_name(name) c_loc(c_name), c_loc(name_len)) .ne. CL_SUCCESS) then call neko_error('Failed to query device') end if - + name(1:name_len) = c_name(1:name_len) - + end subroutine opencl_device_name - + #endif - + end module opencl_intf diff --git a/src/driver.f90 b/src/driver.f90 index c4467c69de9..44fc83285a3 100644 --- a/src/driver.f90 +++ b/src/driver.f90 @@ -2,7 +2,7 @@ program turboneko use neko type(case_t), target :: C - + call neko_init(C) call neko_solve(C) call neko_finalize(C) diff --git a/src/field/field.f90 b/src/field/field.f90 index 898f6a2377d..b9a165dff55 100644 --- a/src/field/field.f90 +++ b/src/field/field.f90 @@ -42,10 +42,10 @@ module field use dofmap, only : dofmap_t implicit none private - + type, public :: field_t real(kind=rp), allocatable :: x(:,:,:,:) !< Field data - + type(space_t), pointer :: Xh !< Function space \f$ X_h \f$ type(mesh_t), pointer :: msh !< Mesh type(dofmap_t), pointer :: dof !< Dofmap @@ -62,8 +62,10 @@ module field procedure, private, pass(this) :: assign_field => field_assign_field procedure, private, pass(this) :: assign_scalar => field_assign_scalar procedure, private, pass(this) :: add_field => field_add_field - procedure, private, pass(this) :: add_Scalar => field_add_scalar + procedure, private, pass(this) :: add_scalar => field_add_scalar procedure, pass(this) :: free => field_free + !> Return the size of the field. + procedure, pass(this) :: size => field_size !> Initialise a field generic :: init => init_external_dof, init_internal_dof !> Assignemnt to current field @@ -96,13 +98,13 @@ subroutine field_init_internal_dof(this, msh, space, fld_name) allocate(this%dof) this%dof = dofmap_t(this%msh, this%Xh) this%internal_dofmap = .true. - + if (present(fld_name)) then call this%init_common(fld_name) else call this%init_common() end if - + end subroutine field_init_internal_dof !> Initialize a field @a this on the mesh @a msh using an internal dofmap @@ -122,7 +124,7 @@ subroutine field_init_external_dof(this, dof, fld_name) else call this%init_common() end if - + end subroutine field_init_external_dof !> Initialize a field @a this @@ -134,9 +136,9 @@ subroutine field_init_common(this, fld_name) associate(lx => this%Xh%lx, ly => this%Xh%ly, & lz => this%Xh%lz, nelv => this%msh%nelv) - + if (.not. allocated(this%x)) then - allocate(this%x(lx, ly, lz, nelv), stat = ierr) + allocate(this%x(lx, ly, lz, nelv), stat = ierr) this%x = 0d0 end if @@ -147,17 +149,17 @@ subroutine field_init_common(this, fld_name) end if if (NEKO_BCKND_DEVICE .eq. 1) then - n = lx * ly * lz * nelv + n = lx * ly * lz * nelv call device_map(this%x, this%x_d, n) end if end associate - + end subroutine field_init_common !> Deallocate a field @a f subroutine field_free(this) class(field_t), intent(inout) :: this - + if (allocated(this%x)) then deallocate(this%x) end if @@ -166,7 +168,7 @@ subroutine field_free(this) deallocate(this%dof) this%internal_dofmap = .false. end if - + nullify(this%msh) nullify(this%Xh) nullify(this%dof) @@ -183,38 +185,38 @@ end subroutine field_free subroutine field_assign_field(this, g) class(field_t), intent(inout) :: this type(field_t), intent(in) :: g - + if (allocated(this%x)) then if (this%Xh .ne. g%Xh) then call this%free() end if end if - + this%Xh => g%Xh this%msh => g%msh this%dof => g%dof - - + + this%Xh%lx = g%Xh%lx this%Xh%ly = g%Xh%ly this%Xh%lz = g%Xh%lz - + if (.not. allocated(this%x)) then - + allocate(this%x(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) - + if (NEKO_BCKND_DEVICE .eq. 1) then - call device_map(this%x, this%x_d, this%dof%size()) + call device_map(this%x, this%x_d, this%size()) end if - + end if if (NEKO_BCKND_DEVICE .eq. 1) then - call device_copy(this%x_d, g%x_d, this%dof%size()) + call device_copy(this%x_d, g%x_d, this%size()) else call copy(this%x, g%x, this%dof%size()) end if - + end subroutine field_assign_field !> Assignment \f$ this = a \f$ @@ -224,7 +226,7 @@ subroutine field_assign_scalar(this, a) integer :: i, j, k, l if (NEKO_BCKND_DEVICE .eq. 1) then - call device_cfill(this%x_d, a, this%dof%size()) + call device_cfill(this%x_d, a, this%size()) else do i = 1, this%msh%nelv do l = 1, this%Xh%lz @@ -236,9 +238,9 @@ subroutine field_assign_scalar(this, a) end do end do end if - + end subroutine field_assign_scalar - + !> Add \f$ this(u_1, u_2, ... , u_n) = !! this(u_1, u_2, ... , u_n) + G(u_1, u_2, ... , u_n) \f$ !! @note Component wise @@ -247,9 +249,9 @@ subroutine field_add_field(this, g) type(field_t), intent(in) :: g if (NEKO_BCKND_DEVICE .eq. 1) then - call device_add2(this%x_d, g%x_d, this%dof%size()) + call device_add2(this%x_d, g%x_d, this%size()) else - call add2(this%x, g%x, this%dof%size()) + call add2(this%x, g%x, this%size()) end if end subroutine field_add_field @@ -262,12 +264,20 @@ subroutine field_add_scalar(this, a) real(kind=rp), intent(in) :: a if (NEKO_BCKND_DEVICE .eq. 1) then - call device_cadd(this%x_d, a, this%dof%size()) + call device_cadd(this%x_d, a, this%size()) else - call cadd(this%x, a, this%dof%size()) + call cadd(this%x, a, this%size()) end if end subroutine field_add_scalar + !> Return the size of the field based on the underlying dofmap. + pure function field_size(this) result(size) + class(field_t), intent(in) :: this + integer :: size + + size = this%dof%size() + end function field_size + end module field diff --git a/src/field/field_list.f90 b/src/field/field_list.f90 index 04de4c2dfc7..36264a8d170 100644 --- a/src/field/field_list.f90 +++ b/src/field/field_list.f90 @@ -13,21 +13,21 @@ module field_list procedure, pass(this) :: free => field_list_free end type field_list_t - contains +contains !> Append a field to the list. - !! @param field The field to append. - subroutine field_list_append(this, field) + !! @param f The field to append. + subroutine field_list_append(this, f) class(field_list_t), intent(inout) :: this - class(field_t), intent(inout), target :: field + class(field_t), intent(in), target :: f type(field_ptr_t), allocatable :: tmp(:) integer :: len - + len = size(this%fields) allocate(tmp(len+1)) tmp(1:len) = this%fields call move_alloc(tmp, this%fields) - this%fields(len+1)%f => field + this%fields(len+1)%f => f end subroutine field_list_append @@ -35,7 +35,7 @@ end subroutine field_list_append subroutine field_list_free(this) class(field_list_t), intent(inout) :: this integer :: i, n_fields - + if (allocated(this%fields)) then n_fields = size(this%fields) do i=1, n_fields @@ -48,5 +48,5 @@ subroutine field_list_free(this) end subroutine field_list_free - + end module field_list diff --git a/src/field/field_registry.f90 b/src/field/field_registry.f90 index a29e9f142c1..feb75086d81 100644 --- a/src/field/field_registry.f90 +++ b/src/field/field_registry.f90 @@ -40,24 +40,33 @@ module field_registry use utils, only: neko_error implicit none private - + type :: field_registry_t - !> list of fields stored - type(field_t), private, allocatable :: fields(:) - !> number of registered fields - integer, private :: n - !> the size the fields array is increased by upon reallocation - integer, private :: expansion_size + !> List of fields stored. + type(field_t), private, allocatable :: fields(:) + !> Number of registered fields. + integer, private :: n + !> The size the fields array is increased by upon reallocation. + integer, private :: expansion_size contains procedure, private, pass(this) :: expand + !> Constructor. procedure, pass(this) :: init => field_registry_init + !> Destructor. procedure, pass(this) :: free => field_registry_free + !> Add a field to the registry. procedure, pass(this) :: add_field + !> Get the number of fields in the registry. procedure, pass(this) :: n_fields + !> Get pointer to a stored field by index. procedure, pass(this) :: get_field_by_index + !> Get pointer to a stored field by name. procedure, pass(this) :: get_field_by_name + !> Get the `expansion_size` procedure, pass(this) :: get_expansion_size + !> Get total allocated size of `fields`. procedure, pass(this) :: get_size + !> Check if a field with a given name is already in the registry. procedure, pass(this) :: field_exists generic :: get_field => get_field_by_index, get_field_by_name end type field_registry_t @@ -66,8 +75,9 @@ module field_registry type(field_registry_t), public, target :: neko_field_registry contains - !> Constructor, optionally taking initial registry and expansion - !> size as argument + !> Constructor + !! @param size The allocation size of `fields` on init. + !! @param expansion_size The number of entries added to `fields` on expansion. subroutine field_registry_init(this, size, expansion_size) class(field_registry_t), intent(inout):: this integer, optional, intent(in) :: size @@ -104,11 +114,10 @@ subroutine field_registry_free(this) this%expansion_size = 0 end subroutine field_registry_free - !> expand the fields array so as to accomodate more fields + !> Expand the fields array so as to accomodate more fields. subroutine expand(this) class(field_registry_t), intent(inout) :: this - type(field_t), allocatable :: temp(:) - integer :: i + type(field_t), allocatable :: temp(:) allocate(temp(this%n + this%expansion_size)) temp(1:this%n) = this%fields(1:this%n) @@ -117,16 +126,24 @@ subroutine expand(this) end subroutine expand - subroutine add_field(this, dof, fld_name) + !> Add a field to the registry. + !! @param dof The map of degrees of freedom. + !! @param fld_name The name of the field. + !! @param ignore_existing If true, will do nothing if the field is already in + !! the registry. If false, will throw an error. Optional, defaults to false. + subroutine add_field(this, dof, fld_name, ignore_existing) class(field_registry_t), intent(inout) :: this type(dofmap_t), target, intent(in) :: dof - character(len=*), target, intent(in) :: fld_name -! type(h_cptr_t) :: key - integer :: i + character(len=*), target, intent(in) :: fld_name + logical, optional, intent(in) :: ignore_existing if (this%field_exists(fld_name)) then - call neko_error("Field with name " // fld_name // & - " is already registered") + if (present(ignore_existing) .and. ignore_existing .eqv. .true.) then + return + else + call neko_error("Field with name " // fld_name // & + " is already registered") + end if end if if (this%n_fields() == size(this%fields)) then @@ -150,10 +167,10 @@ pure function n_fields(this) result(n) class(field_registry_t), intent(in) :: this integer :: n - n = this%n + n = this%n end function n_fields - !> Get the size of the fields array + !> Get the size of the fields array. pure function get_size(this) result(n) class(field_registry_t), intent(in) :: this integer :: n @@ -161,7 +178,7 @@ pure function get_size(this) result(n) n = size(this%fields) end function get_size - !> Get the expansion size + !> Get the expansion size. pure function get_expansion_size(this) result(n) class(field_registry_t), intent(in) :: this integer :: n @@ -169,7 +186,7 @@ pure function get_expansion_size(this) result(n) n = this%expansion_size end function get_expansion_size - !> Get pointer to a stored field by index + !> Get pointer to a stored field by index. function get_field_by_index(this, i) result(f) class(field_registry_t), target, intent(in) :: this integer, intent(in) :: i @@ -184,15 +201,16 @@ function get_field_by_index(this, i) result(f) f => this%fields(i) end function get_field_by_index - !> Get pointer to a stored field by field name + !> Get pointer to a stored field by field name. function get_field_by_name(this, name) result(f) class(field_registry_t), target, intent(in) :: this - character(len=*), intent(in) ::name + character(len=*), intent(in) ::name type(field_t), pointer :: f logical :: found integer :: i found = .false. + do i=1, this%n_fields() if (this%fields(i)%name == name) then f => this%fields(i) @@ -207,10 +225,10 @@ function get_field_by_name(this, name) result(f) end if end function get_field_by_name - !> Check if a field with a given name is already in the registry + !> Check if a field with a given name is already in the registry. function field_exists(this, name) result(found) class(field_registry_t), target, intent(in) :: this - character(len=*), intent(in) ::name + character(len=*), intent(in) ::name logical :: found integer :: i diff --git a/src/field/field_series.f90 b/src/field/field_series.f90 index d11966b5dee..cfa4b85874b 100644 --- a/src/field/field_series.f90 +++ b/src/field/field_series.f90 @@ -36,10 +36,10 @@ module field_series use field implicit none private - + type, public :: field_series_t type(field_t), pointer :: f => null() - type(field_t), allocatable :: lf(:) + type(field_t), allocatable :: lf(:) integer, private :: len = 0 contains procedure, pass(this) :: init => field_series_init @@ -57,6 +57,7 @@ subroutine field_series_init(this, f, len) type(field_t), intent(inout), target :: f integer :: len character(len=80) :: name + character(len=5) :: id_str integer :: i call this%free() @@ -67,7 +68,8 @@ subroutine field_series_init(this, f, len) allocate(this%lf(len)) do i = 1, this%len - name = trim(f%name)//'_lag'//char(i) + write(id_str, '(I0)') i + name = trim(f%name)//'_lag'//id_str call this%lf(i)%init(this%f%dof, name) end do @@ -85,7 +87,7 @@ subroutine field_series_free(this) do i = 1, this%len call this%lf(i)%free() end do - + end subroutine field_series_free !> Return the size of the field series @@ -105,7 +107,7 @@ subroutine field_series_update(this) end do this%lf(1) = this%f - + end subroutine field_series_update !> Set all fields in a series to @a g @@ -117,7 +119,7 @@ subroutine field_series_set(this, g) do i = 1, this%len this%lf(i) = g end do - + end subroutine field_series_set - + end module field_series diff --git a/src/field/mean_field.f90 b/src/field/mean_field.f90 index c85c20c98fa..0072177eb43 100644 --- a/src/field/mean_field.f90 +++ b/src/field/mean_field.f90 @@ -41,7 +41,7 @@ module mean_field use device_math, only : device_cmult, device_add2s2 implicit none private - + type, public, extends(stats_quant_t) :: mean_field_t type(field_t), pointer :: f => null() type(field_t) :: mf @@ -61,15 +61,15 @@ subroutine mean_field_init(this, f, field_name) type(field_t), intent(inout), target :: f character(len=*), optional, intent(in) :: field_name character(len=80) :: name - - + + call this%free() this%f => f this%time = 0.0_rp if (present(field_name)) then name = field_name - else + else write(name, '(A,A)') 'mean_',trim(f%name) end if @@ -91,9 +91,9 @@ end subroutine mean_field_free !> Resets a mean field subroutine mean_field_reset(this) class(mean_field_t), intent(inout) :: this - + this%time = 0.0 - this%mf = 0.0_rp + this%mf = 0.0_rp end subroutine mean_field_reset @@ -113,8 +113,8 @@ subroutine mean_field_update(this, k) this%time = this%time + k this%mf%x = this%mf%x / this%time end if - + end subroutine mean_field_update - + end module mean_field diff --git a/src/field/mean_sqr_field.f90 b/src/field/mean_sqr_field.f90 index c76f339d749..6c1b210c1b0 100644 --- a/src/field/mean_sqr_field.f90 +++ b/src/field/mean_sqr_field.f90 @@ -46,7 +46,7 @@ module mean_sqr_field procedure, pass(this) :: update => mean_sqr_field_update end type mean_sqr_field_t -contains +contains !> Update a mean sqr field subroutine mean_sqr_field_update(this, k) diff --git a/src/field/mesh_field.f90 b/src/field/mesh_field.f90 index 19a757041a1..188b4ee7766 100644 --- a/src/field/mesh_field.f90 +++ b/src/field/mesh_field.f90 @@ -32,11 +32,11 @@ ! !> Defines a mesh field !! @details A mesh field is a scalar integer cell based field (\f$ dQ_0 \f$) -module mesh_field +module mesh_field use mesh, only : mesh_t implicit none private - + !> @todo Add support for different data types type, public :: mesh_fld_t integer, allocatable :: data(:) !< Data @@ -47,11 +47,11 @@ module mesh_field public :: mesh_field_init, mesh_field_free contains - + subroutine mesh_field_init(fld, msh, fld_name) type(mesh_fld_t), intent(inout) :: fld type(mesh_t), target, intent(in) :: msh - character(len=*), optional :: fld_name + character(len=*), optional :: fld_name call mesh_field_free(fld) diff --git a/src/field/scratch_registry.f90 b/src/field/scratch_registry.f90 index 4206772a437..7e7b14bf7b5 100644 --- a/src/field/scratch_registry.f90 +++ b/src/field/scratch_registry.f90 @@ -35,48 +35,46 @@ !! often and you don't want to create temporary fields (work arrays) inside !! it on each call. module scratch_registry - use num_types, only : rp use field, only : field_t, field_ptr_t use dofmap, only : dofmap_t - use utils, only : neko_error implicit none private - - + + type, public :: scratch_registry_t - !> list of scratch fields - type(field_ptr_t), private, allocatable :: fields(:) + !> list of scratch fields + type(field_ptr_t), private, allocatable :: fields(:) !> Tracks which fields are used - logical, private, allocatable :: inuse(:) + logical, private, allocatable :: inuse(:) !> number of registered fields - integer, private :: nfields + integer, private :: nfields !> number of fields in use - integer, private :: nfields_inuse + integer, private :: nfields_inuse !> the size the fields array is increased by upon reallocation - integer, private :: expansion_size + integer, private :: expansion_size !> Dofmap - type(dofmap_t), pointer :: dof + type(dofmap_t), pointer :: dof contains procedure, private, pass(this) :: expand !> destructor - procedure, pass(this) :: free => scratch_registry_free - !> getter for nfields - procedure, pass(this) :: get_nfields - !> getter for nfields_inuse - procedure, pass(this) :: get_nfields_inuse + procedure, pass(this) :: free => scratch_registry_free + !> getter for nfields + procedure, pass(this) :: get_nfields + !> getter for nfields_inuse + procedure, pass(this) :: get_nfields_inuse !> getter for expansion_size - procedure, pass(this) :: get_expansion_size + procedure, pass(this) :: get_expansion_size !> return size of allocated fields - procedure, pass(this) :: get_size + procedure, pass(this) :: get_size !> get value of inuse for a given index - procedure, pass(this) :: get_inuse + procedure, pass(this) :: get_inuse !> get a new scratch field - procedure, pass(this) :: request_field + procedure, pass(this) :: request_field procedure, pass(this) :: relinquish_field_single procedure, pass(this) :: relinquish_field_multiple !> free a field for later reuse generic :: relinquish_field => relinquish_field_single, & - relinquish_field_multiple + relinquish_field_multiple end type scratch_registry_t interface scratch_registry_t @@ -87,7 +85,7 @@ module scratch_registry type(scratch_registry_t), public, target :: neko_scratch_registry contains - + !> Constructor, optionally taking initial registry and expansion !! size as argument type(scratch_registry_t) function init(dof, size, expansion_size) result(this) @@ -95,13 +93,13 @@ type(scratch_registry_t) function init(dof, size, expansion_size) result(this) integer, optional, intent(in) :: size integer, optional, intent(in) :: expansion_size integer :: i - + this%dof => dof if (present(size)) then allocate (this%fields(size)) do i= 1, size - allocate(this%fields(i)%f) + allocate(this%fields(i)%f) end do allocate (this%inuse(size)) else @@ -130,13 +128,13 @@ subroutine scratch_registry_free(this) call this%fields(i)%f%free() deallocate(this%fields(i)%f) end do - + deallocate(this%fields) deallocate(this%inuse) end if nullify(this%dof) - + end subroutine scratch_registry_free @@ -145,7 +143,7 @@ pure function get_nfields(this) result(n) class(scratch_registry_t), intent(in) :: this integer :: n - n = this%nfields + n = this%nfields end function get_nfields pure function get_nfields_inuse(this) result(n) @@ -154,7 +152,7 @@ pure function get_nfields_inuse(this) result(n) n = 0 do i=1,this%get_size() - if (this%inuse(i)) n = n + 1 + if (this%inuse(i)) n = n + 1 end do end function get_nfields_inuse @@ -176,15 +174,15 @@ end function get_expansion_size subroutine expand(this) class(scratch_registry_t), intent(inout) :: this - type(field_ptr_t), allocatable :: temp(:) - logical, allocatable :: temp2(:) + type(field_ptr_t), allocatable :: temp(:) + logical, allocatable :: temp2(:) integer :: i allocate(temp(this%get_size() + this%expansion_size)) temp(1:this%nfields) = this%fields(1:this%nfields) - + do i=this%nfields +1, size(temp) - allocate(temp(i)%f) + allocate(temp(i)%f) enddo call move_alloc(temp, this%fields) @@ -203,13 +201,13 @@ subroutine request_field(this, f, index) integer, intent(inout) :: index !< The index of the field in the inuse array character(len=10) :: name - - associate(nfields => this%nfields, nfields_inuse => this%nfields_inuse) + + associate(nfields => this%nfields, nfields_inuse => this%nfields_inuse) do index=1,this%get_size() if (this%inuse(index) .eqv. .false.) then write (name, "(A3,I0.3)") "wrk", index - + if (.not. allocated(this%fields(index)%f%x)) then call this%fields(index)%f%init(this%dof, trim(name)) nfields = nfields + 1 @@ -232,12 +230,12 @@ subroutine request_field(this, f, index) end associate end subroutine request_field - + !> Relinquish the use of a field in the registry subroutine relinquish_field_single(this, index) class(scratch_registry_t), target, intent(inout) :: this integer, intent(inout) :: index !< The index of the field to free - + this%inuse(index) = .false. this%nfields_inuse = this%nfields_inuse - 1 end subroutine relinquish_field_single @@ -248,15 +246,15 @@ subroutine relinquish_field_multiple(this, indices) integer :: i do i=1, size(indices) - this%inuse(indices(i)) = .false. + this%inuse(indices(i)) = .false. end do - this%nfields_inuse = this%nfields_inuse - size(indices) + this%nfields_inuse = this%nfields_inuse - size(indices) end subroutine relinquish_field_multiple logical function get_inuse(this, index) class(scratch_registry_t), target, intent(inout) :: this - integer, intent(inout) :: index !< The index of the field to check - + integer, intent(inout) :: index !< The index of the field to check + get_inuse = this%inuse(index) end function get_inuse diff --git a/src/fluid/advection.f90 b/src/fluid/advection.f90 index 2226f09e2ff..07c6c69577f 100644 --- a/src/fluid/advection.f90 +++ b/src/fluid/advection.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2021-2023, The Neko Authors +! Copyright (c) 2021-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -30,14 +30,14 @@ ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -!> Subroutines to add advection terms to the RHS of a transport equation. +!> Subroutines to add advection terms to the RHS of a transport equation. module advection - use num_types + use num_types, only : rp use math use utils - use space - use field - use coefs + use space, only : space_t, GL + use field, only : field_t + use coefs, only : coef_t use device_math use neko_config use operators @@ -48,12 +48,13 @@ module advection c_associated implicit none private - + !> Base abstract type for computing the advection operator type, public, abstract :: advection_t contains procedure(compute_adv), pass(this), deferred :: compute procedure(compute_scalar_adv), pass(this), deferred :: compute_scalar + procedure(advection_free), pass(this), deferred :: free end type advection_t !> Type encapsulating advection routines with no dealiasing applied @@ -61,6 +62,10 @@ module advection real(kind=rp), allocatable :: temp(:) type(c_ptr) :: temp_d = C_NULL_PTR contains + !> Constructor + procedure, pass(this) :: init => init_no_dealias + !> Destructor + procedure, pass(this) :: free => free_no_dealias !> Add the advection term for the fluid, i.e. \f$u \cdot \nabla u \f$, to !! the RHS procedure, pass(this) :: compute => compute_advection_no_dealias @@ -112,6 +117,8 @@ module advection procedure, pass(this) :: compute_scalar => compute_scalar_advection_dealias !> Constructor procedure, pass(this) :: init => init_dealias + !> Destructor + procedure, pass(this) :: free => free_dealias end type adv_dealias_t abstract interface @@ -143,7 +150,7 @@ end subroutine compute_adv abstract interface !> Add advection operator to the right-hand-side for a scalar. - !! @param this The object. + !! @param this The object. !! @param vx The x component of velocity. !! @param vy The y component of velocity. !! @param vz The z component of velocity. @@ -168,63 +175,21 @@ subroutine compute_scalar_adv(this, vx, vy, vz, s, fs, Xh, coef, n) end subroutine compute_scalar_adv end interface - public :: advection_factory + abstract interface + !> Destructor + subroutine advection_free(this) + import :: advection_t + class(advection_t), intent(inout) :: this + end subroutine advection_free + end interface contains - - !> A factory for \ref advection_t decendants. - !! @param this Polymorphic object of class \ref advection_t. - !! @param coeff The coefficients of the (space, mesh) pair. - !! @param delias Whether to dealias or not. - !! @param lxd The polynomial order of the space used in the dealiasing. - !! Defaults to 3/2 of `coeff%Xh%lx`. - !! @note The factory both allocates and initializes `this`. - subroutine advection_factory(this, coef, dealias, lxd) - implicit none - class(advection_t), allocatable, intent(inout) :: this - type(coef_t), target :: coef - logical, intent(in) :: dealias - integer, intent(in) :: lxd - - ! Free allocatables if necessary - if (allocated(this)) then - select type(adv => this) - type is(adv_no_dealias_t) - if (allocated(adv%temp)) then - deallocate(adv%temp) - end if - if (c_associated(adv%temp_d)) then - call device_free(adv%temp_d) - end if - end select - deallocate(this) - end if - - if (dealias) then - allocate(adv_dealias_t::this) - else - allocate(adv_no_dealias_t::this) - end if - - select type(adv => this) - type is(adv_dealias_t) - if (lxd .gt. 0) then - call init_dealias(adv, lxd, coef) - else - call init_dealias(adv, coef%Xh%lx * 3/2, coef) - end if - type is(adv_no_dealias_t) - call init_no_dealias(adv, coef) - end select - - end subroutine advection_factory !> Constructor - !! @param coeff The coefficients of the (space, mesh) pair. + !! @param coef The coefficients of the (space, mesh) pair. subroutine init_no_dealias(this, coef) - implicit none - class(adv_no_dealias_t) :: this - type(coef_t) :: coef + class(adv_no_dealias_t), intent(inout) :: this + type(coef_t), intent(in) :: coef allocate(this%temp(coef%dof%size())) @@ -234,11 +199,22 @@ subroutine init_no_dealias(this, coef) end subroutine init_no_dealias + !> Destructor + subroutine free_no_dealias(this) + class(adv_no_dealias_t), intent(inout) :: this + + if (allocated(this%temp)) then + deallocate(this%temp) + end if + if (c_associated(this%temp_d)) then + call device_free(this%temp_d) + end if + end subroutine free_no_dealias + !> Constructor !! @param lxd The polynomial order of the space used in the dealiasing. - !! @param coeff The coefficients of the (space, mesh) pair. + !! @param coef The coefficients of the (space, mesh) pair. subroutine init_dealias(this, lxd, coef) - implicit none class(adv_dealias_t), target, intent(inout) :: this integer, intent(in) :: lxd type(coef_t), intent(inout), target :: coef @@ -275,7 +251,7 @@ subroutine init_dealias(this, lxd, coef) allocate(this%vs(n_GL)) allocate(this%vt(n_GL)) end if - + if (NEKO_BCKND_DEVICE .eq. 1) then call device_map(this%temp, this%temp_d, n_GL) call device_map(this%tbf, this%tbf_d, n_GL) @@ -288,7 +264,13 @@ subroutine init_dealias(this, lxd, coef) end if end subroutine init_dealias - + + !> Destructor + subroutine free_dealias(this) + class(adv_dealias_t), intent(inout) :: this + end subroutine free_dealias + + !> Add the advection term for the fluid, i.e. \f$u \cdot \nabla u \f$, to !! the RHS. !! @param vx The x component of velocity. @@ -301,7 +283,6 @@ end subroutine init_dealias !! @param coef The coefficients of the (Xh, mesh) pair. !! @param n Typically the size of the mesh. subroutine compute_advection_dealias(this, vx, vy, vz, fx, fy, fz, Xh, coef, n) - implicit none class(adv_dealias_t), intent(inout) :: this type(space_t), intent(inout) :: Xh type(coef_t), intent(inout) :: coef @@ -309,7 +290,7 @@ subroutine compute_advection_dealias(this, vx, vy, vz, fx, fy, fz, Xh, coef, n) integer, intent(in) :: n real(kind=rp), intent(inout), dimension(n) :: fx, fy, fz real(kind=rp), dimension(this%Xh_GL%lxyz) :: tx, ty, tz - real(kind=rp), dimension(this%Xh_GL%lxyz) :: tfx, tfy, tfz + real(kind=rp), dimension(this%Xh_GL%lxyz) :: tfx, tfy, tfz real(kind=rp), dimension(this%Xh_GL%lxyz) :: vr, vs, vt real(kind=rp), dimension(this%Xh_GLL%lxyz) :: tempx, tempy, tempz type(c_ptr) :: fx_d, fy_d, fz_d @@ -318,90 +299,90 @@ subroutine compute_advection_dealias(this, vx, vy, vz, fx, fy, fz, Xh, coef, n) n_GL = nel * this%Xh_GL%lxyz !This is extremely primitive and unoptimized on the device //Karp associate(c_GL => this%coef_GL) - if (NEKO_BCKND_DEVICE .eq. 1) then - fx_d = device_get_ptr(fx) - fy_d = device_get_ptr(fy) - fz_d = device_get_ptr(fz) - call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL) - call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL) - call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL) - - call opgrad(this%vr, this%vs, this%vt, this%tx, c_GL) - call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, & + if (NEKO_BCKND_DEVICE .eq. 1) then + fx_d = device_get_ptr(fx) + fy_d = device_get_ptr(fy) + fz_d = device_get_ptr(fz) + call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL) + call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL) + call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL) + + call opgrad(this%vr, this%vs, this%vt, this%tx, c_GL) + call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, & this%tx_d, this%ty_d, this%tz_d, n_GL) - call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) - call device_sub2(fx_d, this%temp_d, n) + call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) + call device_sub2(fx_d, this%temp_d, n) - call opgrad(this%vr, this%vs, this%vt, this%ty, c_GL) - call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, & + call opgrad(this%vr, this%vs, this%vt, this%ty, c_GL) + call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, & this%tx_d, this%ty_d, this%tz_d, n_GL) - call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) - call device_sub2(fy_d, this%temp_d, n) + call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) + call device_sub2(fy_d, this%temp_d, n) - call opgrad(this%vr, this%vs, this%vt, this%tz, c_GL) - call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, & + call opgrad(this%vr, this%vs, this%vt, this%tz, c_GL) + call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, & this%tx_d, this%ty_d, this%tz_d, n_GL) - call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) - call device_sub2(fz_d, this%temp_d, n) + call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) + call device_sub2(fz_d, this%temp_d, n) - else if ((NEKO_BCKND_SX .eq. 1) .or. (NEKO_BCKND_XSMM .eq. 1)) then + else if ((NEKO_BCKND_SX .eq. 1) .or. (NEKO_BCKND_XSMM .eq. 1)) then - call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL) - call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL) - call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL) + call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL) + call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL) + call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL) - call opgrad(this%vr, this%vs, this%vt, this%tx, c_GL) - call vdot3(this%tbf, this%vr, this%vs, this%vt, & + call opgrad(this%vr, this%vs, this%vt, this%tx, c_GL) + call vdot3(this%tbf, this%vr, this%vs, this%vt, & this%tx, this%ty, this%tz, n_GL) - call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) - call sub2(fx, this%temp, n) + call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) + call sub2(fx, this%temp, n) - call opgrad(this%vr, this%vs, this%vt, this%ty, c_GL) - call vdot3(this%tbf, this%vr, this%vs, this%vt, & + call opgrad(this%vr, this%vs, this%vt, this%ty, c_GL) + call vdot3(this%tbf, this%vr, this%vs, this%vt, & this%tx, this%ty, this%tz, n_GL) - call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) - call sub2(fy, this%temp, n) + call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) + call sub2(fy, this%temp, n) - call opgrad(this%vr, this%vs, this%vt, this%tz, c_GL) - call vdot3(this%tbf, this%vr, this%vs, this%vt, & + call opgrad(this%vr, this%vs, this%vt, this%tz, c_GL) + call vdot3(this%tbf, this%vr, this%vs, this%vt, & this%tx, this%ty, this%tz, n_GL) - call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) - call sub2(fz, this%temp, n) - - else - - do e = 1, coef%msh%nelv - call this%GLL_to_GL%map(tx, vx%x(1,1,1,e), 1, this%Xh_GL) - call this%GLL_to_GL%map(ty, vy%x(1,1,1,e), 1, this%Xh_GL) - call this%GLL_to_GL%map(tz, vz%x(1,1,1,e), 1, this%Xh_GL) - - call opgrad(vr, vs, vt, tx, c_GL, e, e) - do i = 1, this%Xh_GL%lxyz - tfx(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i) - end do - - call opgrad(vr, vs, vt, ty, c_GL, e, e) - do i = 1, this%Xh_GL%lxyz - tfy(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i) - end do - - call opgrad(vr, vs, vt, tz, c_GL, e, e) - do i = 1, this%Xh_GL%lxyz - tfz(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i) - end do - - call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL) - call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL) - call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL) - - idx = (e-1)*this%Xh_GLL%lxyz+1 - call sub2(fx(idx), tempx, this%Xh_GLL%lxyz) - call sub2(fy(idx), tempy, this%Xh_GLL%lxyz) - call sub2(fz(idx), tempz, this%Xh_GLL%lxyz) - end do - end if + call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) + call sub2(fz, this%temp, n) + + else + + do e = 1, coef%msh%nelv + call this%GLL_to_GL%map(tx, vx%x(1,1,1,e), 1, this%Xh_GL) + call this%GLL_to_GL%map(ty, vy%x(1,1,1,e), 1, this%Xh_GL) + call this%GLL_to_GL%map(tz, vz%x(1,1,1,e), 1, this%Xh_GL) + + call opgrad(vr, vs, vt, tx, c_GL, e, e) + do i = 1, this%Xh_GL%lxyz + tfx(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i) + end do + + call opgrad(vr, vs, vt, ty, c_GL, e, e) + do i = 1, this%Xh_GL%lxyz + tfy(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i) + end do + + call opgrad(vr, vs, vt, tz, c_GL, e, e) + do i = 1, this%Xh_GL%lxyz + tfz(i) = tx(i)*vr(i) + ty(i)*vs(i) + tz(i)*vt(i) + end do + + call this%GLL_to_GL%map(tempx, tfx, 1, this%Xh_GLL) + call this%GLL_to_GL%map(tempy, tfy, 1, this%Xh_GLL) + call this%GLL_to_GL%map(tempz, tfz, 1, this%Xh_GLL) + + idx = (e-1)*this%Xh_GLL%lxyz+1 + call sub2(fx(idx), tempx, this%Xh_GLL%lxyz) + call sub2(fy(idx), tempy, this%Xh_GLL%lxyz) + call sub2(fz(idx), tempz, this%Xh_GLL%lxyz) + end do + end if end associate end subroutine compute_advection_dealias @@ -418,7 +399,6 @@ end subroutine compute_advection_dealias !! @param coef The coefficients of the (Xh, mesh) pair. !! @param n Typically the size of the mesh. subroutine compute_advection_no_dealias(this, vx, vy, vz, fx, fy, fz, Xh, coef, n) - implicit none class(adv_no_dealias_t), intent(inout) :: this type(space_t), intent(inout) :: Xh type(coef_t), intent(inout) :: coef @@ -431,7 +411,7 @@ subroutine compute_advection_no_dealias(this, vx, vy, vz, fx, fy, fz, Xh, coef, fx_d = device_get_ptr(fx) fy_d = device_get_ptr(fy) fz_d = device_get_ptr(fz) - + call conv1(this%temp, vx%x, vx%x, vy%x, vz%x, Xh, coef) call device_subcol3 (fx_d, coef%B_d, this%temp_d, n) call conv1(this%temp, vy%x, vx%x, vy%x, vz%x, Xh, coef) @@ -459,7 +439,7 @@ end subroutine compute_advection_no_dealias !> Add the advection term for a scalar, i.e. \f$u \cdot \nabla s \f$, to the !! RHS. - !! @param this The object. + !! @param this The object. !! @param vx The x component of velocity. !! @param vy The y component of velocity. !! @param vz The z component of velocity. @@ -470,7 +450,6 @@ end subroutine compute_advection_no_dealias !! @param n Typically the size of the mesh. subroutine compute_scalar_advection_no_dealias(this, vx, vy, vz, s, fs, Xh, & coef, n) - implicit none class(adv_no_dealias_t), intent(inout) :: this type(field_t), intent(inout) :: vx, vy, vz type(field_t), intent(inout) :: s @@ -482,7 +461,7 @@ subroutine compute_scalar_advection_no_dealias(this, vx, vy, vz, s, fs, Xh, & if (NEKO_BCKND_DEVICE .eq. 1) then fs_d = device_get_ptr(fs) - + call conv1(this%temp, s%x, vx%x, vy%x, vz%x, Xh, coef) call device_subcol3 (fs_d, coef%B_d, this%temp_d, n) if (coef%Xh%lz .eq. 1) then @@ -503,7 +482,7 @@ end subroutine compute_scalar_advection_no_dealias !> Add the advection term for a scalar, i.e. \f$u \cdot \nabla s \f$, to the !! RHS. - !! @param this The object. + !! @param this The object. !! @param vx The x component of velocity. !! @param vy The y component of velocity. !! @param vz The z component of velocity. @@ -514,7 +493,6 @@ end subroutine compute_scalar_advection_no_dealias !! @param n Typically the size of the mesh. subroutine compute_scalar_advection_dealias(this, vx, vy, vz, s, fs, Xh, & coef, n) - implicit none class(adv_dealias_t), intent(inout) :: this type(field_t), intent(inout) :: vx, vy, vz type(field_t), intent(inout) :: s @@ -533,80 +511,80 @@ subroutine compute_scalar_advection_dealias(this, vx, vy, vz, s, fs, Xh, & n_GL = nel * this%Xh_GL%lxyz associate(c_GL => this%coef_GL) - if (NEKO_BCKND_DEVICE .eq. 1) then - fs_d = device_get_ptr(fs) + if (NEKO_BCKND_DEVICE .eq. 1) then + fs_d = device_get_ptr(fs) + + ! Map advecting velocity onto the higher-order space + call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL) + call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL) + call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL) - ! Map advecting velocity onto the higher-order space - call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL) - call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL) - call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL) - - ! Map the scalar onto the high-order space - call this%GLL_to_GL%map(this%temp, s%x, nel, this%Xh_GL) - - ! Compute the scalar gradient in the high-order space - call opgrad(this%vr, this%vs, this%vt, this%temp, c_GL) - - ! Compute the convective term, i.e dot the velocity with the scalar grad - call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, & + ! Map the scalar onto the high-order space + call this%GLL_to_GL%map(this%temp, s%x, nel, this%Xh_GL) + + ! Compute the scalar gradient in the high-order space + call opgrad(this%vr, this%vs, this%vt, this%temp, c_GL) + + ! Compute the convective term, i.e dot the velocity with the scalar grad + call device_vdot3(this%tbf_d, this%vr_d, this%vs_d, this%vt_d, & this%tx_d, this%ty_d, this%tz_d, n_GL) - ! Map back to the original space (we reuse this%temp) - call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) - - ! Update the source term - call device_sub2(fs_d, this%temp_d, n) - - else if ((NEKO_BCKND_SX .eq. 1) .or. (NEKO_BCKND_XSMM .eq. 1)) then - - ! Map advecting velocity onto the higher-order space - call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL) - call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL) - call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL) - - ! Map the scalar onto the high-order space - call this%GLL_to_GL%map(this%temp, s%x, nel, this%Xh_GL) - - ! Compute the scalar gradient in the high-order space - call opgrad(this%vr, this%vs, this%vt, this%temp, c_GL) - - ! Compute the convective term, i.e dot the velocity with the scalar grad - call vdot3(this%tbf, this%vr, this%vs, this%vt, & + ! Map back to the original space (we reuse this%temp) + call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) + + ! Update the source term + call device_sub2(fs_d, this%temp_d, n) + + else if ((NEKO_BCKND_SX .eq. 1) .or. (NEKO_BCKND_XSMM .eq. 1)) then + + ! Map advecting velocity onto the higher-order space + call this%GLL_to_GL%map(this%tx, vx%x, nel, this%Xh_GL) + call this%GLL_to_GL%map(this%ty, vy%x, nel, this%Xh_GL) + call this%GLL_to_GL%map(this%tz, vz%x, nel, this%Xh_GL) + + ! Map the scalar onto the high-order space + call this%GLL_to_GL%map(this%temp, s%x, nel, this%Xh_GL) + + ! Compute the scalar gradient in the high-order space + call opgrad(this%vr, this%vs, this%vt, this%temp, c_GL) + + ! Compute the convective term, i.e dot the velocity with the scalar grad + call vdot3(this%tbf, this%vr, this%vs, this%vt, & this%tx, this%ty, this%tz, n_GL) - ! Map back to the original space (we reuse this%temp) - call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) - - ! Update the source term - call sub2(fs, this%temp, n) + ! Map back to the original space (we reuse this%temp) + call this%GLL_to_GL%map(this%temp, this%tbf, nel, this%Xh_GLL) - else - do e = 1, coef%msh%nelv - ! Map advecting velocity onto the higher-order space - call this%GLL_to_GL%map(vx_GL, vx%x(1,1,1,e), 1, this%Xh_GL) - call this%GLL_to_GL%map(vy_GL, vy%x(1,1,1,e), 1, this%Xh_GL) - call this%GLL_to_GL%map(vz_GL, vz%x(1,1,1,e), 1, this%Xh_GL) - - ! Map scalar onto the higher-order space - call this%GLL_to_GL%map(s_GL, s%x(1,1,1,e), 1, this%Xh_GL) - - ! Gradient of s in the higher-order space - call opgrad(dsdx, dsdy, dsdz, s_GL, c_GL, e, e) - - ! vx * ds/dx + vy * ds/dy + vz * ds/dz for each point in the element - do i = 1, this%Xh_GL%lxyz - f_GL(i) = vx_GL(i)*dsdx(i) + vy_GL(i)*dsdy(i) + vz_GL(i)*dsdz(i) - end do - - ! Map back the contructed operator to the original space - call this%GLL_to_GL%map(temp, f_GL, 1, this%Xh_GLL) - - idx = (e-1)*this%Xh_GLL%lxyz + 1 - - call sub2(fs(idx), temp, this%Xh_GLL%lxyz) - end do - end if - end associate + ! Update the source term + call sub2(fs, this%temp, n) + + else + do e = 1, coef%msh%nelv + ! Map advecting velocity onto the higher-order space + call this%GLL_to_GL%map(vx_GL, vx%x(1,1,1,e), 1, this%Xh_GL) + call this%GLL_to_GL%map(vy_GL, vy%x(1,1,1,e), 1, this%Xh_GL) + call this%GLL_to_GL%map(vz_GL, vz%x(1,1,1,e), 1, this%Xh_GL) + + ! Map scalar onto the higher-order space + call this%GLL_to_GL%map(s_GL, s%x(1,1,1,e), 1, this%Xh_GL) + + ! Gradient of s in the higher-order space + call opgrad(dsdx, dsdy, dsdz, s_GL, c_GL, e, e) + + ! vx * ds/dx + vy * ds/dy + vz * ds/dz for each point in the element + do i = 1, this%Xh_GL%lxyz + f_GL(i) = vx_GL(i)*dsdx(i) + vy_GL(i)*dsdy(i) + vz_GL(i)*dsdz(i) + end do + + ! Map back the contructed operator to the original space + call this%GLL_to_GL%map(temp, f_GL, 1, this%Xh_GLL) + + idx = (e-1)*this%Xh_GLL%lxyz + 1 + + call sub2(fs(idx), temp, this%Xh_GLL%lxyz) + end do + end if + end associate end subroutine compute_scalar_advection_dealias diff --git a/src/fluid/advection_fctry.f90 b/src/fluid/advection_fctry.f90 new file mode 100644 index 00000000000..7f84dad793e --- /dev/null +++ b/src/fluid/advection_fctry.f90 @@ -0,0 +1,96 @@ +! Copyright (c) 2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Contains the factory routine for `advection_t` children. +module advection_fctry + use num_types, only : rp + use advection, only : advection_t, adv_no_dealias_t, adv_dealias_t + use coefs, only : coef_t + use device, only : device_free + use, intrinsic :: iso_c_binding, only : c_associated + use json_utils, only : json_get + use json_module, only : json_file + implicit none + private + + public :: advection_factory + +contains + + !> A factory for \ref advection_t decendants. + !! @param this Polymorphic object of class \ref advection_t. + !! @param json The parameter file. + !! @param coef The coefficients of the (space, mesh) pair. + !! @note The factory both allocates and initializes `this`. + subroutine advection_factory(this, json, coef) + implicit none + class(advection_t), allocatable, intent(inout) :: this + type(json_file), intent(inout) :: json + type(coef_t), target :: coef + logical :: dealias, found + integer :: lxd, order + + call json_get(json, 'case.numerics.dealias', dealias) + call json%get('case.numerics.dealiased_polynomial_order', lxd, found) + if (.not. found) then + call json_get(json, 'case.numerics.polynomial_order', order) + ! Note, assumes odd polynomial order + lxd = 3.0_rp / 2.0_rp * (order + 1) + end if + + ! Free allocatables if necessary + if (allocated(this)) then + call this%free + deallocate(this) + end if + + if (dealias) then + allocate(adv_dealias_t::this) + else + allocate(adv_no_dealias_t::this) + end if + + select type(adv => this) + type is(adv_dealias_t) + if (lxd .gt. 0) then + call adv%init(lxd, coef) + else + call adv%init(coef%Xh%lx * 3/2, coef) + end if + type is(adv_no_dealias_t) + call adv%init(coef) + end select + + end subroutine advection_factory + + +end module advection_fctry diff --git a/src/fluid/bcknd/cpu/pnpn_res_cpu.f90 b/src/fluid/bcknd/cpu/pnpn_res_cpu.f90 index dfe1fb6e3ea..87e2f8d5697 100644 --- a/src/fluid/bcknd/cpu/pnpn_res_cpu.f90 +++ b/src/fluid/bcknd/cpu/pnpn_res_cpu.f90 @@ -13,7 +13,7 @@ module pnpn_res_cpu use space, only : space_t implicit none private - + type, public, extends(pnpn_prs_res_t) :: pnpn_prs_res_cpu_t contains procedure, nopass :: compute => pnpn_prs_res_cpu_compute @@ -36,7 +36,7 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, & type(gs_t), intent(inout) :: gs_Xh type(facet_normal_t), intent(inout) :: bc_prs_surface type(facet_normal_t), intent(inout) :: bc_sym_surface - class(Ax_t), intent(inout) :: Ax + class(ax_t), intent(inout) :: Ax real(kind=rp), intent(inout) :: bd real(kind=rp), intent(in) :: dt real(kind=rp), intent(in) :: mu @@ -63,11 +63,11 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, & c_Xh%h2(i,1,1,1) = 0.0_rp end do c_Xh%ifh2 = .false. - + call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_Xh) call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_Xh) - do i = 1, n + do concurrent (i = 1:n) ta1%x(i,1,1,1) = f_x%x(i,1,1,1) / rho & - ((wa1%x(i,1,1,1) * (mu / rho)) * c_Xh%B(i,1,1,1)) ta2%x(i,1,1,1) = f_y%x(i,1,1,1) / rho & @@ -75,12 +75,12 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, & ta3%x(i,1,1,1) = f_z%x(i,1,1,1) / rho & - ((wa3%x(i,1,1,1) * (mu / rho)) * c_Xh%B(i,1,1,1)) end do - - call gs_Xh%op(ta1, GS_OP_ADD) - call gs_Xh%op(ta2, GS_OP_ADD) - call gs_Xh%op(ta3, GS_OP_ADD) - do i = 1, n + call gs_Xh%op(ta1, GS_OP_ADD) + call gs_Xh%op(ta2, GS_OP_ADD) + call gs_Xh%op(ta3, GS_OP_ADD) + + do concurrent (i = 1:n) ta1%x(i,1,1,1) = ta1%x(i,1,1,1) * c_Xh%Binv(i,1,1,1) ta2%x(i,1,1,1) = ta2%x(i,1,1,1) * c_Xh%Binv(i,1,1,1) ta3%x(i,1,1,1) = ta3%x(i,1,1,1) * c_Xh%Binv(i,1,1,1) @@ -92,7 +92,7 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, & call Ax%compute(p_res%x,p%x,c_Xh,p%msh,p%Xh) - do i = 1, n + do concurrent (i = 1:n) p_res%x(i,1,1,1) = (-p_res%x(i,1,1,1)) & + wa1%x(i,1,1,1) + wa2%x(i,1,1,1) + wa3%x(i,1,1,1) end do @@ -100,29 +100,29 @@ subroutine pnpn_prs_res_cpu_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, & ! ! Surface velocity terms ! - do i = 1, n + do concurrent (i = 1:n) wa1%x(i,1,1,1) = 0.0_rp wa2%x(i,1,1,1) = 0.0_rp wa3%x(i,1,1,1) = 0.0_rp end do - + call bc_sym_surface%apply_surfvec(wa1%x,wa2%x,wa3%x,ta1%x, ta2%x, ta3%x, n) dtbd = bd / dt - do i = 1, n + do concurrent (i = 1:n) ta1%x(i,1,1,1) = 0.0_rp ta2%x(i,1,1,1) = 0.0_rp ta3%x(i,1,1,1) = 0.0_rp end do - + call bc_prs_surface%apply_surfvec(ta1%x, ta2%x, ta3%x, u%x, v%x, w%x, n) - do i = 1, n + do concurrent (i = 1:n) p_res%x(i,1,1,1) = p_res%x(i,1,1,1) & - (dtbd * (ta1%x(i,1,1,1) + ta2%x(i,1,1,1) + ta3%x(i,1,1,1)))& - (wa1%x(i,1,1,1) + wa2%x(i,1,1,1) + wa3%x(i,1,1,1)) end do - + call neko_scratch_registry%relinquish_field(temp_indices) end subroutine pnpn_prs_res_cpu_compute @@ -131,7 +131,7 @@ subroutine pnpn_vel_res_cpu_compute(Ax, u, v, w, u_res, v_res, w_res, & p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n) class(ax_t), intent(in) :: Ax type(mesh_t), intent(inout) :: msh - type(space_t), intent(inout) :: Xh + type(space_t), intent(inout) :: Xh type(field_t), intent(inout) :: p, u, v, w type(field_t), intent(inout) :: u_res, v_res, w_res type(field_t), intent(inout) :: f_x, f_y, f_z @@ -161,14 +161,14 @@ subroutine pnpn_vel_res_cpu_compute(Ax, u, v, w, u_res, v_res, w_res, & call opgrad(ta1%x, ta2%x, ta3%x, p%x, c_Xh) - do i = 1, n + do concurrent (i = 1:n) u_res%x(i,1,1,1) = (-u_res%x(i,1,1,1)) - ta1%x(i,1,1,1) + f_x%x(i,1,1,1) v_res%x(i,1,1,1) = (-v_res%x(i,1,1,1)) - ta2%x(i,1,1,1) + f_y%x(i,1,1,1) w_res%x(i,1,1,1) = (-w_res%x(i,1,1,1)) - ta3%x(i,1,1,1) + f_z%x(i,1,1,1) end do - + call neko_scratch_registry%relinquish_field(temp_indices) - - end subroutine pnpn_vel_res_cpu_compute + + end subroutine pnpn_vel_res_cpu_compute end module pnpn_res_cpu diff --git a/src/fluid/bcknd/device/cuda/prs_res_kernel.h b/src/fluid/bcknd/device/cuda/prs_res_kernel.h index 17615bdbc54..894bc0d7d0b 100644 --- a/src/fluid/bcknd/device/cuda/prs_res_kernel.h +++ b/src/fluid/bcknd/device/cuda/prs_res_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __FLUID_PRS_RES_KERNEL__ +#define __FLUID_PRS_RES_KERNEL__ + template< typename T > __global__ void prs_res_part1_kernel(T * __restrict__ ta1, T * __restrict__ ta2, @@ -92,3 +95,5 @@ __global__ void prs_res_part3_kernel(T * __restrict__ p_res, p_res[i] = p_res[i] - (dtbd * (ta1[i] + ta2[i] + ta3[i])); } } + +#endif // __FLUID_PRS_RES_KERNEL__ \ No newline at end of file diff --git a/src/fluid/bcknd/device/cuda/vel_res_update_kernel.h b/src/fluid/bcknd/device/cuda/vel_res_update_kernel.h index 037d43f8931..0cbe3b13d76 100644 --- a/src/fluid/bcknd/device/cuda/vel_res_update_kernel.h +++ b/src/fluid/bcknd/device/cuda/vel_res_update_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __FLUID_VEL_RES_UPDATE_KERNEL__ +#define __FLUID_VEL_RES_UPDATE_KERNEL__ + template< typename T > __global__ void vel_res_update_kernel(T * __restrict__ u_res, T * __restrict__ v_res, @@ -55,3 +58,4 @@ __global__ void vel_res_update_kernel(T * __restrict__ u_res, } +#endif // __FLUID_VEL_RES_UPDATE_KERNEL__ \ No newline at end of file diff --git a/src/fluid/bcknd/device/fluid_abbdf_device.F90 b/src/fluid/bcknd/device/fluid_abbdf_device.F90 index 4cfe776dc84..79d6cf9b759 100644 --- a/src/fluid/bcknd/device/fluid_abbdf_device.F90 +++ b/src/fluid/bcknd/device/fluid_abbdf_device.F90 @@ -54,7 +54,7 @@ module rhs_maker_device end type rhs_maker_bdf_device_t #ifdef HAVE_HIP - interface + interface subroutine rhs_maker_sumab_hip(u_d, v_d, w_d, uu_d, vv_d, ww_d, & uulag1, uulag2, vvlag1, vvlag2, wwlag1, wwlag2, ab1, ab2, ab3, nab, n)& bind(c, name='rhs_maker_sumab_hip') @@ -75,7 +75,7 @@ subroutine rhs_maker_ext_hip(abx1_d, aby1_d, abz1_d, & bind(c, name='rhs_maker_ext_hip') use, intrinsic :: iso_c_binding import c_rp - type(c_ptr), value :: abx1_d, aby1_d, abz1_d + type(c_ptr), value :: abx1_d, aby1_d, abz1_d type(c_ptr), value :: abx2_d, aby2_d, abz2_d type(c_ptr), value :: bfx_d, bfy_d, bfz_d real(c_rp) :: rho, ab1, ab2, ab3 @@ -95,7 +95,7 @@ subroutine rhs_maker_bdf_hip(ulag1_d, ulag2_d, vlag1_d, vlag2_d, & reaL(c_rp) :: rho, dt, bd2, bd3, bd4 integer(c_int) :: nbd, n end subroutine rhs_maker_bdf_hip - end interface + end interface #elif HAVE_CUDA interface subroutine rhs_maker_sumab_cuda(u_d, v_d, w_d, uu_d, vv_d, ww_d, & @@ -118,7 +118,7 @@ subroutine rhs_maker_ext_cuda(abx1_d, aby1_d, abz1_d, & bind(c, name='rhs_maker_ext_cuda') use, intrinsic :: iso_c_binding import c_rp - type(c_ptr), value :: abx1_d, aby1_d, abz1_d + type(c_ptr), value :: abx1_d, aby1_d, abz1_d type(c_ptr), value :: abx2_d, aby2_d, abz2_d type(c_ptr), value :: bfx_d, bfy_d, bfz_d real(c_rp) :: rho, ab1, ab2, ab3 @@ -138,7 +138,7 @@ subroutine rhs_maker_bdf_cuda(ulag1_d, ulag2_d, vlag1_d, vlag2_d, & reaL(c_rp) :: rho, dt, bd2, bd3, bd4 integer(c_int) :: nbd, n end subroutine rhs_maker_bdf_cuda - end interface + end interface #elif HAVE_OPENCL interface subroutine rhs_maker_sumab_opencl(u_d, v_d, w_d, uu_d, vv_d, ww_d, & @@ -161,7 +161,7 @@ subroutine rhs_maker_ext_opencl(abx1_d, aby1_d, abz1_d, & bind(c, name='rhs_maker_ext_opencl') use, intrinsic :: iso_c_binding import c_rp - type(c_ptr), value :: abx1_d, aby1_d, abz1_d + type(c_ptr), value :: abx1_d, aby1_d, abz1_d type(c_ptr), value :: abx2_d, aby2_d, abz2_d type(c_ptr), value :: bfx_d, bfy_d, bfz_d real(c_rp) :: rho, ab1, ab2, ab3 @@ -181,12 +181,12 @@ subroutine rhs_maker_bdf_opencl(ulag1_d, ulag2_d, vlag1_d, vlag2_d, & reaL(c_rp) :: rho, dt, bd2, bd3, bd4 integer(c_int) :: nbd, n end subroutine rhs_maker_bdf_opencl - end interface + end interface #endif contains - subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab) + subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, nab) type(field_t), intent(inout) :: u,v, w type(field_t), intent(inout) :: uu, vv, ww type(field_series_t), intent(inout) :: uulag, vvlag, wwlag @@ -198,7 +198,7 @@ subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, uulag%lf(1)%x_d, uulag%lf(2)%x_d, vvlag%lf(1)%x_d, vvlag%lf(2)%x_d, & wwlag%lf(1)%x_d, wwlag%lf(2)%x_d, ab(1), ab(2), ab(3), nab, & uu%dof%size()) -#elif HAVE_CUDA +#elif HAVE_CUDA call rhs_maker_sumab_cuda(u%x_d, v%x_d, w%x_d, uu%x_d, vv%x_d, ww%x_d, & uulag%lf(1)%x_d, uulag%lf(2)%x_d, vvlag%lf(1)%x_d, vvlag%lf(2)%x_d, & wwlag%lf(1)%x_d, wwlag%lf(2)%x_d, ab(1), ab(2), ab(3), nab, & @@ -209,7 +209,7 @@ subroutine rhs_maker_sumab_device(u, v, w, uu, vv, ww, uulag, vvlag, wwlag, ab, wwlag%lf(1)%x_d, wwlag%lf(2)%x_d, ab(1), ab(2), ab(3), nab, & uu%dof%size()) #endif - + end subroutine rhs_maker_sumab_device subroutine rhs_maker_ext_device(temp1, temp2, temp3, fx_lag, fy_lag, fz_lag, & @@ -243,17 +243,17 @@ subroutine rhs_maker_ext_device(temp1, temp2, temp3, fx_lag, fy_lag, fz_lag, & fx_d, fy_d, fz_d, rho, & ext_coeffs(1), ext_coeffs(2), ext_coeffs(3), n) #endif - + end subroutine rhs_maker_ext_device subroutine rhs_maker_bdf_device(ta1, ta2, ta3, tb1, tb2, tb3, & ulag, vlag, wlag, bfx, bfy, bfz, & - u, v, w, B, rho, dt, bd, nbd, n) + u, v, w, B, rho, dt, bd, nbd, n) integer, intent(in) :: n, nbd type(field_t), intent(inout) :: ta1, ta2, ta3 type(field_t), intent(in) :: u, v, w type(field_t), intent(inout) :: tb1, tb2, tb3 - type(field_series_t), intent(in) :: ulag, vlag, wlag + type(field_series_t), intent(in) :: ulag, vlag, wlag real(kind=rp), intent(inout) :: bfx(n), bfy(n), bfz(n) real(kind=rp), intent(in) :: B(n) real(kind=rp), intent(in) :: dt, rho, bd(10) @@ -263,7 +263,7 @@ subroutine rhs_maker_bdf_device(ta1, ta2, ta3, tb1, tb2, tb3, & bfy_d = device_get_ptr(bfy) bfz_d = device_get_ptr(bfz) B_d = device_get_ptr(B) - + #ifdef HAVE_HIP call rhs_maker_bdf_hip(ulag%lf(1)%x_d, ulag%lf(2)%x_d, & vlag%lf(1)%x_d, vlag%lf(2)%x_d, & @@ -285,5 +285,5 @@ subroutine rhs_maker_bdf_device(ta1, ta2, ta3, tb1, tb2, tb3, & #endif end subroutine rhs_maker_bdf_device - + end module rhs_maker_device diff --git a/src/fluid/bcknd/device/hip/prs_res_kernel.h b/src/fluid/bcknd/device/hip/prs_res_kernel.h index 17615bdbc54..894bc0d7d0b 100644 --- a/src/fluid/bcknd/device/hip/prs_res_kernel.h +++ b/src/fluid/bcknd/device/hip/prs_res_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __FLUID_PRS_RES_KERNEL__ +#define __FLUID_PRS_RES_KERNEL__ + template< typename T > __global__ void prs_res_part1_kernel(T * __restrict__ ta1, T * __restrict__ ta2, @@ -92,3 +95,5 @@ __global__ void prs_res_part3_kernel(T * __restrict__ p_res, p_res[i] = p_res[i] - (dtbd * (ta1[i] + ta2[i] + ta3[i])); } } + +#endif // __FLUID_PRS_RES_KERNEL__ \ No newline at end of file diff --git a/src/fluid/bcknd/device/hip/vel_res_update_kernel.h b/src/fluid/bcknd/device/hip/vel_res_update_kernel.h index 037d43f8931..0cbe3b13d76 100644 --- a/src/fluid/bcknd/device/hip/vel_res_update_kernel.h +++ b/src/fluid/bcknd/device/hip/vel_res_update_kernel.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __FLUID_VEL_RES_UPDATE_KERNEL__ +#define __FLUID_VEL_RES_UPDATE_KERNEL__ + template< typename T > __global__ void vel_res_update_kernel(T * __restrict__ u_res, T * __restrict__ v_res, @@ -55,3 +58,4 @@ __global__ void vel_res_update_kernel(T * __restrict__ u_res, } +#endif // __FLUID_VEL_RES_UPDATE_KERNEL__ \ No newline at end of file diff --git a/src/fluid/bcknd/device/opencl/pnpn_res_kernel.cl b/src/fluid/bcknd/device/opencl/pnpn_res_kernel.cl index c0a4cdb840a..ba18b53e5af 100644 --- a/src/fluid/bcknd/device/opencl/pnpn_res_kernel.cl +++ b/src/fluid/bcknd/device/opencl/pnpn_res_kernel.cl @@ -27,6 +27,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __FLUID_PNPN_RES_KERNEL__ +#define __FLUID_PNPN_RES_KERNEL__ + __kernel void prs_res_part1_kernel(__global real * __restrict__ ta1, __global real * __restrict__ ta2, __global real * __restrict__ ta3, @@ -107,3 +110,5 @@ __kernel void vel_res_update_kernel(__global real * __restrict__ u_res, } } + +#endif // __FLUID_PNPN_RES_KERNEL__ \ No newline at end of file diff --git a/src/fluid/bcknd/device/pnpn_res_device.F90 b/src/fluid/bcknd/device/pnpn_res_device.F90 index 8e676a2fa3d..2b7de4428af 100644 --- a/src/fluid/bcknd/device/pnpn_res_device.F90 +++ b/src/fluid/bcknd/device/pnpn_res_device.F90 @@ -47,7 +47,7 @@ module pnpn_res_device use scratch_registry, only : neko_scratch_registry implicit none private - + type, public, extends(pnpn_prs_res_t) :: pnpn_prs_res_device_t contains procedure, nopass :: compute => pnpn_prs_res_device_compute @@ -59,7 +59,7 @@ module pnpn_res_device end type pnpn_vel_res_device_t #ifdef HAVE_HIP - interface + interface subroutine pnpn_prs_res_part1_hip(ta1_d, ta2_d, ta3_d, & wa1_d, wa2_d, wa3_d, f_u_d, f_v_d, f_w_d, & B_d, h1_d, mu, rho, n) & @@ -97,7 +97,7 @@ subroutine pnpn_prs_res_part3_hip(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) & integer(c_int) :: n end subroutine pnpn_prs_res_part3_hip end interface - + interface subroutine pnpn_vel_res_update_hip(u_res_d, v_res_d, w_res_d, & ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) & @@ -149,7 +149,7 @@ subroutine pnpn_prs_res_part3_cuda(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) & integer(c_int) :: n end subroutine pnpn_prs_res_part3_cuda end interface - + interface subroutine pnpn_vel_res_update_cuda(u_res_d, v_res_d, w_res_d, & ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) & @@ -201,7 +201,7 @@ subroutine pnpn_prs_res_part3_opencl(p_res_d, ta1_d, ta2_d, ta3_d, dtbd, n) & integer(c_int) :: n end subroutine pnpn_prs_res_part3_opencl end interface - + interface subroutine pnpn_vel_res_update_opencl(u_res_d, v_res_d, w_res_d, & ta1_d, ta2_d, ta3_d, f_u_d, f_v_d, f_w_d, n) & @@ -216,7 +216,7 @@ end subroutine pnpn_vel_res_update_opencl end interface #endif - + contains subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, & @@ -251,7 +251,7 @@ subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, & n = u%dof%size() gdim = c_Xh%msh%gdim - + call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_Xh) call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_Xh) @@ -259,23 +259,23 @@ subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, & #ifdef HAVE_HIP call pnpn_prs_res_part1_hip(ta1%x_d, ta2%x_d, ta3%x_d, & wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, & - c_Xh%B_d, c_Xh%h1_d, mu, rho, n) + c_Xh%B_d, c_Xh%h1_d, mu, rho, n) #elif HAVE_CUDA call pnpn_prs_res_part1_cuda(ta1%x_d, ta2%x_d, ta3%x_d, & wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, & - c_Xh%B_d, c_Xh%h1_d, mu, rho, n) + c_Xh%B_d, c_Xh%h1_d, mu, rho, n) #elif HAVE_OPENCL call pnpn_prs_res_part1_opencl(ta1%x_d, ta2%x_d, ta3%x_d, & wa1%x_d, wa2%x_d, wa3%x_d, f_x%x_d, f_z%x_d, f_z%x_d, & - c_Xh%B_d, c_Xh%h1_d, mu, rho, n) + c_Xh%B_d, c_Xh%h1_d, mu, rho, n) #endif - c_Xh%ifh2 = .false. + c_Xh%ifh2 = .false. call device_cfill(c_Xh%h1_d,1.0_rp / rho,n) - - call gs_Xh%op(ta1, GS_OP_ADD) - call gs_Xh%op(ta2, GS_OP_ADD) - call gs_Xh%op(ta3, GS_OP_ADD) + + call gs_Xh%op(ta1, GS_OP_ADD) + call gs_Xh%op(ta2, GS_OP_ADD) + call gs_Xh%op(ta3, GS_OP_ADD) call device_opcolv(ta1%x_d, ta2%x_d, ta3%x_d, c_Xh%Binv_d, gdim, n) @@ -286,11 +286,11 @@ subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, & call Ax%compute(p_res%x,p%x,c_Xh,p%msh,p%Xh) #ifdef HAVE_HIP - call pnpn_prs_res_part2_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n); + call pnpn_prs_res_part2_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n) #elif HAVE_CUDA - call pnpn_prs_res_part2_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n); + call pnpn_prs_res_part2_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n) #elif HAVE_OPENCL - call pnpn_prs_res_part2_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n); + call pnpn_prs_res_part2_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, n) #endif ! @@ -303,13 +303,13 @@ subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, & call bc_sym_surface%apply_surfvec_dev(wa1%x_d, wa2%x_d, wa3%x_d, ta1%x_d, ta2%x_d, ta3%x_d) #ifdef HAVE_HIP - call pnpn_prs_res_part3_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n); + call pnpn_prs_res_part3_hip(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n) #elif HAVE_CUDA - call pnpn_prs_res_part3_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n); + call pnpn_prs_res_part3_cuda(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n) #elif HAVE_OPENCL - call pnpn_prs_res_part3_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n); + call pnpn_prs_res_part3_opencl(p_res%x_d, wa1%x_d, wa2%x_d, wa3%x_d, dtbd, n) #endif - ! + ! dtbd = bd / dt call device_rzero(ta1%x_d, n) @@ -320,14 +320,14 @@ subroutine pnpn_prs_res_device_compute(p, p_res, u, v, w, u_e, v_e, w_e, & u%x_d, v%x_d, w%x_d) #ifdef HAVE_HIP - call pnpn_prs_res_part3_hip(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n); + call pnpn_prs_res_part3_hip(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n) #elif HAVE_CUDA - call pnpn_prs_res_part3_cuda(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n); + call pnpn_prs_res_part3_cuda(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n) #elif HAVE_OPENCL - call pnpn_prs_res_part3_opencl(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n); + call pnpn_prs_res_part3_opencl(p_res%x_d, ta1%x_d, ta2%x_d, ta3%x_d, dtbd, n) #endif - - call neko_scratch_registry%relinquish_field(temp_indices) + + call neko_scratch_registry%relinquish_field(temp_indices) end subroutine pnpn_prs_res_device_compute @@ -335,12 +335,12 @@ subroutine pnpn_vel_res_device_compute(Ax, u, v, w, u_res, v_res, w_res, & p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n) class(ax_t), intent(in) :: Ax type(mesh_t), intent(inout) :: msh - type(space_t), intent(inout) :: Xh + type(space_t), intent(inout) :: Xh type(field_t), intent(inout) :: p, u, v, w type(field_t), intent(inout) :: u_res, v_res, w_res type(field_t), intent(inout) :: f_x, f_y, f_z type(coef_t), intent(inout) :: c_Xh - real(kind=rp), intent(in) :: mu + real(kind=rp), intent(in) :: mu real(kind=rp), intent(in) :: rho real(kind=rp), intent(in) :: bd real(kind=rp), intent(in) :: dt @@ -351,7 +351,7 @@ subroutine pnpn_vel_res_device_compute(Ax, u, v, w, u_res, v_res, w_res, & call device_cfill(c_Xh%h1_d, mu, n) call device_cfill(c_Xh%h2_d, rho * (bd / dt), n) c_Xh%ifh2 = .true. - + call Ax%compute(u_res%x, u%x, c_Xh, msh, Xh) call Ax%compute(v_res%x, v%x, c_Xh, msh, Xh) call Ax%compute(w_res%x, w%x, c_Xh, msh, Xh) @@ -372,7 +372,7 @@ subroutine pnpn_vel_res_device_compute(Ax, u, v, w, u_res, v_res, w_res, & call pnpn_vel_res_update_opencl(u_res%x_d, v_res%x_d, w_res%x_d, & ta1%x_d, ta2%x_d, ta3%x_d, f_x%x_d, f_y%x_d, f_z%x_d, n) #endif - + call neko_scratch_registry%relinquish_field(temp_indices) end subroutine pnpn_vel_res_device_compute diff --git a/src/fluid/bcknd/sx/pnpn_res_sx.f90 b/src/fluid/bcknd/sx/pnpn_res_sx.f90 index 477d2cb7531..2c849077c36 100644 --- a/src/fluid/bcknd/sx/pnpn_res_sx.f90 +++ b/src/fluid/bcknd/sx/pnpn_res_sx.f90 @@ -6,14 +6,14 @@ module pnpn_res_sx use ax_product, only : ax_t use coefs, only : coef_t use facet_normal, only : facet_normal_t - use pnpn_residual, only : pnpn_prs_res_t, pnpn_vel_res_t + use pnpn_residual, only : pnpn_prs_res_t, pnpn_vel_res_t use scratch_registry, only: neko_scratch_registry use mesh, only : mesh_t use num_types, only : rp use space, only : space_t implicit none private - + type, public, extends(pnpn_prs_res_t) :: pnpn_prs_res_sx_t contains procedure, nopass :: compute => pnpn_prs_res_sx_compute @@ -63,7 +63,7 @@ subroutine pnpn_prs_res_sx_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, & c_Xh%h2(i,1,1,1) = 0.0_rp end do c_Xh%ifh2 = .false. - + call curl(ta1, ta2, ta3, u_e, v_e, w_e, work1, work2, c_Xh) call curl(wa1, wa2, wa3, ta1, ta2, ta3, work1, work2, c_Xh) @@ -78,17 +78,17 @@ subroutine pnpn_prs_res_sx_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, & ta2%x(i,1,1,1) = f_y%x(i,1,1,1) / rho - wa2%x(i,1,1,1) ta3%x(i,1,1,1) = f_z%x(i,1,1,1) / rho - wa3%x(i,1,1,1) end do - - call gs_Xh%op(ta1, GS_OP_ADD) - call gs_Xh%op(ta2, GS_OP_ADD) - call gs_Xh%op(ta3, GS_OP_ADD) + + call gs_Xh%op(ta1, GS_OP_ADD) + call gs_Xh%op(ta2, GS_OP_ADD) + call gs_Xh%op(ta3, GS_OP_ADD) do i = 1, n ta1%x(i,1,1,1) = ta1%x(i,1,1,1) * c_Xh%Binv(i,1,1,1) ta2%x(i,1,1,1) = ta2%x(i,1,1,1) * c_Xh%Binv(i,1,1,1) ta3%x(i,1,1,1) = ta3%x(i,1,1,1) * c_Xh%Binv(i,1,1,1) end do - + call Ax%compute(p_res%x,p%x,c_Xh,p%msh,p%Xh) call cdtp(wa1%x, ta1%x, c_Xh%drdx, c_Xh%dsdx, c_Xh%dtdx, c_Xh) @@ -107,16 +107,16 @@ subroutine pnpn_prs_res_sx_compute(p, p_res, u, v, w, u_e, v_e, w_e, f_x, & wa2%x(i,1,1,1) = 0.0_rp wa3%x(i,1,1,1) = 0.0_rp end do - + call bc_sym_surface%apply_surfvec(wa1%x,wa2%x,wa3%x,ta1%x, ta2%x, ta3%x, n) - + dtbd = bd / dt do i = 1, n ta1%x(i,1,1,1) = 0.0_rp ta2%x(i,1,1,1) = 0.0_rp ta3%x(i,1,1,1) = 0.0_rp end do - + call bc_prs_surface%apply_surfvec(ta1%x, ta2%x, ta3%x, u%x, v%x, w%x, n) do i = 1, n @@ -132,7 +132,7 @@ subroutine pnpn_vel_res_sx_compute(Ax, u, v, w, u_res, v_res, w_res, & p, f_x, f_y, f_z, c_Xh, msh, Xh, mu, rho, bd, dt, n) class(ax_t), intent(in) :: Ax type(mesh_t), intent(inout) :: msh - type(space_t), intent(inout) :: Xh + type(space_t), intent(inout) :: Xh type(field_t), intent(inout) :: p, u, v, w type(field_t), intent(inout) :: u_res, v_res, w_res type(field_t), intent(inout) :: f_x, f_y, f_z @@ -167,8 +167,8 @@ subroutine pnpn_vel_res_sx_compute(Ax, u, v, w, u_res, v_res, w_res, & v_res%x(i,1,1,1) = (-v_res%x(i,1,1,1)) - ta2%x(i,1,1,1) + f_y%x(i,1,1,1) w_res%x(i,1,1,1) = (-w_res%x(i,1,1,1)) - ta3%x(i,1,1,1) + f_z%x(i,1,1,1) end do - + call neko_scratch_registry%relinquish_field(temp_indices) - end subroutine pnpn_vel_res_sx_compute - + end subroutine pnpn_vel_res_sx_compute + end module pnpn_res_sx diff --git a/src/fluid/flow_ic.f90 b/src/fluid/flow_ic.f90 index 3e0d9e1587c..194eeec267d 100644 --- a/src/fluid/flow_ic.f90 +++ b/src/fluid/flow_ic.f90 @@ -43,16 +43,16 @@ module flow_ic use math use user_intf, only : useric use json_module, only : json_file - use json_utils, only: json_get, json_get_or_default + use json_utils, only: json_get implicit none private interface set_flow_ic module procedure set_flow_ic_int, set_flow_ic_usr - end interface + end interface set_flow_ic public :: set_flow_ic - + contains !> Set initial flow condition (builtin) @@ -71,7 +71,7 @@ subroutine set_flow_ic_int(u, v, w, p, coef, gs, type, params) real(kind=rp), allocatable :: uinf(:) character(len=:), allocatable :: blasius_approximation - if (trim(type) .eq. 'uniform') then + if (trim(type) .eq. 'uniform') then call json_get(params, 'case.fluid.initial_condition.value', uinf) call set_flow_ic_uniform(u, v, w, uinf) else if (trim(type) .eq. 'blasius') then @@ -83,9 +83,9 @@ subroutine set_flow_ic_int(u, v, w, p, coef, gs, type, params) else call neko_error('Invalid initial condition') end if - + call set_flow_ic_common(u, v, w, p, coef, gs) - + end subroutine set_flow_ic_int !> Set intial flow condition (user defined) @@ -100,9 +100,9 @@ subroutine set_flow_ic_usr(u, v, w, p, coef, gs, usr_ic, params) type(json_file), intent(inout) :: params call usr_ic(u, v, w, p, params) - + call set_flow_ic_common(u, v, w, p, coef, gs) - + end subroutine set_flow_ic_usr subroutine set_flow_ic_common(u, v, w, p, coef, gs) @@ -112,17 +112,20 @@ subroutine set_flow_ic_common(u, v, w, p, coef, gs) type(field_t), intent(inout) :: p type(coef_t), intent(in) :: coef type(gs_t), intent(inout) :: gs - - if (NEKO_BCKND_DEVICE .eq. 1) then - call device_memcpy(u%x, u%x_d, u%dof%size(), HOST_TO_DEVICE) - call device_memcpy(v%x, v%x_d, v%dof%size(), HOST_TO_DEVICE) - call device_memcpy(w%x, w%x_d, w%dof%size(), HOST_TO_DEVICE) + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(u%x, u%x_d, u%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(v%x, v%x_d, v%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(w%x, w%x_d, w%dof%size(), & + HOST_TO_DEVICE, sync=.false.) end if - + ! Ensure continuity across elements for initial conditions - call gs%op(u%x, u%dof%size(), GS_OP_ADD) - call gs%op(v%x, v%dof%size(), GS_OP_ADD) - call gs%op(w%x, w%dof%size(), GS_OP_ADD) + call gs%op(u%x, u%dof%size(), GS_OP_ADD) + call gs%op(v%x, v%dof%size(), GS_OP_ADD) + call gs%op(w%x, w%dof%size(), GS_OP_ADD) if (NEKO_BCKND_DEVICE .eq. 1) then call device_col2(u%x_d, coef%mult_d, u%dof%size()) @@ -133,7 +136,7 @@ subroutine set_flow_ic_common(u, v, w, p, coef, gs) call col2(v%x, coef%mult, v%dof%size()) call col2(w%x, coef%mult, w%dof%size()) end if - + end subroutine set_flow_ic_common !> Uniform initial condition @@ -181,7 +184,7 @@ subroutine set_flow_ic_blasius(u, v, w, delta, uinf, type) case default call neko_error('Invalid Blasius approximation') end select - + if ((uinf(1) .gt. 0.0_rp) .and. (uinf(2) .eq. 0.0_rp) & .and. (uinf(3) .eq. 0.0_rp)) then do i = 1, u%dof%size() @@ -202,9 +205,9 @@ subroutine set_flow_ic_blasius(u, v, w, delta, uinf, type) u%x(i,1,1,1) = 0.0_rp v%x(i,1,1,1) = 0.0_rp w%x(i,1,1,1) = bla(u%dof%y(i,1,1,1), delta, uinf(3)) - end do + end do end if - + end subroutine set_flow_ic_blasius - + end module flow_ic diff --git a/src/fluid/flow_profile.f90 b/src/fluid/flow_profile.f90 index ec380c511d4..32e5ef29506 100644 --- a/src/fluid/flow_profile.f90 +++ b/src/fluid/flow_profile.f90 @@ -30,7 +30,7 @@ ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -!> Defines a flow profile +!> Defines a flow profile module flow_profile use num_types implicit none @@ -59,7 +59,7 @@ function blasius_linear(y, delta, u) else blasius_linear = u * (y / delta) end if - + end function blasius_linear !> Quadratic approximate Blasius Profile @@ -76,7 +76,7 @@ function blasius_quadratic(y, delta, u) else blasius_quadratic = u * arg end if - + end function blasius_quadratic !> Cubic approximate Blasius Profile @@ -93,7 +93,7 @@ function blasius_cubic(y, delta, u) else blasius_cubic = u * arg end if - + end function blasius_cubic !> Quartic approximate Blasius Profile @@ -111,7 +111,7 @@ function blasius_quartic(y, delta, u) else blasius_quartic = u * arg end if - + end function blasius_quartic !> Sinusoidal approximate Blasius Profile @@ -129,7 +129,7 @@ function blasius_sin(y, delta, u) else blasius_sin = u * sin(arg) end if - + end function blasius_sin - + end module flow_profile diff --git a/src/fluid/fluid_aux.f90 b/src/fluid/fluid_aux.f90 index 5641e694cb1..5f8bed604ae 100644 --- a/src/fluid/fluid_aux.f90 +++ b/src/fluid/fluid_aux.f90 @@ -5,11 +5,14 @@ module fluid_aux use krylov, only : ksp_monitor_t use, intrinsic :: ieee_arithmetic, only: ieee_is_nan implicit none + private + + public :: fluid_step_info contains !> Prints for prs, velx, vely, velz the following: - !! Number of iterations, start residual, end residual + !! Number of iterations, start residual, end residual subroutine fluid_step_info(step, t, dt, ksp_results) type(ksp_monitor_t), intent(in) :: ksp_results(4) integer, intent(in) :: step @@ -58,7 +61,7 @@ subroutine fluid_step_info(step, t, dt, ksp_results) stop end if end do - + end subroutine fluid_step_info diff --git a/src/fluid/fluid_fctry.f90 b/src/fluid/fluid_fctry.f90 index 5773c081ba4..75e11c9254a 100644 --- a/src/fluid/fluid_fctry.f90 +++ b/src/fluid/fluid_fctry.f90 @@ -34,7 +34,7 @@ module fluid_fctry use fluid_scheme, only : fluid_scheme_t ! use fluid_plan1, only : fluid_plan1_t - use fluid_pnpn, only : fluid_pnpn_t + use fluid_pnpn, only : fluid_pnpn_t use utils, only : neko_error use neko_config implicit none @@ -53,7 +53,7 @@ subroutine fluid_scheme_factory(fluid, fluid_scheme) else call neko_error('Invalid fluid scheme') end if - + end subroutine fluid_scheme_factory end module fluid_fctry diff --git a/src/fluid/fluid_plan1.f90 b/src/fluid/fluid_plan1.f90 index cc7096544e7..3b1b7deff2e 100644 --- a/src/fluid/fluid_plan1.f90 +++ b/src/fluid/fluid_plan1.f90 @@ -22,17 +22,17 @@ subroutine fluid_plan1_init(this, msh, lx, param) class(fluid_plan1_t), target, intent(inout) :: this type(mesh_t), target, intent(inout) :: msh integer, intent(inout) :: lx - type(param_t), target, intent(inout) :: param + type(param_t), target, intent(inout) :: param character(len=15), parameter :: scheme = 'plan1 (Pn/Pn-2)' integer :: lx2 call this%free() - + !> Setup velocity fields on the space \f$ Xh \f$ call this%scheme_init(msh, lx, param, kspv_init=.true., scheme=scheme) !> Setup pressure field and related space \f$ Yh \f$ - lx2 = lx - 2 + lx2 = lx - 2 if (msh%gdim .eq. 2) then call this%Yh%init(GLL, lx2, lx2) else @@ -40,19 +40,19 @@ subroutine fluid_plan1_init(this, msh, lx, param) end if this%dm_Yh = dofmap_t(msh, this%Yh) - + call this%p%init(this%dm_Yh) call gs_init(this%gs_Yh, this%dm_Yh) call this%c_Yh%init(this%gs_Yh) - + call fluid_scheme_solver_factory(this%ksp_prs, this%dm_Yh%size(), & param%ksp_prs, param%abstol_prs) call fluid_scheme_precon_factory(this%pc_prs, this%ksp_prs, & this%c_Yh, this%dm_Yh, this%gs_Yh, this%bclst_prs, param%pc_prs) - - + + end subroutine fluid_plan1_init subroutine fluid_plan1_free(this) @@ -67,17 +67,18 @@ subroutine fluid_plan1_free(this) call gs_free(this%gs_Yh) call this%c_Yh%free() - + end subroutine fluid_plan1_free - subroutine fluid_plan1_step(this, t, tstep, ext_bdf) - class(fluid_plan1_t), intent(inout) :: this + subroutine fluid_plan1_step(this, t, tstep, ext_bdf, dt_controller) + class(fluid_plan1_t), target, intent(inout) :: this real(kind=rp), intent(inout) :: t integer, intent(inout) :: tstep type(time_scheme_controller_t), intent(inout) :: ext_bdf + type(time_step_controller_t), intent(in) :: dt_controller if (this%freeze) return end subroutine fluid_plan1_step - + end module fluid_plan1 diff --git a/src/fluid/fluid_pnpn.f90 b/src/fluid/fluid_pnpn.f90 index bd36238cb89..480c661c420 100644 --- a/src/fluid/fluid_pnpn.f90 +++ b/src/fluid/fluid_pnpn.f90 @@ -32,38 +32,56 @@ ! !> Modular version of the Classic Nek5000 Pn/Pn formulation for fluids module fluid_pnpn - use pnpn_res_fctry - use ax_helm_fctry - use rhs_maker_fctry - use fluid_volflow - use fluid_scheme - use field_series - use facet_normal - use device_math - use device_mathops - use fluid_aux - use time_scheme_controller - use projection - use device - use logger - use advection - use profiler - use json_utils, only : json_get, json_get_or_default + use num_types, only : rp + use krylov, only : ksp_monitor_t + use pnpn_res_fctry, only : pnpn_prs_res_factory, pnpn_vel_res_factory + use pnpn_residual, only : pnpn_prs_res_t, pnpn_vel_res_t + use ax_helm_fctry, only : ax_helm_factory + use rhs_maker_fctry, only : rhs_maker_sumab_fctry, rhs_maker_bdf_fctry, & + rhs_maker_ext_fctry + use rhs_maker, only : rhs_maker_sumab_t, rhs_maker_bdf_t, rhs_maker_ext_t + use fluid_volflow, only : fluid_volflow_t + use fluid_scheme, only : fluid_scheme_t + use field_series, only : field_series_t + use device_math, only : device_add2, device_col2 + use device_mathops, only : device_opcolv, device_opadd2cm + use fluid_aux, only : fluid_step_info + use time_scheme_controller, only : time_scheme_controller_t + use projection, only : projection_t + use device, only : device_memcpy, HOST_TO_DEVICE + use logger, only : neko_log + use advection, only : advection_t + use profiler, only : profiler_start_region, profiler_end_region + use json_utils, only : json_get use json_module, only : json_file use material_properties, only : material_properties_t + use advection_fctry, only : advection_factory + use ax_product, only : ax_t + use field, only : field_t + use dirichlet, only : dirichlet_t + use facet_normal, only : facet_normal_t + use non_normal, only : non_normal_t + use mesh, only : mesh_t + use user_intf, only : user_t + use coefs, only : coef_t + use time_step_controller, only : time_step_controller_t + use gather_scatter, only : gs_t, GS_OP_ADD + use neko_config, only : NEKO_BCKND_DEVICE + use math, only : col2, add2 + use mathops, only : opadd2cm, opcolv + use bc, only: bc_list_t, bc_list_init, bc_list_add, bc_list_free, & + bc_list_apply_scalar, bc_list_apply_vector implicit none private - + type, public, extends(fluid_scheme_t) :: fluid_pnpn_t type(field_t) :: p_res, u_res, v_res, w_res - type(field_series_t) :: ulag, vlag, wlag - type(field_t) :: dp, du, dv, dw class(ax_t), allocatable :: Ax - + type(projection_t) :: proj_prs type(projection_t) :: proj_u type(projection_t) :: proj_v @@ -72,20 +90,23 @@ module fluid_pnpn type(facet_normal_t) :: bc_prs_surface !< Surface term in pressure rhs type(facet_normal_t) :: bc_sym_surface !< Surface term in pressure rhs type(dirichlet_t) :: bc_vel_res !< Dirichlet condition vel. res. - type(dirichlet_t) :: bc_dp !< Dirichlet condition vel. res. + type(dirichlet_t) :: bc_field_dirichlet_p !< Dirichlet condition vel. res. + type(dirichlet_t) :: bc_field_dirichlet_u !< Dirichlet condition vel. res. + type(dirichlet_t) :: bc_field_dirichlet_v !< Dirichlet condition vel. res. + type(dirichlet_t) :: bc_field_dirichlet_w !< Dirichlet condition vel. res. type(non_normal_t) :: bc_vel_res_non_normal !< Dirichlet condition vel. res. - type(bc_list_t) :: bclst_vel_res + type(bc_list_t) :: bclst_vel_res type(bc_list_t) :: bclst_du type(bc_list_t) :: bclst_dv type(bc_list_t) :: bclst_dw - type(bc_list_t) :: bclst_dp + type(bc_list_t) :: bclst_dp - class(advection_t), allocatable :: adv + class(advection_t), allocatable :: adv ! Time variables type(field_t) :: abx1, aby1, abz1 type(field_t) :: abx2, aby2, abz2 - + !> Pressure residual class(pnpn_prs_res_t), allocatable :: prs_res @@ -103,7 +124,7 @@ module fluid_pnpn !> Adjust flow volume type(fluid_volflow_t) :: vol_flow - + contains procedure, pass(this) :: init => fluid_pnpn_init procedure, pass(this) :: free => fluid_pnpn_free @@ -112,21 +133,21 @@ module fluid_pnpn end type fluid_pnpn_t contains - + subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties) class(fluid_pnpn_t), target, intent(inout) :: this type(mesh_t), target, intent(inout) :: msh integer, intent(inout) :: lx type(json_file), target, intent(inout) :: params type(user_t), intent(in) :: user - type(material_properties_t), intent(inout) :: material_properties + type(material_properties_t), target, intent(inout) :: material_properties character(len=15), parameter :: scheme = 'Modular (Pn/Pn)' logical :: found, logical_val integer :: integer_val real(kind=rp) :: real_val call this%free() - + ! Initialize base class call this%scheme_init(msh, lx, params, .true., .true., scheme, user, & material_properties) @@ -148,7 +169,7 @@ subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties) ! Setup backend depenent contributions to F from lagged BD terms call rhs_maker_bdf_fctry(this%makebdf) - + ! Initialize variables specific to this plan associate(Xh_lx => this%Xh%lx, Xh_ly => this%Xh%ly, Xh_lz => this%Xh%lz, & dm_Xh => this%dm_Xh, nelv => this%msh%nelv) @@ -156,7 +177,7 @@ subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties) call this%p_res%init(dm_Xh, "p_res") call this%u_res%init(dm_Xh, "u_res") call this%v_res%init(dm_Xh, "v_res") - call this%w_res%init(dm_Xh, "w_res") + call this%w_res%init(dm_Xh, "w_res") call this%abx1%init(dm_Xh, "abx1") call this%aby1%init(dm_Xh, "aby1") call this%abz1%init(dm_Xh, "abz1") @@ -169,34 +190,36 @@ subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties) this%abx2 = 0.0_rp this%aby2 = 0.0_rp this%abz2 = 0.0_rp - + call this%du%init(dm_Xh, 'du') call this%dv%init(dm_Xh, 'dv') call this%dw%init(dm_Xh, 'dw') call this%dp%init(dm_Xh, 'dp') - call this%ulag%init(this%u, 2) - call this%vlag%init(this%v, 2) - call this%wlag%init(this%w, 2) - end associate - + ! Initialize velocity surface terms in pressure rhs - call this%bc_prs_surface%init(this%dm_Xh) + call this%bc_prs_surface%init(this%c_Xh) call this%bc_prs_surface%mark_zone(msh%inlet) call this%bc_prs_surface%mark_zones_from_list(msh%labeled_zones,& 'v', this%bc_labels) + !This impacts the rhs of the pressure, need to check what is correct to add here + call this%bc_prs_surface%mark_zones_from_list(msh%labeled_zones,& + 'd_vel_u', this%bc_labels) + call this%bc_prs_surface%mark_zones_from_list(msh%labeled_zones,& + 'd_vel_v', this%bc_labels) + call this%bc_prs_surface%mark_zones_from_list(msh%labeled_zones,& + 'd_vel_w', this%bc_labels) call this%bc_prs_surface%finalize() - call this%bc_prs_surface%set_coef(this%c_Xh) ! Initialize symmetry surface terms in pressure rhs - call this%bc_sym_surface%init(this%dm_Xh) + call this%bc_sym_surface%init(this%c_Xh) call this%bc_sym_surface%mark_zone(msh%sympln) call this%bc_sym_surface%mark_zones_from_list(msh%labeled_zones,& 'sym', this%bc_labels) + ! Same here, should du, dv, dw be marked here? call this%bc_sym_surface%finalize() - call this%bc_sym_surface%set_coef(this%c_Xh) ! Initialize dirichlet bcs for velocity residual - call this%bc_vel_res_non_normal%init(this%dm_Xh) + call this%bc_vel_res_non_normal%init(this%c_Xh) call this%bc_vel_res_non_normal%mark_zone(msh%outlet_normal) call this%bc_vel_res_non_normal%mark_zones_from_list(msh%labeled_zones,& 'on', this%bc_labels) @@ -204,21 +227,41 @@ subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties) 'on+dong', & this%bc_labels) call this%bc_vel_res_non_normal%finalize() - call this%bc_vel_res_non_normal%init_msk(this%c_Xh) + call this%bc_vel_res_non_normal%init_msk() - call this%bc_dp%init(this%dm_Xh) - call this%bc_dp%mark_zones_from_list(msh%labeled_zones, 'on+dong', & + call this%bc_field_dirichlet_p%init(this%c_Xh) + call this%bc_field_dirichlet_p%mark_zones_from_list(msh%labeled_zones, 'on+dong', & this%bc_labels) - call this%bc_dp%mark_zones_from_list(msh%labeled_zones, & + call this%bc_field_dirichlet_p%mark_zones_from_list(msh%labeled_zones, & 'o+dong', this%bc_labels) - call this%bc_dp%finalize() - call this%bc_dp%set_g(0.0_rp) + call this%bc_field_dirichlet_p%mark_zones_from_list(msh%labeled_zones, 'd_pres', & + this%bc_labels) + call this%bc_field_dirichlet_p%finalize() + call this%bc_field_dirichlet_p%set_g(0.0_rp) call bc_list_init(this%bclst_dp) - call bc_list_add(this%bclst_dp, this%bc_dp) + call bc_list_add(this%bclst_dp, this%bc_field_dirichlet_p) !Add 0 prs bcs call bc_list_add(this%bclst_dp, this%bc_prs) - call this%bc_vel_res%init(this%dm_Xh) + call this%bc_field_dirichlet_u%init(this%c_Xh) + call this%bc_field_dirichlet_u%mark_zones_from_list(msh%labeled_zones, 'd_vel_u', & + this%bc_labels) + call this%bc_field_dirichlet_u%finalize() + call this%bc_field_dirichlet_u%set_g(0.0_rp) + + call this%bc_field_dirichlet_v%init(this%c_Xh) + call this%bc_field_dirichlet_v%mark_zones_from_list(msh%labeled_zones, 'd_vel_v', & + this%bc_labels) + call this%bc_field_dirichlet_v%finalize() + call this%bc_field_dirichlet_v%set_g(0.0_rp) + + call this%bc_field_dirichlet_w%init(this%c_Xh) + call this%bc_field_dirichlet_w%mark_zones_from_list(msh%labeled_zones, 'd_vel_w', & + this%bc_labels) + call this%bc_field_dirichlet_w%finalize() + call this%bc_field_dirichlet_w%set_g(0.0_rp) + + call this%bc_vel_res%init(this%c_Xh) call this%bc_vel_res%mark_zone(msh%inlet) call this%bc_vel_res%mark_zone(msh%wall) call this%bc_vel_res%mark_zones_from_list(msh%labeled_zones, & @@ -237,45 +280,41 @@ subroutine fluid_pnpn_init(this, msh, lx, params, user, material_properties) call bc_list_add(this%bclst_du,this%bc_sym%bc_x) call bc_list_add(this%bclst_du,this%bc_vel_res_non_normal%bc_x) call bc_list_add(this%bclst_du, this%bc_vel_res) + call bc_list_add(this%bclst_du, this%bc_field_dirichlet_u) call bc_list_init(this%bclst_dv) call bc_list_add(this%bclst_dv,this%bc_sym%bc_y) call bc_list_add(this%bclst_dv,this%bc_vel_res_non_normal%bc_y) call bc_list_add(this%bclst_dv, this%bc_vel_res) + call bc_list_add(this%bclst_dv, this%bc_field_dirichlet_v) call bc_list_init(this%bclst_dw) call bc_list_add(this%bclst_dw,this%bc_sym%bc_z) call bc_list_add(this%bclst_dw,this%bc_vel_res_non_normal%bc_z) call bc_list_add(this%bclst_dw, this%bc_vel_res) + call bc_list_add(this%bclst_dw, this%bc_field_dirichlet_w) !Intialize projection space thingy - if (this%pr_projection_dim .gt. 0) then - call this%proj_prs%init(this%dm_Xh%size(), this%pr_projection_dim) - end if - - if (this%vel_projection_dim .gt. 0) then - call this%proj_u%init(this%dm_Xh%size(), this%vel_projection_dim) - call this%proj_v%init(this%dm_Xh%size(), this%vel_projection_dim) - call this%proj_w%init(this%dm_Xh%size(), this%vel_projection_dim) - end if + call this%proj_prs%init(this%dm_Xh%size(), this%pr_projection_dim, & + this%pr_projection_activ_step) + + call this%proj_u%init(this%dm_Xh%size(), this%vel_projection_dim, & + this%vel_projection_activ_step) + call this%proj_v%init(this%dm_Xh%size(), this%vel_projection_dim, & + this%vel_projection_activ_step) + call this%proj_w%init(this%dm_Xh%size(), this%vel_projection_dim, & + this%vel_projection_activ_step) + ! Add lagged term to checkpoint - call this%chkp%add_lag(this%ulag, this%vlag, this%wlag) - - call json_get(params, 'case.numerics.dealias', logical_val) - call params%get('case.numerics.dealiased_polynomial_order', integer_val, & - found) - if (.not. found) then - call json_get(params, 'case.numerics.polynomial_order', integer_val) - integer_val = 3.0_rp / 2.0_rp * (integer_val + 1) - 1 - end if - ! an extra +1 below to go from poly order to space size - call advection_factory(this%adv, this%c_Xh, logical_val, integer_val + 1) + call this%chkp%add_lag(this%ulag, this%vlag, this%wlag) + + call advection_factory(this%adv, params, this%c_Xh) if (params%valid_path('case.fluid.flow_rate_force')) then call this%vol_flow%init(this%dm_Xh, params) end if - + end subroutine fluid_pnpn_init subroutine fluid_pnpn_restart(this,dtlag, tlag) @@ -285,7 +324,9 @@ subroutine fluid_pnpn_restart(this,dtlag, tlag) integer :: i, n n = this%u%dof%size() - ! Make sure that continuity is maintained (important for interpolation) + ! Make sure that continuity is maintained (important for interpolation) + ! Do not do this for lagged rhs + ! (derivatives are not necessairly coninous across elements) call col2(this%u%x,this%c_Xh%mult,this%u%dof%size()) call col2(this%v%x,this%c_Xh%mult,this%u%dof%size()) call col2(this%w%x,this%c_Xh%mult,this%u%dof%size()) @@ -295,44 +336,48 @@ subroutine fluid_pnpn_restart(this,dtlag, tlag) call col2(this%vlag%lf(i)%x,this%c_Xh%mult,this%u%dof%size()) call col2(this%wlag%lf(i)%x,this%c_Xh%mult,this%u%dof%size()) end do - + + if (NEKO_BCKND_DEVICE .eq. 1) then associate(u=>this%u, v=>this%v, w=>this%w, & ulag=>this%ulag, vlag=>this%vlag, wlag=>this%wlag,& p=>this%p) - call device_memcpy(u%x, u%x_d, u%dof%size(), HOST_TO_DEVICE) - call device_memcpy(v%x, v%x_d, v%dof%size(), HOST_TO_DEVICE) - call device_memcpy(w%x, w%x_d, w%dof%size(), HOST_TO_DEVICE) - call device_memcpy(p%x, p%x_d, p%dof%size(), HOST_TO_DEVICE) - call device_memcpy(ulag%lf(1)%x, ulag%lf(1)%x_d, & - u%dof%size(), HOST_TO_DEVICE) - call device_memcpy(ulag%lf(2)%x, ulag%lf(2)%x_d, & - u%dof%size(), HOST_TO_DEVICE) - - call device_memcpy(vlag%lf(1)%x, vlag%lf(1)%x_d, & - v%dof%size(), HOST_TO_DEVICE) - call device_memcpy(vlag%lf(2)%x, vlag%lf(2)%x_d, & - v%dof%size(), HOST_TO_DEVICE) - - call device_memcpy(wlag%lf(1)%x, wlag%lf(1)%x_d, & - w%dof%size(), HOST_TO_DEVICE) - call device_memcpy(wlag%lf(2)%x, wlag%lf(2)%x_d, & - w%dof%size(), HOST_TO_DEVICE) - call device_memcpy(this%abx1%x, this%abx1%x_d, & - w%dof%size(), HOST_TO_DEVICE) - call device_memcpy(this%abx2%x, this%abx2%x_d, & - w%dof%size(), HOST_TO_DEVICE) - call device_memcpy(this%aby1%x, this%aby1%x_d, & - w%dof%size(), HOST_TO_DEVICE) - call device_memcpy(this%aby2%x, this%aby2%x_d, & - w%dof%size(), HOST_TO_DEVICE) - call device_memcpy(this%abz1%x, this%abz1%x_d, & - w%dof%size(), HOST_TO_DEVICE) - call device_memcpy(this%abz2%x, this%abz2%x_d, & - w%dof%size(), HOST_TO_DEVICE) + call device_memcpy(u%x, u%x_d, u%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(v%x, v%x_d, v%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(w%x, w%x_d, w%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(p%x, p%x_d, p%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(ulag%lf(1)%x, ulag%lf(1)%x_d, & + u%dof%size(), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(ulag%lf(2)%x, ulag%lf(2)%x_d, & + u%dof%size(), HOST_TO_DEVICE, sync=.false.) + + call device_memcpy(vlag%lf(1)%x, vlag%lf(1)%x_d, & + v%dof%size(), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(vlag%lf(2)%x, vlag%lf(2)%x_d, & + v%dof%size(), HOST_TO_DEVICE, sync=.false.) + + call device_memcpy(wlag%lf(1)%x, wlag%lf(1)%x_d, & + w%dof%size(), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(wlag%lf(2)%x, wlag%lf(2)%x_d, & + w%dof%size(), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%abx1%x, this%abx1%x_d, & + w%dof%size(), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%abx2%x, this%abx2%x_d, & + w%dof%size(), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%aby1%x, this%aby1%x_d, & + w%dof%size(), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%aby2%x, this%aby2%x_d, & + w%dof%size(), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%abz1%x, this%abz1%x_d, & + w%dof%size(), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%abz2%x, this%abz2%x_d, & + w%dof%size(), HOST_TO_DEVICE, sync=.false.) end associate - end if - + end if call this%gs_Xh%op(this%u,GS_OP_ADD) @@ -346,33 +391,83 @@ subroutine fluid_pnpn_restart(this,dtlag, tlag) call this%gs_Xh%op(this%wlag%lf(i),GS_OP_ADD) end do + + !! If we would decide to only restart from lagged fields instead of asving abx1, aby1 etc. + !! Observe that one also needs to recompute the focing at the old time steps + !u_temp = this%ulag%lf(2) + !v_temp = this%vlag%lf(2) + !w_temp = this%wlag%lf(2) + !! Compute the source terms + !call this%source_term%compute(tlag(2), -1) + ! + !! Pre-multiply the source terms with the mass matrix. + !if (NEKO_BCKND_DEVICE .eq. 1) then + ! call device_opcolv(this%f_x%x_d, this%f_y%x_d, this%f_z%x_d, this%c_Xh%B_d, this%msh%gdim, n) + !else + ! call opcolv(this%f_x%x, this%f_y%x, this%f_z%x, this%c_Xh%B, this%msh%gdim, n) + !end if + + !! Add the advection operators to the right-hand-side. + !call this%adv%compute(u_temp, v_temp, w_temp, & + ! this%f_x%x, this%f_y%x, this%f_z%x, & + ! this%Xh, this%c_Xh, this%dm_Xh%size()) + !this%abx2 = this%f_x + !this%aby2 = this%f_y + !this%abz2 = this%f_z + ! + !u_temp = this%ulag%lf(1) + !v_temp = this%vlag%lf(1) + !w_temp = this%wlag%lf(1) + !call this%source_term%compute(tlag(1), 0) + + !! Pre-multiply the source terms with the mass matrix. + !if (NEKO_BCKND_DEVICE .eq. 1) then + ! call device_opcolv(this%f_x%x_d, this%f_y%x_d, this%f_z%x_d, this%c_Xh%B_d, this%msh%gdim, n) + !else + ! call opcolv(this%f_x%x, this%f_y%x, this%f_z%x, this%c_Xh%B, this%msh%gdim, n) + !end if + + !! Pre-multiply the source terms with the mass matrix. + !if (NEKO_BCKND_DEVICE .eq. 1) then + ! call device_opcolv(this%f_x%x_d, this%f_y%x_d, this%f_z%x_d, this%c_Xh%B_d, this%msh%gdim, n) + !else + ! call opcolv(this%f_x%x, this%f_y%x, this%f_z%x, this%c_Xh%B, this%msh%gdim, n) + !end if + + !call this%adv%compute(u_temp, v_temp, w_temp, & + ! this%f_x%x, this%f_y%x, this%f_z%x, & + ! this%Xh, this%c_Xh, this%dm_Xh%size()) + !this%abx1 = this%f_x + !this%aby1 = this%f_y + !this%abz1 = this%f_z + end subroutine fluid_pnpn_restart - + subroutine fluid_pnpn_free(this) class(fluid_pnpn_t), intent(inout) :: this !Deallocate velocity and pressure fields call this%scheme_free() - call this%bc_prs_surface%free() - call this%bc_sym_surface%free() + call this%bc_prs_surface%free() + call this%bc_sym_surface%free() call bc_list_free(this%bclst_vel_res) call bc_list_free(this%bclst_dp) call this%proj_prs%free() call this%proj_u%free() call this%proj_v%free() call this%proj_w%free() - + call this%p_res%free() call this%u_res%free() call this%v_res%free() call this%w_res%free() - + call this%du%free() call this%dv%free() call this%dw%free() call this%dp%free() - + call this%abx1%free() call this%aby1%free() call this%abz1%free() @@ -380,7 +475,7 @@ subroutine fluid_pnpn_free(this) call this%abx2%free() call this%aby2%free() call this%abz2%free() - + if (allocated(this%Ax)) then deallocate(this%Ax) end if @@ -388,7 +483,7 @@ subroutine fluid_pnpn_free(this) if (allocated(this%prs_res)) then deallocate(this%prs_res) end if - + if (allocated(this%vel_res)) then deallocate(this%vel_res) end if @@ -406,11 +501,7 @@ subroutine fluid_pnpn_free(this) end if call this%vol_flow%free() - - call this%ulag%free() - call this%vlag%free() - call this%wlag%free() - + end subroutine fluid_pnpn_free !> Advance fluid simulation in time. @@ -418,19 +509,21 @@ end subroutine fluid_pnpn_free !! @param tstep The current interation. !! @param dt The timestep !! @param ext_bdf Time integration logic. - subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf) - class(fluid_pnpn_t), intent(inout) :: this + !! @param dt_controller timestep controller + subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf, dt_controller) + class(fluid_pnpn_t), target, intent(inout) :: this real(kind=rp), intent(inout) :: t integer, intent(inout) :: tstep real(kind=rp), intent(in) :: dt type(time_scheme_controller_t), intent(inout) :: ext_bdf + type(time_step_controller_t), intent(in) :: dt_controller ! number of degrees of freedom integer :: n ! Solver results monitors (pressure + 3 velocity) type(ksp_monitor_t) :: ksp_results(4) ! Extrapolated velocity for the pressure residual type(field_t), pointer :: u_e, v_e, w_e - ! Indices for tracking temporary fields + ! Indices for tracking temporary fields integer :: temp_indices(3) ! Counter integer :: i @@ -439,7 +532,7 @@ subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf) n = this%dm_Xh%size() - call profiler_start_region('Fluid') + call profiler_start_region('Fluid', 1) associate(u => this%u, v => this%v, w => this%w, p => this%p, & du => this%du, dv => this%dv, dw => this%dw, dp => this%dp, & u_res =>this%u_res, v_res => this%v_res, w_res => this%w_res, & @@ -452,21 +545,21 @@ subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf) makeabf => this%makeabf, makebdf => this%makebdf, & vel_projection_dim => this%vel_projection_dim, & pr_projection_dim => this%pr_projection_dim, & - ksp_vel_maxiter => this%ksp_vel_maxiter, & - ksp_pr_maxiter => this%ksp_pr_maxiter, & rho => this%rho, mu => this%mu, & - f_x => this%f_x, f_y => this%f_y, f_z => this%f_z) - + f_x => this%f_x, f_y => this%f_y, f_z => this%f_z, & + if_variable_dt => dt_controller%if_variable_dt, & + dt_last_change => dt_controller%dt_last_change) + ! Get temporary arrays call this%scratch%request_field(u_e, temp_indices(1)) call this%scratch%request_field(v_e, temp_indices(2)) call this%scratch%request_field(w_e, temp_indices(3)) call sumab%compute_fluid(u_e, v_e, w_e, u, v, w, & ulag, vlag, wlag, ext_bdf%advection_coeffs, ext_bdf%nadv) - + ! Compute the source terms call this%source_term%compute(t, tstep) - + ! Pre-multiply the source terms with the mass matrix. if (NEKO_BCKND_DEVICE .eq. 1) then call device_opcolv(f_x%x_d, f_y%x_d, f_z%x_d, c_Xh%B_d, msh%gdim, n) @@ -482,7 +575,7 @@ subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf) ! At this point the RHS contains the sum of the advection operator and ! additional source terms, evaluated using the velocity field from the ! previous time-step. Now, this value is used in the explicit time - ! scheme to advance both terms in time. + ! scheme to advance both terms in time. call makeabf%compute_fluid(this%abx1, this%aby1, this%abz1,& this%abx2, this%aby2, this%abz2, & f_x%x, f_y%x, f_z%x, & @@ -497,49 +590,47 @@ subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf) call vlag%update() call wlag%update() - !> We assume that no change of boundary conditions + !> We assume that no change of boundary conditions !! occurs between elements. I.e. we do not apply gsop here like in Nek5000 !> Apply dirichlet + call this%dirichlet_update_(this%field_dirichlet_fields, & + this%field_dirichlet_bcs, this%c_Xh, t, tstep, "fluid") + call this%bc_apply_vel(t, tstep) call this%bc_apply_prs(t, tstep) ! Compute pressure. - call profiler_start_region('Pressure residual') + call profiler_start_region('Pressure residual', 18) call prs_res%compute(p, p_res, u, v, w, u_e, v_e, w_e, & f_x, f_y, f_z, c_Xh, gs_Xh, this%bc_prs_surface, & this%bc_sym_surface, Ax, ext_bdf%diffusion_coeffs(1), & dt, mu, rho) - - call gs_Xh%op(p_res, GS_OP_ADD) + + call gs_Xh%op(p_res, GS_OP_ADD) call bc_list_apply_scalar(this%bclst_dp, p_res%x, p%dof%size(), t, tstep) call profiler_end_region - if( tstep .gt. 5 .and. pr_projection_dim .gt. 0) then - call this%proj_prs%project_on(p_res%x, c_Xh, n) - call this%proj_prs%log_info('Pressure') - end if - + call this%proj_prs%pre_solving(p_res%x, tstep, c_Xh, n, dt_controller, 'Pressure') + call this%pc_prs%update() - call profiler_start_region('Pressure solve') + call profiler_start_region('Pressure solve', 3) ksp_results(1) = & - this%ksp_prs%solve(Ax, dp, p_res%x, n, c_Xh, this%bclst_dp, gs_Xh, & - ksp_pr_maxiter) + this%ksp_prs%solve(Ax, dp, p_res%x, n, c_Xh, this%bclst_dp, gs_Xh) + call profiler_end_region - if( tstep .gt. 5 .and. pr_projection_dim .gt. 0) then - call this%proj_prs%project_back(dp%x, Ax, c_Xh, & - this%bclst_dp, gs_Xh, n) - end if + call this%proj_prs%post_solving(dp%x, Ax, c_Xh, & + this%bclst_dp, gs_Xh, n, tstep, dt_controller) if (NEKO_BCKND_DEVICE .eq. 1) then call device_add2(p%x_d, dp%x_d,n) else call add2(p%x, dp%x,n) end if - + ! Compute velocity. - call profiler_start_region('Velocity residual') + call profiler_start_region('Velocity residual', 19) call vel_res%compute(Ax, u, v, w, & u_res, v_res, w_res, & p, & @@ -547,43 +638,50 @@ subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf) c_Xh, msh, Xh, & mu, rho, ext_bdf%diffusion_coeffs(1), & dt, dm_Xh%size()) - - call gs_Xh%op(u_res, GS_OP_ADD) - call gs_Xh%op(v_res, GS_OP_ADD) - call gs_Xh%op(w_res, GS_OP_ADD) + + call gs_Xh%op(u_res, GS_OP_ADD) + call gs_Xh%op(v_res, GS_OP_ADD) + call gs_Xh%op(w_res, GS_OP_ADD) call bc_list_apply_vector(this%bclst_vel_res,& u_res%x, v_res%x, w_res%x, dm_Xh%size(),& t, tstep) - - call profiler_end_region - - if (tstep .gt. 5 .and. vel_projection_dim .gt. 0) then - call this%proj_u%project_on(u_res%x, c_Xh, n) - call this%proj_v%project_on(v_res%x, c_Xh, n) - call this%proj_w%project_on(w_res%x, c_Xh, n) + + !We should implement a bc that takes three field_bcs and implements vector_apply + if (NEKO_BCKND_DEVICE .eq. 1) then + call this%bc_field_dirichlet_u%apply_scalar_dev(u_res%x_d, t, tstep) + call this%bc_field_dirichlet_v%apply_scalar_dev(v_res%x_d, t, tstep) + call this%bc_field_dirichlet_w%apply_scalar_dev(w_res%x_d, t, tstep) + else + call this%bc_field_dirichlet_u%apply_scalar(u_res%x, this%dm_Xh%size(), t, tstep) + call this%bc_field_dirichlet_v%apply_scalar(v_res%x, this%dm_Xh%size(), t, tstep) + call this%bc_field_dirichlet_w%apply_scalar(w_res%x, this%dm_Xh%size(), t, tstep) end if + call profiler_end_region + + call this%proj_u%pre_solving(u_res%x, tstep, c_Xh, n, dt_controller) + call this%proj_v%pre_solving(v_res%x, tstep, c_Xh, n, dt_controller) + call this%proj_w%pre_solving(w_res%x, tstep, c_Xh, n, dt_controller) + call this%pc_vel%update() - call profiler_start_region("Velocity solve") + call profiler_start_region("Velocity solve", 4) ksp_results(2) = this%ksp_vel%solve(Ax, du, u_res%x, n, & - c_Xh, this%bclst_du, gs_Xh, ksp_vel_maxiter) + c_Xh, this%bclst_du, gs_Xh) ksp_results(3) = this%ksp_vel%solve(Ax, dv, v_res%x, n, & - c_Xh, this%bclst_dv, gs_Xh, ksp_vel_maxiter) + c_Xh, this%bclst_dv, gs_Xh) ksp_results(4) = this%ksp_vel%solve(Ax, dw, w_res%x, n, & - c_Xh, this%bclst_dw, gs_Xh, ksp_vel_maxiter) + c_Xh, this%bclst_dw, gs_Xh) call profiler_end_region - if (tstep .gt. 5 .and. vel_projection_dim .gt. 0) then - call this%proj_u%project_back(du%x, Ax, c_Xh, & - this%bclst_du, gs_Xh, n) - call this%proj_v%project_back(dv%x, Ax, c_Xh, & - this%bclst_dv, gs_Xh, n) - call this%proj_w%project_back(dw%x, Ax, c_Xh, & - this%bclst_dw, gs_Xh, n) - end if - + call this%proj_u%post_solving(du%x, Ax, c_Xh, & + this%bclst_du, gs_Xh, n, tstep, dt_controller) + call this%proj_v%post_solving(dv%x, Ax, c_Xh, & + this%bclst_dv, gs_Xh, n, tstep, dt_controller) + call this%proj_w%post_solving(dw%x, Ax, c_Xh, & + this%bclst_dw, gs_Xh, n, tstep, dt_controller) + if (NEKO_BCKND_DEVICE .eq. 1) then call device_opadd2cm(u%x_d, v%x_d, w%x_d, & du%x_d, dv%x_d, dw%x_d, 1.0_rp, n, msh%gdim) @@ -596,16 +694,18 @@ subroutine fluid_pnpn_step(this, t, tstep, dt, ext_bdf) c_Xh, gs_Xh, ext_bdf, rho, mu,& dt, this%bclst_dp, this%bclst_du, this%bclst_dv, & this%bclst_dw, this%bclst_vel_res, Ax, this%ksp_prs, & - this%ksp_vel, this%pc_prs, this%pc_vel, ksp_pr_maxiter, & - ksp_vel_maxiter) + this%ksp_vel, this%pc_prs, this%pc_vel, this%ksp_prs%max_iter, & + this%ksp_vel%max_iter) end if - + call fluid_step_info(tstep, t, dt, ksp_results) - + call this%scratch%relinquish_field(temp_indices) - + end associate call profiler_end_region end subroutine fluid_pnpn_step - + + + end module fluid_pnpn diff --git a/src/fluid/fluid_scheme.f90 b/src/fluid/fluid_scheme.f90 index 9a098237b2a..77bfaa50360 100644 --- a/src/fluid/fluid_scheme.f90 +++ b/src/fluid/fluid_scheme.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2020-2022, The Neko Authors +! Copyright (c) 2020-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -38,6 +38,7 @@ module fluid_scheme use checkpoint, only : chkp_t use mean_flow, only : mean_flow_t use num_types + use comm use fluid_user_source_term, only: fluid_user_source_term_t use fluid_source_term, only: fluid_source_term_t use field_list, only : field_list_t @@ -54,6 +55,8 @@ module fluid_scheme use dong_outflow, only : dong_outflow_t use symmetry, only : symmetry_t use non_normal, only : non_normal_t + use field_dirichlet, only : field_dirichlet_t, field_dirichlet_update + use field_dirichlet_vector, only: field_dirichlet_vector_t use krylov_fctry use precon_fctry use fluid_stats, only : fluid_stats_t @@ -68,20 +71,20 @@ module fluid_scheme use json_utils, only : json_get, json_get_or_default use json_module, only : json_file, json_core, json_value use scratch_registry, only : scratch_registry_t - use source_term, only : source_term_wrapper_t - use source_term_fctry, only : source_term_factory - use const_source_term, only : const_source_term_t use user_intf, only : user_t use utils, only : neko_warning, neko_error use material_properties, only : material_properties_t + use field_series + use time_step_controller implicit none - + !> Base type of all fluid formulations type, abstract :: fluid_scheme_t type(field_t), pointer :: u => null() !< x-component of Velocity type(field_t), pointer :: v => null() !< y-component of Velocity type(field_t), pointer :: w => null() !< z-component of Velocity type(field_t), pointer :: p => null() !< Pressure + type(field_series_t) :: ulag, vlag, wlag !< fluid field (lag) type(space_t) :: Xh !< Function space \f$ X_h \f$ type(dofmap_t) :: dm_Xh !< Dofmap associated with \f$ X_h \f$ type(gs_t) :: gs_Xh !< Gather-scatter associated with \f$ X_h \f$ @@ -98,19 +101,28 @@ module fluid_scheme class(ksp_t), allocatable :: ksp_prs !< Krylov solver for pressure class(pc_t), allocatable :: pc_vel !< Velocity Preconditioner class(pc_t), allocatable :: pc_prs !< Velocity Preconditioner - integer :: ksp_vel_maxiter !< Max iterations in ksp_vel - integer :: ksp_pr_maxiter !< Max iterattions in ksp_pr integer :: vel_projection_dim !< Size of the projection space for ksp_vel integer :: pr_projection_dim !< Size of the projection space for ksp_pr + integer :: vel_projection_activ_step !< Steps to activate projection for ksp_vel + integer :: pr_projection_activ_step !< Steps to activate projection for ksp_pr type(no_slip_wall_t) :: bc_wall !< No-slip wall for velocity class(inflow_t), allocatable :: bc_inflow !< Dirichlet inflow for velocity + + ! Attributes for field dirichlet BCs + type(field_dirichlet_vector_t) :: bc_field_vel !< Field Dirichlet velocity condition + type(field_dirichlet_t) :: bc_field_prs !< Field Dirichlet pressure condition + procedure(field_dirichlet_update), nopass, pointer :: dirichlet_update_ & + => null() !< Pointer to user_dirichlet_update to be called in fluid_scheme_step + type(bc_list_t) :: field_dirichlet_bcs !< List of BC objects to pass to user_dirichlet_update + type(field_list_t) :: field_dirichlet_fields !< List of fields to pass to user_dirichlet_update + type(dirichlet_t) :: bc_prs !< Dirichlet pressure condition type(dong_outflow_t) :: bc_dong !< Dong outflow condition type(symmetry_t) :: bc_sym !< Symmetry plane for velocity type(bc_list_t) :: bclst_vel !< List of velocity conditions type(bc_list_t) :: bclst_prs !< List of pressure conditions - type(field_t) :: bdry !< Boundary markings - type(json_file), pointer :: params !< Parameters + type(field_t) :: bdry !< Boundary markings + type(json_file), pointer :: params !< Parameters type(mesh_t), pointer :: msh => null() !< Mesh type(chkp_t) :: chkp !< Checkpoint type(mean_flow_t) :: mean !< Mean flow field @@ -119,12 +131,12 @@ module fluid_scheme logical :: forced_flow_rate = .false. !< Is the flow rate forced? logical :: freeze = .false. !< Freeze velocity at initial condition? !> Dynamic viscosity - real(kind=rp), pointer :: mu + real(kind=rp), pointer :: mu => null() !> Density - real(kind=rp), pointer :: rho + real(kind=rp), pointer :: rho => null() type(scratch_registry_t) :: scratch !< Manager for temporary fields !> Boundary condition labels (if any) - character(len=20), allocatable :: bc_labels(:) + character(len=NEKO_MSH_MAX_ZLBL_LEN), allocatable :: bc_labels(:) contains procedure, pass(this) :: fluid_scheme_init_all procedure, pass(this) :: fluid_scheme_init_uvw @@ -155,7 +167,7 @@ subroutine fluid_scheme_init_intrf(this, msh, lx, params, user, & integer, intent(inout) :: lx type(json_file), target, intent(inout) :: params type(user_t), intent(in) :: user - type(material_properties_t), intent(inout) :: material_properties + type(material_properties_t), target, intent(inout) :: material_properties end subroutine fluid_scheme_init_intrf end interface @@ -166,18 +178,20 @@ subroutine fluid_scheme_free_intrf(this) class(fluid_scheme_t), intent(inout) :: this end subroutine fluid_scheme_free_intrf end interface - + !> Abstract interface to compute a time-step abstract interface - subroutine fluid_scheme_step_intrf(this, t, tstep, dt, ext_bdf) + subroutine fluid_scheme_step_intrf(this, t, tstep, dt, ext_bdf, dt_controller) import fluid_scheme_t import time_scheme_controller_t + import time_step_controller_t import rp - class(fluid_scheme_t), intent(inout) :: this + class(fluid_scheme_t), target, intent(inout) :: this real(kind=rp), intent(inout) :: t integer, intent(inout) :: tstep real(kind=rp), intent(in) :: dt type(time_scheme_controller_t), intent(inout) :: ext_bdf + type(time_step_controller_t), intent(in) :: dt_controller end subroutine fluid_scheme_step_intrf end interface @@ -192,7 +206,6 @@ subroutine fluid_scheme_restart_intrf(this, dtlag, tlag) end subroutine fluid_scheme_restart_intrf end interface - contains !> Initialize common data for the current scheme @@ -211,11 +224,11 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, & real(kind=rp), allocatable :: real_vec(:) real(kind=rp) :: real_val logical :: logical_val - integer :: integer_val + integer :: integer_val, ierr character(len=:), allocatable :: string_val1, string_val2 ! A local pointer that is needed to make Intel happy - + call neko_log%section('Fluid') write(log_buf, '(A, A)') 'Type : ', trim(scheme) call neko_log%message(log_buf) @@ -270,12 +283,6 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, & write(log_buf, '(A, L1)') 'Save bdry : ', logical_val call neko_log%message(log_buf) - call json_get_or_default(params, & - 'case.fluid.velocity_solver.max_iterations', & - this%ksp_vel_maxiter, 800) - call json_get_or_default(params, & - 'case.fluid.pressure_solver.max_iterations', & - this%ksp_pr_maxiter, 800) call json_get_or_default(params, & 'case.fluid.velocity_solver.projection_space_size',& @@ -283,12 +290,19 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, & call json_get_or_default(params, & 'case.fluid.pressure_solver.projection_space_size',& this%pr_projection_dim, 20) + call json_get_or_default(params, & + 'case.fluid.velocity_solver.projection_hold_steps',& + this%vel_projection_activ_step, 5) + call json_get_or_default(params, & + 'case.fluid.pressure_solver.projection_hold_steps',& + this%pr_projection_activ_step, 5) + call json_get_or_default(params, 'case.fluid.freeze', this%freeze, .false.) - if (params%valid_path("case.fluid.flow_rate_force")) then - this%forced_flow_rate = .true. - end if + if (params%valid_path("case.fluid.flow_rate_force")) then + this%forced_flow_rate = .true. + end if if (msh%gdim .eq. 2) then call this%Xh%init(GLL, lx, lx) @@ -306,12 +320,12 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, & call this%c_Xh%init(this%gs_Xh) - + this%scratch = scratch_registry_t(this%dm_Xh, 10, 2) allocate(this%bc_labels(NEKO_MSH_MAX_ZLBLS)) this%bc_labels = "not" - + ! ! Setup velocity boundary conditions ! @@ -320,19 +334,19 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, & 'case.fluid.boundary_types', & this%bc_labels) end if - + call bc_list_init(this%bclst_vel) - call this%bc_sym%init(this%dm_Xh) + call this%bc_sym%init(this%c_Xh) call this%bc_sym%mark_zone(msh%sympln) call this%bc_sym%mark_zones_from_list(msh%labeled_zones,& 'sym', this%bc_labels) call this%bc_sym%finalize() - call this%bc_sym%init_msk(this%c_Xh) + call this%bc_sym%init_msk() call bc_list_add(this%bclst_vel, this%bc_sym) ! - ! Inflow + ! Inflow ! if (params%valid_path('case.fluid.inflow_condition')) then call json_get(params, 'case.fluid.inflow_condition.type', string_val1) @@ -346,7 +360,7 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, & call neko_error('Invalid inflow condition '//string_val1) end if - call this%bc_inflow%init(this%dm_Xh) + call this%bc_inflow%init(this%c_Xh) call this%bc_inflow%mark_zone(msh%inlet) call this%bc_inflow%mark_zones_from_list(msh%labeled_zones,& 'v', this%bc_labels) @@ -354,8 +368,8 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, & call bc_list_add(this%bclst_vel, this%bc_inflow) if (trim(string_val1) .eq. "uniform") then - call json_get(params, 'case.fluid.inflow_condition.value', real_vec) - call this%bc_inflow%set_inflow(real_vec) + call json_get(params, 'case.fluid.inflow_condition.value', real_vec) + call this%bc_inflow%set_inflow(real_vec) else if (trim(string_val1) .eq. "blasius") then select type(bc_if => this%bc_inflow) type is(blasius_t) @@ -372,26 +386,97 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, & end select else if (trim(string_val1) .eq. "user") then select type(bc_if => this%bc_inflow) - type is(usr_inflow_t) + type is(usr_inflow_t) call bc_if%set_coef(this%C_Xh) end select end if end if - - call this%bc_wall%init(this%dm_Xh) + + call this%bc_wall%init(this%c_Xh) call this%bc_wall%mark_zone(msh%wall) call this%bc_wall%mark_zones_from_list(msh%labeled_zones,& 'w', this%bc_labels) call this%bc_wall%finalize() call bc_list_add(this%bclst_vel, this%bc_wall) - + + ! Setup field dirichlet bc for u-velocity + call this%bc_field_vel%field_dirichlet_u%init(this%c_Xh) + call this%bc_field_vel%field_dirichlet_u%mark_zones_from_list(msh%labeled_zones,& + 'd_vel_u', this%bc_labels) + call this%bc_field_vel%field_dirichlet_u%finalize() + + call MPI_Allreduce(this%bc_field_vel%field_dirichlet_u%msk(0), integer_val, 1, & + MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr) + if (integer_val .gt. 0) call this%bc_field_vel%field_dirichlet_u%init_field('d_vel_u') + + ! Setup field dirichlet bc for v-velocity + call this%bc_field_vel%field_dirichlet_v%init(this%c_Xh) + call this%bc_field_vel%field_dirichlet_v%mark_zones_from_list(msh%labeled_zones,& + 'd_vel_v', this%bc_labels) + call this%bc_field_vel%field_dirichlet_v%finalize() + + call MPI_Allreduce(this%bc_field_vel%field_dirichlet_v%msk(0), integer_val, 1, & + MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr) + if (integer_val .gt. 0) call this%bc_field_vel%field_dirichlet_v%init_field('d_vel_v') + + ! Setup field dirichlet bc for w-velocity + call this%bc_field_vel%field_dirichlet_w%init(this%c_Xh) + call this%bc_field_vel%field_dirichlet_w%mark_zones_from_list(msh%labeled_zones,& + 'd_vel_w', this%bc_labels) + call this%bc_field_vel%field_dirichlet_w%finalize() + + call MPI_Allreduce(this%bc_field_vel%field_dirichlet_w%msk(0), integer_val, 1, & + MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr) + if (integer_val .gt. 0) call this%bc_field_vel%field_dirichlet_w%init_field('d_vel_w') + + ! Setup our global field dirichlet bc + call this%bc_field_vel%init(this%c_Xh) + call this%bc_field_vel%mark_zones_from_list(msh%labeled_zones,& + 'd_vel_u', this%bc_labels) + call this%bc_field_vel%mark_zones_from_list(msh%labeled_zones,& + 'd_vel_v', this%bc_labels) + call this%bc_field_vel%mark_zones_from_list(msh%labeled_zones,& + 'd_vel_w', this%bc_labels) + call this%bc_field_vel%finalize() + + ! Add the field bc to velocity bcs + call bc_list_add(this%bclst_vel, this%bc_field_vel) + + ! + ! Associate our field dirichlet update to the user one. + ! + this%dirichlet_update_ => user%user_dirichlet_update + + ! + ! Initialize field list and bc list for user_dirichlet_update + ! + allocate(this%field_dirichlet_fields%fields(4)) + + this%field_dirichlet_fields%fields(1)%f => & + this%bc_field_vel%field_dirichlet_u%field_bc + this%field_dirichlet_fields%fields(2)%f => & + this%bc_field_vel%field_dirichlet_v%field_bc + this%field_dirichlet_fields%fields(3)%f => & + this%bc_field_vel%field_dirichlet_w%field_bc + this%field_dirichlet_fields%fields(4)%f => & + this%bc_field_prs%field_bc + + call bc_list_init(this%field_dirichlet_bcs, size=4) + call bc_list_add(this%field_dirichlet_bcs, this%bc_field_vel%field_dirichlet_u) + call bc_list_add(this%field_dirichlet_bcs, this%bc_field_vel%field_dirichlet_v) + call bc_list_add(this%field_dirichlet_bcs, this%bc_field_vel%field_dirichlet_w) + + ! + ! Check if we need to output boundaries + ! call json_get_or_default(params, 'case.output_boundary', logical_val,& .false.) + if (logical_val) then call this%bdry%init(this%dm_Xh, 'bdry') this%bdry = 0.0_rp - - call bdry_mask%init(this%dm_Xh) + + call bdry_mask%init(this%c_Xh) call bdry_mask%mark_zone(msh%wall) call bdry_mask%mark_zones_from_list(msh%labeled_zones,& 'w', this%bc_labels) @@ -400,17 +485,16 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, & call bdry_mask%apply_scalar(this%bdry%x, this%dm_Xh%size()) call bdry_mask%free() - call bdry_mask%init(this%dm_Xh) + call bdry_mask%init(this%c_Xh) call bdry_mask%mark_zone(msh%inlet) call bdry_mask%mark_zones_from_list(msh%labeled_zones,& 'v', this%bc_labels) - call bdry_mask%finalize() call bdry_mask%set_g(2.0_rp) call bdry_mask%apply_scalar(this%bdry%x, this%dm_Xh%size()) call bdry_mask%free() - call bdry_mask%init(this%dm_Xh) + call bdry_mask%init(this%c_Xh) call bdry_mask%mark_zone(msh%outlet) call bdry_mask%mark_zones_from_list(msh%labeled_zones,& 'o', this%bc_labels) @@ -419,7 +503,7 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, & call bdry_mask%apply_scalar(this%bdry%x, this%dm_Xh%size()) call bdry_mask%free() - call bdry_mask%init(this%dm_Xh) + call bdry_mask%init(this%c_Xh) call bdry_mask%mark_zone(msh%sympln) call bdry_mask%mark_zones_from_list(msh%labeled_zones,& 'sym', this%bc_labels) @@ -428,14 +512,14 @@ subroutine fluid_scheme_init_common(this, msh, lx, params, scheme, user, & call bdry_mask%apply_scalar(this%bdry%x, this%dm_Xh%size()) call bdry_mask%free() - call bdry_mask%init(this%dm_Xh) + call bdry_mask%init(this%c_Xh) call bdry_mask%mark_zone(msh%periodic) call bdry_mask%finalize() call bdry_mask%set_g(5.0_rp) call bdry_mask%apply_scalar(this%bdry%x, this%dm_Xh%size()) call bdry_mask%free() - call bdry_mask%init(this%dm_Xh) + call bdry_mask%init(this%c_Xh) call bdry_mask%mark_zone(msh%outlet_normal) call bdry_mask%mark_zones_from_list(msh%labeled_zones,& 'on', this%bc_labels) @@ -475,14 +559,14 @@ subroutine fluid_scheme_init_uvw(this, msh, lx, params, kspv_init, scheme, & logical :: kspv_init character(len=*), intent(in) :: scheme ! Variables for extracting json - logical :: found, logical_val real(kind=rp) :: abs_tol character(len=:), allocatable :: solver_type, precon_type + integer :: ksp_vel_maxiter call fluid_scheme_init_common(this, msh, lx, params, scheme, user, & material_properties) - + call neko_field_registry%add_field(this%dm_Xh, 'u') call neko_field_registry%add_field(this%dm_Xh, 'v') call neko_field_registry%add_field(this%dm_Xh, 'w') @@ -497,8 +581,11 @@ subroutine fluid_scheme_init_uvw(this, msh, lx, params, kspv_init, scheme, & abs_tol) if (kspv_init) then + call json_get_or_default(params, & + 'case.fluid.velocity_solver.max_iterations', & + ksp_vel_maxiter, 800) call fluid_scheme_solver_factory(this%ksp_vel, this%dm_Xh%size(), & - solver_type, abs_tol) + solver_type, ksp_vel_maxiter, abs_tol) call fluid_scheme_precon_factory(this%pc_vel, this%ksp_vel, & this%c_Xh, this%dm_Xh, this%gs_Xh, this%bclst_vel, precon_type) end if @@ -521,7 +608,7 @@ subroutine fluid_scheme_init_all(this, msh, lx, params, kspv_init, kspp_init,& character(len=*), intent(in) :: scheme real(kind=rp) :: real_val, dong_delta, dong_uchar real(kind=rp), allocatable :: real_vec(:) - integer :: integer_val + integer :: integer_val, ierr character(len=:), allocatable :: string_val1, string_val2 call fluid_scheme_init_common(this, msh, lx, params, scheme, user, & @@ -536,16 +623,34 @@ subroutine fluid_scheme_init_all(this, msh, lx, params, kspv_init, kspp_init,& this%w => neko_field_registry%get_field('w') this%p => neko_field_registry%get_field('p') + !! lag fields + call this%ulag%init(this%u, 2) + call this%vlag%init(this%v, 2) + call this%wlag%init(this%w, 2) + + ! ! Setup pressure boundary conditions ! call bc_list_init(this%bclst_prs) - call this%bc_prs%init(this%dm_Xh) + call this%bc_prs%init(this%c_Xh) call this%bc_prs%mark_zones_from_list(msh%labeled_zones,& 'o', this%bc_labels) call this%bc_prs%mark_zones_from_list(msh%labeled_zones,& 'on', this%bc_labels) + ! Field dirichlet pressure bc + call this%bc_field_prs%init(this%c_Xh) + call this%bc_field_prs%mark_zones_from_list(msh%labeled_zones,& + 'd_pres', this%bc_labels) + call this%bc_field_prs%finalize() + call MPI_Allreduce(this%bc_field_prs%msk(0), integer_val, 1, & + MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr) + + if (integer_val .gt. 0) call this%bc_field_prs%init_field('d_pres') + call bc_list_add(this%bclst_prs, this%bc_field_prs) + call bc_list_add(this%field_dirichlet_bcs, this%bc_field_prs) + if (msh%outlet%size .gt. 0) then call this%bc_prs%mark_zone(msh%outlet) end if @@ -556,7 +661,7 @@ subroutine fluid_scheme_init_all(this, msh, lx, params, kspv_init, kspp_init,& call this%bc_prs%finalize() call this%bc_prs%set_g(0.0_rp) call bc_list_add(this%bclst_prs, this%bc_prs) - call this%bc_dong%init(this%dm_Xh) + call this%bc_dong%init(this%c_Xh) call this%bc_dong%mark_zones_from_list(msh%labeled_zones,& 'o+dong', this%bc_labels) call this%bc_dong%mark_zones_from_list(msh%labeled_zones,& @@ -568,13 +673,16 @@ subroutine fluid_scheme_init_all(this, msh, lx, params, kspv_init, kspp_init,& call json_get_or_default(params, 'case.fluid.outflow_condition.velocity_scale',& dong_uchar, 1.0_rp) - call this%bc_dong%set_vars(this%c_Xh, this%u, this%v, this%w,& - dong_uchar, dong_delta) + + call this%bc_dong%set_vars(dong_uchar, dong_delta) call bc_list_add(this%bclst_prs, this%bc_dong) if (kspv_init) then + call json_get_or_default(params, & + 'case.fluid.velocity_solver.max_iterations', & + integer_val, 800) call json_get(params, 'case.fluid.velocity_solver.type', string_val1) call json_get(params, 'case.fluid.velocity_solver.preconditioner', & string_val2) @@ -582,12 +690,15 @@ subroutine fluid_scheme_init_all(this, msh, lx, params, kspv_init, kspp_init,& real_val) call fluid_scheme_solver_factory(this%ksp_vel, this%dm_Xh%size(), & - string_val1, real_val) + string_val1, integer_val, real_val) call fluid_scheme_precon_factory(this%pc_vel, this%ksp_vel, & this%c_Xh, this%dm_Xh, this%gs_Xh, this%bclst_vel, string_val2) end if if (kspp_init) then + call json_get_or_default(params, & + 'case.fluid.pressure_solver.max_iterations', & + integer_val, 800) call json_get(params, 'case.fluid.pressure_solver.type', string_val1) call json_get(params, 'case.fluid.pressure_solver.preconditioner', & string_val2) @@ -595,20 +706,19 @@ subroutine fluid_scheme_init_all(this, msh, lx, params, kspv_init, kspp_init,& real_val) call fluid_scheme_solver_factory(this%ksp_prs, this%dm_Xh%size(), & - string_val1, real_val) + string_val1, integer_val, real_val) call fluid_scheme_precon_factory(this%pc_prs, this%ksp_prs, & this%c_Xh, this%dm_Xh, this%gs_Xh, this%bclst_prs, string_val2) end if call neko_log%end_section() - + end subroutine fluid_scheme_init_all !> Deallocate a fluid formulation subroutine fluid_scheme_free(this) class(fluid_scheme_t), intent(inout) :: this - integer :: i call this%bdry%free() @@ -619,6 +729,22 @@ subroutine fluid_scheme_free(this) call this%bc_wall%free() call this%bc_sym%free() + ! + ! Free everything related to field_dirichlet BCs + ! + call this%bc_field_prs%field_bc%free() + call this%bc_field_prs%free() + call this%bc_field_vel%field_dirichlet_u%field_bc%free() + call this%bc_field_vel%field_dirichlet_v%field_bc%free() + call this%bc_field_vel%field_dirichlet_w%field_bc%free() + call this%bc_field_vel%free() + + call this%field_dirichlet_fields%free() + call bc_list_free(this%field_dirichlet_bcs) + if (associated(this%dirichlet_update_)) then + this%dirichlet_update_ => null() + end if + call this%Xh%free() if (allocated(this%ksp_vel)) then @@ -652,7 +778,7 @@ subroutine fluid_scheme_free(this) call this%c_Xh%free() call bc_list_free(this%bclst_vel) - + call this%scratch%free() nullify(this%params) @@ -662,23 +788,28 @@ subroutine fluid_scheme_free(this) nullify(this%w) nullify(this%p) + call this%ulag%free() + call this%vlag%free() + call this%wlag%free() + + if (associated(this%f_x)) then - call this%f_x%free() + call this%f_x%free() end if if (associated(this%f_y)) then - call this%f_y%free() + call this%f_y%free() end if if (associated(this%f_z)) then - call this%f_z%free() + call this%f_z%free() end if nullify(this%f_x) nullify(this%f_y) nullify(this%f_z) - - + + end subroutine fluid_scheme_free !> Validate that all fields, solvers etc necessary for @@ -686,7 +817,7 @@ end subroutine fluid_scheme_free subroutine fluid_scheme_validate(this) class(fluid_scheme_t), target, intent(inout) :: this ! Variables for retrieving json parameters - logical :: found, logical_val + logical :: logical_val if ( (.not. associated(this%u)) .or. & (.not. associated(this%v)) .or. & @@ -694,7 +825,7 @@ subroutine fluid_scheme_validate(this) (.not. associated(this%p))) then call neko_error('Fields are not registered') end if - + if ( (.not. allocated(this%u%x)) .or. & (.not. allocated(this%v%x)) .or. & (.not. allocated(this%w%x)) .or. & @@ -705,7 +836,7 @@ subroutine fluid_scheme_validate(this) if (.not. allocated(this%ksp_vel)) then call neko_error('No Krylov solver for velocity defined') end if - + if (.not. allocated(this%ksp_prs)) then call neko_error('No Krylov solver for pressure defined') end if @@ -714,10 +845,12 @@ subroutine fluid_scheme_validate(this) call neko_error('No parameters defined') end if - select type(ip => this%bc_inflow) - type is(usr_inflow_t) - call ip%validate - end select + if (allocated(this%bc_inflow)) then + select type(ip => this%bc_inflow) + type is(usr_inflow_t) + call ip%validate + end select + end if ! ! Setup checkpoint structure (if everything is fine) @@ -744,35 +877,42 @@ subroutine fluid_scheme_validate(this) end subroutine fluid_scheme_validate !> Apply all boundary conditions defined for velocity + !! Here we perform additional gs operations to take care of + !! shared points between elements that have different BCs, as done in Nek5000. !! @todo Why can't we call the interface here? subroutine fluid_scheme_bc_apply_vel(this, t, tstep) class(fluid_scheme_t), intent(inout) :: this real(kind=rp), intent(in) :: t integer, intent(in) :: tstep + call bc_list_apply_vector(this%bclst_vel,& this%u%x, this%v%x, this%w%x, this%dm_Xh%size(), t, tstep) + end subroutine fluid_scheme_bc_apply_vel - + !> Apply all boundary conditions defined for pressure !! @todo Why can't we call the interface here? subroutine fluid_scheme_bc_apply_prs(this, t, tstep) class(fluid_scheme_t), intent(inout) :: this real(kind=rp), intent(in) :: t integer, intent(in) :: tstep + call bc_list_apply_scalar(this%bclst_prs, this%p%x, & this%p%dof%size(), t, tstep) + end subroutine fluid_scheme_bc_apply_prs - + !> Initialize a linear solver !! @note Currently only supporting Krylov solvers - subroutine fluid_scheme_solver_factory(ksp, n, solver, abstol) + subroutine fluid_scheme_solver_factory(ksp, n, solver, max_iter, abstol) class(ksp_t), allocatable, target, intent(inout) :: ksp integer, intent(in), value :: n character(len=*), intent(in) :: solver + integer, intent(in) :: max_iter real(kind=rp), intent(in) :: abstol - - call krylov_solver_factory(ksp, n, solver, abstol) - + + call krylov_solver_factory(ksp, n, solver, max_iter, abstol) + end subroutine fluid_scheme_solver_factory !> Initialize a Krylov preconditioner @@ -784,9 +924,9 @@ subroutine fluid_scheme_precon_factory(pc, ksp, coef, dof, gs, bclst, pctype) type(gs_t), target, intent(inout) :: gs type(bc_list_t), target, intent(inout) :: bclst character(len=*) :: pctype - + call precon_factory(pc, pctype) - + select type(pcp => pc) type is(jacobi_t) call pcp%init(coef, dof, gs) @@ -808,7 +948,7 @@ subroutine fluid_scheme_precon_factory(pc, ksp, coef, dof, gs, bclst, pctype) end select call ksp%set_pc(pc) - + end subroutine fluid_scheme_precon_factory !> Initialize a user defined inflow condition @@ -818,9 +958,9 @@ subroutine fluid_scheme_set_usr_inflow(this, usr_eval) select type(bc_if => this%bc_inflow) type is(usr_inflow_t) - call bc_if%set_eval(usr_eval) + call bc_if%set_eval(usr_eval) class default - call neko_error("Not a user defined inflow condition") + call neko_error("Not a user defined inflow condition") end select end subroutine fluid_scheme_set_usr_inflow @@ -832,7 +972,7 @@ function fluid_compute_cfl(this, dt) result(c) c = cfl(dt, this%u%x, this%v%x, this%w%x, & this%Xh, this%c_Xh, this%msh%nelv, this%msh%gdim) - + end function fluid_compute_cfl - + end module fluid_scheme diff --git a/src/fluid/fluid_source_term.f90 b/src/fluid/fluid_source_term.f90 index 69a86da6525..52bfe345503 100644 --- a/src/fluid/fluid_source_term.f90 +++ b/src/fluid/fluid_source_term.f90 @@ -43,7 +43,8 @@ module fluid_source_term use json_utils, only : json_get use json_module, only : json_file, json_core, json_value use coefs, only : coef_t - use user_intf, only : user_t + use user_intf, only : user_t + use utils, only : neko_warning implicit none private @@ -65,7 +66,7 @@ module fluid_source_term procedure, pass(this) :: init => fluid_source_term_init !> Destructor. procedure, pass(this) :: free => fluid_source_term_free - !> Add all the source term to the passed right-hand side fields. + !> Add all the source terms to the passed right-hand side fields. procedure, pass(this) :: compute => fluid_source_term_compute !> Initialize the user source term. procedure, nopass, private :: init_user_source @@ -86,7 +87,7 @@ subroutine fluid_source_term_init(this, json, f_x, f_y, f_z, coef, user) ! Json low-level manipulator. type(json_core) :: core ! Pointer to the source_terms JSON object and the individual sources. - type(json_value), pointer :: source_object, source_pointer + type(json_value), pointer :: source_object, source_pointer ! Buffer for serializing the json. character(len=:), allocatable :: buffer ! A single source term as its own json_file. @@ -106,7 +107,7 @@ subroutine fluid_source_term_init(this, json, f_x, f_y, f_z, coef, user) if (json%valid_path('case.fluid.source_terms')) then - ! We package the fields for the source term to operate on in a field list. + ! We package the fields for the source term to operate on in a field list. allocate(rhs_fields%fields(3)) rhs_fields%fields(1)%f => f_x rhs_fields%fields(2)%f => f_y @@ -120,7 +121,7 @@ subroutine fluid_source_term_init(this, json, f_x, f_y, f_z, coef, user) do i=1, n_sources - ! Create a new json containing just the subdict for this source. + ! Create a new json containing just the subdict for this source. call core%get_child(source_object, i, source_pointer, found) call core%print_to_string(source_pointer, buffer) call source_subdict%load_from_string(buffer) @@ -129,24 +130,30 @@ subroutine fluid_source_term_init(this, json, f_x, f_y, f_z, coef, user) ! The user source is treated separately if ((trim(type) .eq. "user_vector") .or. & (trim(type) .eq. "user_pointwise")) then - - call init_user_source(this%source_terms(i)%source_term, & + + if (source_subdict%valid_path("start_time") .or. & + source_subdict%valid_path("end_time")) then + call neko_warning("The start_time and end_time parameters have& + & no effect on the fluid user source term") + end if + + call init_user_source(this%source_terms(i)%source_term, & rhs_fields, coef, type, user) - else - - call source_term_factory(this%source_terms(i)%source_term, & + else + + call source_term_factory(this%source_terms(i)%source_term, & source_subdict, rhs_fields, coef) end if - end do + end do end if - + end subroutine fluid_source_term_init !> Initialize the user source term. !! @param source_term The allocatable source term to be initialized to a user. !! @param rhs_fields The field list with the 3 right-hand-side components. !! @param coef The SEM coefs. - !! @param type The type of the user source term, "user_vector" or + !! @param type The type of the user source term, "user_vector" or !! "user_poinwise". !! @param user The user type containing the user source term routines. subroutine init_user_source(source_term, rhs_fields, coef, type, user) @@ -160,11 +167,11 @@ subroutine init_user_source(source_term, rhs_fields, coef, type, user) select type (source_term) type is (fluid_user_source_term_t) - call source_term%init_from_components(rhs_fields, coef, type, & + call source_term%init_from_components(rhs_fields, coef, type, & user%fluid_user_f_vector, & user%fluid_user_f) end select - end subroutine + end subroutine init_user_source !> Destructor. subroutine fluid_source_term_free(this) diff --git a/src/fluid/fluid_stats.f90 b/src/fluid/fluid_stats.f90 index e2a6434dec2..a80f88073b3 100644 --- a/src/fluid/fluid_stats.f90 +++ b/src/fluid/fluid_stats.f90 @@ -156,7 +156,7 @@ subroutine fluid_stats_init(this, coef, u_mf,v_mf,w_mf,p_mf) type(coef_t), target, optional :: coef type(mean_field_t), target, intent(inout) :: u_mf, v_mf, w_mf, p_mf this%coef => coef - + this%u_mean => u_mf%mf this%v_mean => v_mf%mf this%w_mean => w_mf%mf @@ -171,7 +171,7 @@ subroutine fluid_stats_init(this, coef, u_mf,v_mf,w_mf,p_mf) call this%stats_v%init(this%u%dof, 'v temp') call this%stats_w%init(this%u%dof, 'w temp') call this%stats_p%init(this%u%dof, 'p temp') - + call this%dudx%init(this%u%dof, 'dudx') call this%dudy%init(this%u%dof, 'dudy') call this%dudz%init(this%u%dof, 'dudz') @@ -181,7 +181,7 @@ subroutine fluid_stats_init(this, coef, u_mf,v_mf,w_mf,p_mf) call this%dwdx%init(this%u%dof, 'dwdx') call this%dwdy%init(this%u%dof, 'dwdy') call this%dwdz%init(this%u%dof, 'dwdz') - + call this%uu%init(this%stats_u, 'uu') call this%vv%init(this%stats_v, 'vv') call this%ww%init(this%stats_w, 'ww') @@ -205,11 +205,11 @@ subroutine fluid_stats_init(this, coef, u_mf,v_mf,w_mf,p_mf) call this%pp%init(this%stats_p, 'pp') call this%ppp%init(this%stats_work, 'ppp') call this%pppp%init(this%stats_work, 'pppp') - !> Pressure * velocity + !> Pressure * velocity call this%pu%init(this%stats_work, 'pu') call this%pv%init(this%stats_work, 'pv') call this%pw%init(this%stats_work, 'pw') - + call this%pdudx%init(this%stats_work, 'pdudx') call this%pdudy%init(this%stats_work, 'pdudy') call this%pdudz%init(this%stats_work, 'pdudz') @@ -284,223 +284,223 @@ subroutine fluid_stats_update(this, k) associate(stats_work => this%stats_work, stats_u => this%stats_u,& stats_v => this%stats_v, stats_w => this%stats_w, stats_p => this%stats_p) - n = stats_work%dof%size() - - !> U%f is u and U%mf is - if (NEKO_BCKND_DEVICE .eq. 1) then - - call device_col3(stats_u%x_d,this%u%x_d, this%u%x_d,n) - call device_col3(stats_v%x_d,this%v%x_d, this%v%x_d,n) - call device_col3(stats_w%x_d,this%w%x_d, this%w%x_d,n) - call device_col3(stats_p%x_d,this%p%x_d, this%p%x_d,n) - - call this%uu%update(k) - call this%vv%update(k) - call this%ww%update(k) - call this%pp%update(k) - - call device_col3(stats_work%x_d,this%u%x_d, this%v%x_d,n) - call this%uv%update(k) - call device_col3(stats_work%x_d,this%u%x_d, this%w%x_d,n) - call this%uw%update(k) - call device_col3(stats_work%x_d,this%v%x_d, this%w%x_d,n) - call this%vw%update(k) - - call device_col2(stats_work%x_d, this%u%x_d,n) - call this%uvw%update(k) - call device_col3(stats_work%x_d,this%stats_u%x_d, this%u%x_d,n) - call this%uuu%update(k) - call device_col3(stats_work%x_d,this%stats_v%x_d, this%v%x_d,n) - call this%vvv%update(k) - call device_col3(stats_work%x_d,this%stats_w%x_d, this%w%x_d,n) - call this%www%update(k) - call device_col3(stats_work%x_d,this%stats_u%x_d, this%v%x_d,n) - call this%uuv%update(k) - call device_col3(stats_work%x_d,this%stats_u%x_d, this%w%x_d,n) - call this%uuw%update(k) - call device_col3(stats_work%x_d,this%stats_v%x_d, this%u%x_d,n) - call this%uvv%update(k) - call device_col3(stats_work%x_d,this%stats_v%x_d, this%w%x_d,n) - call this%vvw%update(k) - call device_col3(stats_work%x_d,this%stats_w%x_d, this%u%x_d,n) - call this%uww%update(k) - call device_col3(stats_work%x_d,this%stats_w%x_d, this%v%x_d,n) - call this%vww%update(k) - - call device_col3(stats_work%x_d,this%stats_u%x_d, this%stats_u%x_d,n) - call this%uuuu%update(k) - call device_col3(stats_work%x_d,this%stats_v%x_d, this%stats_v%x_d,n) - call this%vvvv%update(k) - call device_col3(stats_work%x_d,this%stats_w%x_d, this%stats_w%x_d,n) - call this%wwww%update(k) - - call device_col3(stats_work%x_d,this%stats_p%x_d, this%p%x_d,n) - call this%ppp%update(k) - call device_col3(stats_work%x_d,this%stats_p%x_d, this%stats_p%x_d,n) - call this%pppp%update(k) - - call device_col3(stats_work%x_d,this%p%x_d, this%u%x_d,n) - call this%pu%update(k) - call device_col3(stats_work%x_d,this%p%x_d, this%v%x_d,n) - call this%pv%update(k) - call device_col3(stats_work%x_d,this%p%x_d, this%w%x_d,n) - call this%pw%update(k) - - else - - call col3(stats_u%x,this%u%x, this%u%x,n) - call col3(stats_v%x,this%v%x, this%v%x,n) - call col3(stats_w%x,this%w%x, this%w%x,n) - call col3(stats_p%x,this%p%x, this%p%x,n) - - call this%uu%update(k) - call this%vv%update(k) - call this%ww%update(k) - call this%pp%update(k) - - call col3(stats_work%x,this%u%x, this%v%x,n) - call this%uv%update(k) - call col3(stats_work%x,this%u%x, this%w%x,n) - call this%uw%update(k) - call col3(stats_work%x,this%v%x, this%w%x,n) - call this%vw%update(k) - - call col2(stats_work%x, this%u%x,n) - call this%uvw%update(k) - call col3(stats_work%x,this%stats_u%x, this%u%x,n) - call this%uuu%update(k) - call col3(stats_work%x,this%stats_v%x, this%v%x,n) - call this%vvv%update(k) - call col3(stats_work%x,this%stats_w%x, this%w%x,n) - call this%www%update(k) - call col3(stats_work%x,this%stats_u%x, this%v%x,n) - call this%uuv%update(k) - call col3(stats_work%x,this%stats_u%x, this%w%x,n) - call this%uuw%update(k) - call col3(stats_work%x,this%stats_v%x, this%u%x,n) - call this%uvv%update(k) - call col3(stats_work%x,this%stats_v%x, this%w%x,n) - call this%vvw%update(k) - call col3(stats_work%x,this%stats_w%x, this%u%x,n) - call this%uww%update(k) - call col3(stats_work%x,this%stats_w%x, this%v%x,n) - call this%vww%update(k) - - call col3(stats_work%x,this%stats_u%x, this%stats_u%x,n) - call this%uuuu%update(k) - call col3(stats_work%x,this%stats_v%x, this%stats_v%x,n) - call this%vvvv%update(k) - call col3(stats_work%x,this%stats_w%x, this%stats_w%x,n) - call this%wwww%update(k) - - call col3(stats_work%x,this%stats_p%x, this%p%x,n) - call this%ppp%update(k) - call col3(stats_work%x,this%stats_p%x, this%stats_p%x,n) - call this%pppp%update(k) - - call col3(stats_work%x,this%p%x, this%u%x,n) - call this%pu%update(k) - call col3(stats_work%x,this%p%x, this%v%x,n) - call this%pv%update(k) - call col3(stats_work%x,this%p%x, this%w%x,n) - call this%pw%update(k) - - - end if - call opgrad(this%dudx%x,this%dudy%x, this%dudz%x,this%u%x,this%coef) - call opgrad(this%dvdx%x,this%dvdy%x, this%dvdz%x,this%v%x,this%coef) - call opgrad(this%dwdx%x,this%dwdy%x, this%dwdz%x,this%w%x,this%coef) - - if (NEKO_BCKND_DEVICE .eq. 1) then - call device_col3(stats_work%x_d,this%dudx%x_d, this%p%x_d,n) - call this%pdudx%update(k) - call device_col3(stats_work%x_d,this%dudy%x_d, this%p%x_d,n) - call this%pdudy%update(k) - call device_col3(stats_work%x_d,this%dudz%x_d, this%p%x_d,n) - call this%pdudz%update(k) - - call device_col3(stats_work%x_d,this%dvdx%x_d, this%p%x_d,n) - call this%pdvdx%update(k) - call device_col3(stats_work%x_d,this%dvdy%x_d, this%p%x_d,n) - call this%pdvdy%update(k) - call device_col3(stats_work%x_d,this%dvdz%x_d, this%p%x_d,n) - call this%pdvdz%update(k) - - call device_col3(stats_work%x_d,this%dwdx%x_d, this%p%x_d,n) - call this%pdwdx%update(k) - call device_col3(stats_work%x_d,this%dwdy%x_d, this%p%x_d,n) - call this%pdwdy%update(k) - call device_col3(stats_work%x_d,this%dwdz%x_d, this%p%x_d,n) - call this%pdwdz%update(k) - - call device_col3(this%stats_work%x_d,this%dudx%x_d, this%dudx%x_d,n) - call device_addcol3(this%stats_work%x_d,this%dudy%x_d, this%dudy%x_d,n) - call device_addcol3(this%stats_work%x_d,this%dudz%x_d, this%dudz%x_d,n) - call this%e11%update(k) - call device_col3(this%stats_work%x_d,this%dvdx%x_d, this%dvdx%x_d,n) - call device_addcol3(this%stats_work%x_d,this%dvdy%x_d, this%dvdy%x_d,n) - call device_addcol3(this%stats_work%x_d,this%dvdz%x_d, this%dvdz%x_d,n) - call this%e22%update(k) - call device_col3(this%stats_work%x_d,this%dwdx%x_d, this%dwdx%x_d,n) - call device_addcol3(this%stats_work%x_d,this%dwdy%x_d, this%dwdy%x_d,n) - call device_addcol3(this%stats_work%x_d,this%dwdz%x_d, this%dwdz%x_d,n) - call this%e33%update(k) - call device_col3(this%stats_work%x_d,this%dudx%x_d, this%dvdx%x_d,n) - call device_addcol3(this%stats_work%x_d,this%dudy%x_d, this%dvdy%x_d,n) - call device_addcol3(this%stats_work%x_d,this%dudz%x_d, this%dvdz%x_d,n) - call this%e12%update(k) - call device_col3(this%stats_work%x_d,this%dvdx%x_d, this%dwdx%x_d,n) - call device_addcol3(this%stats_work%x_d,this%dvdy%x_d, this%dwdy%x_d,n) - call device_addcol3(this%stats_work%x_d,this%dvdz%x_d, this%dwdz%x_d,n) - call this%e23%update(k) - - - else - call col3(stats_work%x,this%dudx%x, this%p%x,n) - call this%pdudx%update(k) - call col3(stats_work%x,this%dudy%x, this%p%x,n) - call this%pdudy%update(k) - call col3(stats_work%x,this%dudz%x, this%p%x,n) - call this%pdudz%update(k) - - call col3(stats_work%x,this%dvdx%x, this%p%x,n) - call this%pdvdx%update(k) - call col3(stats_work%x,this%dvdy%x, this%p%x,n) - call this%pdvdy%update(k) - call col3(stats_work%x,this%dvdz%x, this%p%x,n) - call this%pdvdz%update(k) - - call col3(stats_work%x,this%dwdx%x, this%p%x,n) - call this%pdwdx%update(k) - call col3(stats_work%x,this%dwdy%x, this%p%x,n) - call this%pdwdy%update(k) - call col3(stats_work%x,this%dwdz%x, this%p%x,n) - call this%pdwdz%update(k) - - call col3(this%stats_work%x,this%dudx%x, this%dudx%x,n) - call addcol3(this%stats_work%x,this%dudy%x, this%dudy%x,n) - call addcol3(this%stats_work%x,this%dudz%x, this%dudz%x,n) - call this%e11%update(k) - call col3(this%stats_work%x,this%dvdx%x, this%dvdx%x,n) - call addcol3(this%stats_work%x,this%dvdy%x, this%dvdy%x,n) - call addcol3(this%stats_work%x,this%dvdz%x, this%dvdz%x,n) - call this%e22%update(k) - call col3(this%stats_work%x,this%dwdx%x, this%dwdx%x,n) - call addcol3(this%stats_work%x,this%dwdy%x, this%dwdy%x,n) - call addcol3(this%stats_work%x,this%dwdz%x, this%dwdz%x,n) - call this%e33%update(k) - call col3(this%stats_work%x,this%dudx%x, this%dvdx%x,n) - call addcol3(this%stats_work%x,this%dudy%x, this%dvdy%x,n) - call addcol3(this%stats_work%x,this%dudz%x, this%dvdz%x,n) - call this%e12%update(k) - call col3(this%stats_work%x,this%dvdx%x, this%dwdx%x,n) - call addcol3(this%stats_work%x,this%dvdy%x, this%dwdy%x,n) - call addcol3(this%stats_work%x,this%dvdz%x, this%dwdz%x,n) - call this%e23%update(k) - - end if - - end associate + n = stats_work%dof%size() + + !> U%f is u and U%mf is + if (NEKO_BCKND_DEVICE .eq. 1) then + + call device_col3(stats_u%x_d,this%u%x_d, this%u%x_d,n) + call device_col3(stats_v%x_d,this%v%x_d, this%v%x_d,n) + call device_col3(stats_w%x_d,this%w%x_d, this%w%x_d,n) + call device_col3(stats_p%x_d,this%p%x_d, this%p%x_d,n) + + call this%uu%update(k) + call this%vv%update(k) + call this%ww%update(k) + call this%pp%update(k) + + call device_col3(stats_work%x_d,this%u%x_d, this%v%x_d,n) + call this%uv%update(k) + call device_col3(stats_work%x_d,this%u%x_d, this%w%x_d,n) + call this%uw%update(k) + call device_col3(stats_work%x_d,this%v%x_d, this%w%x_d,n) + call this%vw%update(k) + + call device_col2(stats_work%x_d, this%u%x_d,n) + call this%uvw%update(k) + call device_col3(stats_work%x_d,this%stats_u%x_d, this%u%x_d,n) + call this%uuu%update(k) + call device_col3(stats_work%x_d,this%stats_v%x_d, this%v%x_d,n) + call this%vvv%update(k) + call device_col3(stats_work%x_d,this%stats_w%x_d, this%w%x_d,n) + call this%www%update(k) + call device_col3(stats_work%x_d,this%stats_u%x_d, this%v%x_d,n) + call this%uuv%update(k) + call device_col3(stats_work%x_d,this%stats_u%x_d, this%w%x_d,n) + call this%uuw%update(k) + call device_col3(stats_work%x_d,this%stats_v%x_d, this%u%x_d,n) + call this%uvv%update(k) + call device_col3(stats_work%x_d,this%stats_v%x_d, this%w%x_d,n) + call this%vvw%update(k) + call device_col3(stats_work%x_d,this%stats_w%x_d, this%u%x_d,n) + call this%uww%update(k) + call device_col3(stats_work%x_d,this%stats_w%x_d, this%v%x_d,n) + call this%vww%update(k) + + call device_col3(stats_work%x_d,this%stats_u%x_d, this%stats_u%x_d,n) + call this%uuuu%update(k) + call device_col3(stats_work%x_d,this%stats_v%x_d, this%stats_v%x_d,n) + call this%vvvv%update(k) + call device_col3(stats_work%x_d,this%stats_w%x_d, this%stats_w%x_d,n) + call this%wwww%update(k) + + call device_col3(stats_work%x_d,this%stats_p%x_d, this%p%x_d,n) + call this%ppp%update(k) + call device_col3(stats_work%x_d,this%stats_p%x_d, this%stats_p%x_d,n) + call this%pppp%update(k) + + call device_col3(stats_work%x_d,this%p%x_d, this%u%x_d,n) + call this%pu%update(k) + call device_col3(stats_work%x_d,this%p%x_d, this%v%x_d,n) + call this%pv%update(k) + call device_col3(stats_work%x_d,this%p%x_d, this%w%x_d,n) + call this%pw%update(k) + + else + + call col3(stats_u%x,this%u%x, this%u%x,n) + call col3(stats_v%x,this%v%x, this%v%x,n) + call col3(stats_w%x,this%w%x, this%w%x,n) + call col3(stats_p%x,this%p%x, this%p%x,n) + + call this%uu%update(k) + call this%vv%update(k) + call this%ww%update(k) + call this%pp%update(k) + + call col3(stats_work%x,this%u%x, this%v%x,n) + call this%uv%update(k) + call col3(stats_work%x,this%u%x, this%w%x,n) + call this%uw%update(k) + call col3(stats_work%x,this%v%x, this%w%x,n) + call this%vw%update(k) + + call col2(stats_work%x, this%u%x,n) + call this%uvw%update(k) + call col3(stats_work%x,this%stats_u%x, this%u%x,n) + call this%uuu%update(k) + call col3(stats_work%x,this%stats_v%x, this%v%x,n) + call this%vvv%update(k) + call col3(stats_work%x,this%stats_w%x, this%w%x,n) + call this%www%update(k) + call col3(stats_work%x,this%stats_u%x, this%v%x,n) + call this%uuv%update(k) + call col3(stats_work%x,this%stats_u%x, this%w%x,n) + call this%uuw%update(k) + call col3(stats_work%x,this%stats_v%x, this%u%x,n) + call this%uvv%update(k) + call col3(stats_work%x,this%stats_v%x, this%w%x,n) + call this%vvw%update(k) + call col3(stats_work%x,this%stats_w%x, this%u%x,n) + call this%uww%update(k) + call col3(stats_work%x,this%stats_w%x, this%v%x,n) + call this%vww%update(k) + + call col3(stats_work%x,this%stats_u%x, this%stats_u%x,n) + call this%uuuu%update(k) + call col3(stats_work%x,this%stats_v%x, this%stats_v%x,n) + call this%vvvv%update(k) + call col3(stats_work%x,this%stats_w%x, this%stats_w%x,n) + call this%wwww%update(k) + + call col3(stats_work%x,this%stats_p%x, this%p%x,n) + call this%ppp%update(k) + call col3(stats_work%x,this%stats_p%x, this%stats_p%x,n) + call this%pppp%update(k) + + call col3(stats_work%x,this%p%x, this%u%x,n) + call this%pu%update(k) + call col3(stats_work%x,this%p%x, this%v%x,n) + call this%pv%update(k) + call col3(stats_work%x,this%p%x, this%w%x,n) + call this%pw%update(k) + + + end if + call opgrad(this%dudx%x,this%dudy%x, this%dudz%x,this%u%x,this%coef) + call opgrad(this%dvdx%x,this%dvdy%x, this%dvdz%x,this%v%x,this%coef) + call opgrad(this%dwdx%x,this%dwdy%x, this%dwdz%x,this%w%x,this%coef) + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_col3(stats_work%x_d,this%dudx%x_d, this%p%x_d,n) + call this%pdudx%update(k) + call device_col3(stats_work%x_d,this%dudy%x_d, this%p%x_d,n) + call this%pdudy%update(k) + call device_col3(stats_work%x_d,this%dudz%x_d, this%p%x_d,n) + call this%pdudz%update(k) + + call device_col3(stats_work%x_d,this%dvdx%x_d, this%p%x_d,n) + call this%pdvdx%update(k) + call device_col3(stats_work%x_d,this%dvdy%x_d, this%p%x_d,n) + call this%pdvdy%update(k) + call device_col3(stats_work%x_d,this%dvdz%x_d, this%p%x_d,n) + call this%pdvdz%update(k) + + call device_col3(stats_work%x_d,this%dwdx%x_d, this%p%x_d,n) + call this%pdwdx%update(k) + call device_col3(stats_work%x_d,this%dwdy%x_d, this%p%x_d,n) + call this%pdwdy%update(k) + call device_col3(stats_work%x_d,this%dwdz%x_d, this%p%x_d,n) + call this%pdwdz%update(k) + + call device_col3(this%stats_work%x_d,this%dudx%x_d, this%dudx%x_d,n) + call device_addcol3(this%stats_work%x_d,this%dudy%x_d, this%dudy%x_d,n) + call device_addcol3(this%stats_work%x_d,this%dudz%x_d, this%dudz%x_d,n) + call this%e11%update(k) + call device_col3(this%stats_work%x_d,this%dvdx%x_d, this%dvdx%x_d,n) + call device_addcol3(this%stats_work%x_d,this%dvdy%x_d, this%dvdy%x_d,n) + call device_addcol3(this%stats_work%x_d,this%dvdz%x_d, this%dvdz%x_d,n) + call this%e22%update(k) + call device_col3(this%stats_work%x_d,this%dwdx%x_d, this%dwdx%x_d,n) + call device_addcol3(this%stats_work%x_d,this%dwdy%x_d, this%dwdy%x_d,n) + call device_addcol3(this%stats_work%x_d,this%dwdz%x_d, this%dwdz%x_d,n) + call this%e33%update(k) + call device_col3(this%stats_work%x_d,this%dudx%x_d, this%dvdx%x_d,n) + call device_addcol3(this%stats_work%x_d,this%dudy%x_d, this%dvdy%x_d,n) + call device_addcol3(this%stats_work%x_d,this%dudz%x_d, this%dvdz%x_d,n) + call this%e12%update(k) + call device_col3(this%stats_work%x_d,this%dvdx%x_d, this%dwdx%x_d,n) + call device_addcol3(this%stats_work%x_d,this%dvdy%x_d, this%dwdy%x_d,n) + call device_addcol3(this%stats_work%x_d,this%dvdz%x_d, this%dwdz%x_d,n) + call this%e23%update(k) + + + else + call col3(stats_work%x,this%dudx%x, this%p%x,n) + call this%pdudx%update(k) + call col3(stats_work%x,this%dudy%x, this%p%x,n) + call this%pdudy%update(k) + call col3(stats_work%x,this%dudz%x, this%p%x,n) + call this%pdudz%update(k) + + call col3(stats_work%x,this%dvdx%x, this%p%x,n) + call this%pdvdx%update(k) + call col3(stats_work%x,this%dvdy%x, this%p%x,n) + call this%pdvdy%update(k) + call col3(stats_work%x,this%dvdz%x, this%p%x,n) + call this%pdvdz%update(k) + + call col3(stats_work%x,this%dwdx%x, this%p%x,n) + call this%pdwdx%update(k) + call col3(stats_work%x,this%dwdy%x, this%p%x,n) + call this%pdwdy%update(k) + call col3(stats_work%x,this%dwdz%x, this%p%x,n) + call this%pdwdz%update(k) + + call col3(this%stats_work%x,this%dudx%x, this%dudx%x,n) + call addcol3(this%stats_work%x,this%dudy%x, this%dudy%x,n) + call addcol3(this%stats_work%x,this%dudz%x, this%dudz%x,n) + call this%e11%update(k) + call col3(this%stats_work%x,this%dvdx%x, this%dvdx%x,n) + call addcol3(this%stats_work%x,this%dvdy%x, this%dvdy%x,n) + call addcol3(this%stats_work%x,this%dvdz%x, this%dvdz%x,n) + call this%e22%update(k) + call col3(this%stats_work%x,this%dwdx%x, this%dwdx%x,n) + call addcol3(this%stats_work%x,this%dwdy%x, this%dwdy%x,n) + call addcol3(this%stats_work%x,this%dwdz%x, this%dwdz%x,n) + call this%e33%update(k) + call col3(this%stats_work%x,this%dudx%x, this%dvdx%x,n) + call addcol3(this%stats_work%x,this%dudy%x, this%dvdy%x,n) + call addcol3(this%stats_work%x,this%dudz%x, this%dvdz%x,n) + call this%e12%update(k) + call col3(this%stats_work%x,this%dvdx%x, this%dwdx%x,n) + call addcol3(this%stats_work%x,this%dvdy%x, this%dwdy%x,n) + call addcol3(this%stats_work%x,this%dvdz%x, this%dwdz%x,n) + call this%e23%update(k) + + end if + + end associate end subroutine fluid_stats_update @@ -513,7 +513,7 @@ subroutine fluid_stats_free(this) call this%stats_u%free() call this%stats_v%free() call this%stats_w%free() - + call this%uu%free() call this%vv%free() call this%ww%free() @@ -531,13 +531,13 @@ subroutine fluid_stats_free(this) call this%dWdx%free() call this%dWdy%free() call this%dWdz%free() - + end subroutine fluid_stats_free - + !> Initialize a mean flow field subroutine fluid_stats_reset(this) class(fluid_stats_t), intent(inout), target:: this - + call this%uu%reset() call this%vv%reset() call this%ww%reset() @@ -561,11 +561,11 @@ subroutine fluid_stats_reset(this) call this%pp%reset() call this%ppp%reset() call this%pppp%reset() - !> Pressure * velocity + !> Pressure * velocity call this%pu%reset() call this%pv%reset() call this%pw%reset() - + call this%pdudx%reset() call this%pdudy%reset() call this%pdudz%reset() @@ -611,7 +611,7 @@ subroutine fluid_stats_make_strong_grad(this) call device_col2(this%e23%mf%x_d,this%stats_work%x_d, n) - else + else call invers2(this%stats_work%x, this%coef%B,n) call col2(this%pdudx%mf%x, this%stats_work%x, n) call col2(this%pdudy%mf%x, this%stats_work%x, n) @@ -648,35 +648,35 @@ subroutine fluid_stats_post_process(this, mean, reynolds, pressure_flatness,& integer :: n if (present(mean)) then - n = mean%fields(1)%f%dof%size() - call copy(mean%fields(1)%f%x,this%u_mean%x,n) - call copy(mean%fields(2)%f%x,this%v_mean%x,n) - call copy(mean%fields(3)%f%x,this%w_mean%x,n) - call copy(mean%fields(4)%f%x,this%p_mean%x,n) + n = mean%fields(1)%f%dof%size() + call copy(mean%fields(1)%f%x,this%u_mean%x,n) + call copy(mean%fields(2)%f%x,this%v_mean%x,n) + call copy(mean%fields(3)%f%x,this%w_mean%x,n) + call copy(mean%fields(4)%f%x,this%p_mean%x,n) end if if (present(reynolds)) then - n = reynolds%fields(1)%f%dof%size() - call copy(reynolds%fields(1)%f%x,this%pp%mf%x,n) - call subcol3(reynolds%fields(1)%f%x,this%p_mean%x,this%p_mean%x,n) - - call copy(reynolds%fields(2)%f%x,this%uu%mf%x,n) - call subcol3(reynolds%fields(2)%f%x,this%u_mean%x,this%u_mean%x,n) - - call copy(reynolds%fields(3)%f%x,this%vv%mf%x,n) - call subcol3(reynolds%fields(3)%f%x,this%v_mean%x,this%v_mean%x,n) - - call copy(reynolds%fields(4)%f%x,this%ww%mf%x,n) - call subcol3(reynolds%fields(4)%f%x,this%w_mean%x,this%w_mean%x,n) - - call copy(reynolds%fields(5)%f%x,this%uv%mf%x,n) - call subcol3(reynolds%fields(5)%f%x,this%u_mean%x,this%v_mean%x,n) - - call copy(reynolds%fields(6)%f%x,this%uw%mf%x,n) - call subcol3(reynolds%fields(6)%f%x,this%u_mean%x,this%w_mean%x,n) - - call copy(reynolds%fields(7)%f%x,this%vw%mf%x,n) - call subcol3(reynolds%fields(7)%f%x,this%v_mean%x,this%w_mean%x,n) + n = reynolds%fields(1)%f%dof%size() + call copy(reynolds%fields(1)%f%x,this%pp%mf%x,n) + call subcol3(reynolds%fields(1)%f%x,this%p_mean%x,this%p_mean%x,n) + + call copy(reynolds%fields(2)%f%x,this%uu%mf%x,n) + call subcol3(reynolds%fields(2)%f%x,this%u_mean%x,this%u_mean%x,n) + + call copy(reynolds%fields(3)%f%x,this%vv%mf%x,n) + call subcol3(reynolds%fields(3)%f%x,this%v_mean%x,this%v_mean%x,n) + + call copy(reynolds%fields(4)%f%x,this%ww%mf%x,n) + call subcol3(reynolds%fields(4)%f%x,this%w_mean%x,this%w_mean%x,n) + + call copy(reynolds%fields(5)%f%x,this%uv%mf%x,n) + call subcol3(reynolds%fields(5)%f%x,this%u_mean%x,this%v_mean%x,n) + + call copy(reynolds%fields(6)%f%x,this%uw%mf%x,n) + call subcol3(reynolds%fields(6)%f%x,this%u_mean%x,this%w_mean%x,n) + + call copy(reynolds%fields(7)%f%x,this%vw%mf%x,n) + call subcol3(reynolds%fields(7)%f%x,this%v_mean%x,this%w_mean%x,n) end if if (present(pressure_skewness)) then @@ -697,22 +697,37 @@ subroutine fluid_stats_post_process(this, mean, reynolds, pressure_flatness,& !Compute gradient of mean flow n = mean_vel_grad%fields(1)%f%dof%size() if (NEKO_BCKND_DEVICE .eq. 1) then - call device_memcpy(this%u_mean%x, this%u_mean%x_d, n, HOST_TO_DEVICE) - call device_memcpy(this%v_mean%x, this%v_mean%x_d, n, HOST_TO_DEVICE) - call device_memcpy(this%w_mean%x, this%w_mean%x_d, n, HOST_TO_DEVICE) - call opgrad(this%dudx%x,this%dudy%x, this%dudz%x,this%u_mean%x,this%coef) - call opgrad(this%dvdx%x,this%dvdy%x, this%dvdz%x,this%v_mean%x,this%coef) - call opgrad(this%dwdx%x,this%dwdy%x, this%dwdz%x,this%w_mean%x,this%coef) - call device_memcpy(this%dudx%x, this%dudx%x_d, n, DEVICE_TO_HOST) - call device_memcpy(this%dvdx%x, this%dvdx%x_d, n, DEVICE_TO_HOST) - call device_memcpy(this%dwdx%x, this%dwdx%x_d, n, DEVICE_TO_HOST) - call device_memcpy(this%dudy%x, this%dudy%x_d, n, DEVICE_TO_HOST) - call device_memcpy(this%dvdy%x, this%dvdy%x_d, n, DEVICE_TO_HOST) - call device_memcpy(this%dwdy%x, this%dwdy%x_d, n, DEVICE_TO_HOST) - call device_memcpy(this%dudz%x, this%dudz%x_d, n, DEVICE_TO_HOST) - call device_memcpy(this%dvdz%x, this%dvdz%x_d, n, DEVICE_TO_HOST) - call device_memcpy(this%dwdz%x, this%dwdz%x_d, n, DEVICE_TO_HOST, sync=.true.) - else + call device_memcpy(this%u_mean%x, this%u_mean%x_d, n, & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%v_mean%x, this%v_mean%x_d, n, & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%w_mean%x, this%w_mean%x_d, n, & + HOST_TO_DEVICE, sync=.false.) + call opgrad(this%dudx%x, this%dudy%x, this%dudz%x, & + this%u_mean%x, this%coef) + call opgrad(this%dvdx%x, this%dvdy%x, this%dvdz%x, & + this%v_mean%x, this%coef) + call opgrad(this%dwdx%x, this%dwdy%x, this%dwdz%x, & + this%w_mean%x, this%coef) + call device_memcpy(this%dudx%x, this%dudx%x_d, n, & + DEVICE_TO_HOST, sync=.false.) + call device_memcpy(this%dvdx%x, this%dvdx%x_d, n, & + DEVICE_TO_HOST, sync=.false.) + call device_memcpy(this%dwdx%x, this%dwdx%x_d, n, & + DEVICE_TO_HOST, sync=.false.) + call device_memcpy(this%dudy%x, this%dudy%x_d, n, & + DEVICE_TO_HOST, sync=.false.) + call device_memcpy(this%dvdy%x, this%dvdy%x_d, n, & + DEVICE_TO_HOST, sync=.false.) + call device_memcpy(this%dwdy%x, this%dwdy%x_d, n, & + DEVICE_TO_HOST, sync=.false.) + call device_memcpy(this%dudz%x, this%dudz%x_d, n, & + DEVICE_TO_HOST, sync=.false.) + call device_memcpy(this%dvdz%x, this%dvdz%x_d, n, & + DEVICE_TO_HOST, sync=.false.) + call device_memcpy(this%dwdz%x, this%dwdz%x_d, n, & + DEVICE_TO_HOST, sync=.true.) + else call opgrad(this%dudx%x,this%dudy%x, this%dudz%x,this%u_mean%x,this%coef) call opgrad(this%dvdx%x,this%dvdy%x, this%dvdz%x,this%v_mean%x,this%coef) call opgrad(this%dwdx%x,this%dwdy%x, this%dwdz%x,this%w_mean%x,this%coef) @@ -735,5 +750,5 @@ subroutine fluid_stats_post_process(this, mean, reynolds, pressure_flatness,& end if end subroutine fluid_stats_post_process - + end module fluid_stats diff --git a/src/fluid/fluid_user_source_term.f90 b/src/fluid/fluid_user_source_term.f90 index 098e509c8d4..02518c5699f 100644 --- a/src/fluid/fluid_user_source_term.f90 +++ b/src/fluid/fluid_user_source_term.f90 @@ -36,7 +36,6 @@ module fluid_user_source_term use num_types, only : rp use utils, only : neko_error use source_term, only : source_term_t - use field, only : field_t use json_module, only : json_file use field_list, only : field_list_t use coefs, only : coef_t @@ -54,13 +53,13 @@ module fluid_user_source_term !! actual implementation in the user file. !! @details The user source term can be applied either pointiwse or acting !! on the whole array in a single call, which is referred to as "vector" - !! application. + !! application. !! @warning !! The user source term does not support init from JSON and should instead be !! directly initialized from components. type, public, extends(source_term_t) :: fluid_user_source_term_t !> Pointer to the dofmap of the right-hand-side fields. - type(dofmap_t), pointer :: dm + type(dofmap_t), pointer :: dm !> x-component of source term. real(kind=rp), allocatable :: u(:, :, :, :) !> y-component of source term. @@ -78,7 +77,7 @@ module fluid_user_source_term procedure(fluid_source_compute_pointwise), nopass, pointer :: compute_pw_ & => null() !> Compute the source term for the entire boundary - procedure(fluid_source_compute_vector), nopass, pointer :: compute_ & + procedure(fluid_source_compute_vector), nopass, pointer :: compute_vector_& => null() contains !> Constructor from JSON (will throw!). @@ -89,14 +88,14 @@ module fluid_user_source_term !> Destructor. procedure, pass(this) :: free => fluid_user_source_term_free !> Computes the source term and adds the result to `fields`. - procedure, pass(this) :: compute => fluid_user_source_term_compute + procedure, pass(this) :: compute_ => fluid_user_source_term_compute end type fluid_user_source_term_t abstract interface !> Computes the source term and adds the result to `fields`. !! @param t The time value. !! @param tstep The current time-step. - subroutine fluid_source_compute_vector(this, t) + subroutine fluid_source_compute_vector(this, t) import fluid_user_source_term_t, rp class(fluid_user_source_term_t), intent(inout) :: this real(kind=rp), intent(in) :: t @@ -111,7 +110,7 @@ end subroutine fluid_source_compute_vector !! @param j The x-index of GLL point. !! @param k The y-index of GLL point. !! @param l The z-index of GLL point. - !! @param e The index of element. + !! @param e The index of element. !! @param t The time value. subroutine fluid_source_compute_pointwise(u, v, w, j, k, l, e, t) import rp @@ -145,7 +144,7 @@ end subroutine fluid_user_source_term_init !> Costructor from components. !! @param fields A list of 3 fields for adding the source values. !! @param coef The SEM coeffs. - !! @param sourc_termtype The type of the user source term, "user_vector" or + !! @param sourc_termtype The type of the user source term, "user_vector" or !! "user_pointwise". !! @param eval_vector The procedure to vector-compute the source term. !! @param eval_pointwise The procedure to pointwise-compute the source term. @@ -159,7 +158,7 @@ subroutine fluid_user_source_term_init_from_components(this, fields, coef, & procedure(fluid_source_compute_pointwise), optional :: eval_pointwise call this%free() - call this%init_base(fields, coef) + call this%init_base(fields, coef, 0.0_rp, huge(0.0_rp)) this%dm => fields%fields(1)%f%dof @@ -186,11 +185,11 @@ subroutine fluid_user_source_term_init_from_components(this, fields, coef, & if (NEKO_BCKND_DEVICE .eq. 1) then call neko_error('Pointwise source terms not supported on accelerators') end if - this%compute_ => pointwise_eval_driver + this%compute_vector_ => pointwise_eval_driver this%compute_pw_ => eval_pointwise else if (trim(source_term_type) .eq. 'user_vector' .and. & present(eval_vector)) then - this%compute_ => eval_vector + this%compute_vector_ => eval_vector else call neko_error('Invalid fluid source term '//source_term_type) end if @@ -208,7 +207,7 @@ subroutine fluid_user_source_term_free(this) if (c_associated(this%v_d)) call device_free(this%v_d) if (c_associated(this%w_d)) call device_free(this%w_d) - nullify(this%compute_) + nullify(this%compute_vector_) nullify(this%compute_pw_) nullify(this%dm) @@ -224,14 +223,14 @@ subroutine fluid_user_source_term_compute(this, t, tstep) integer, intent(in) :: tstep integer :: n - call this%compute_(this, t) + call this%compute_vector_(this, t) n = this%fields%fields(1)%f%dof%size() if (NEKO_BCKND_DEVICE .eq. 1) then call device_add2(this%fields%fields(1)%f%x_d, this%u_d, n) call device_add2(this%fields%fields(2)%f%x_d, this%v_d, n) call device_add2(this%fields%fields(3)%f%x_d, this%w_d, n) - else + else call add2(this%fields%fields(1)%f%x, this%u, n) call add2(this%fields%fields(2)%f%x, this%v, n) call add2(this%fields%fields(3)%f%x, this%w, n) @@ -249,26 +248,26 @@ subroutine pointwise_eval_driver(this, t) select type (this) type is (fluid_user_source_term_t) - do e = 1, size(this%u, 4) - ee = e - do l = 1, size(this%u, 3) - ll = l - do k = 1, size(this%u, 2) - kk = k - do j = 1, size(this%u, 1) - jj =j - call this%compute_pw_(this%u(j,k,l,e), & + do e = 1, size(this%u, 4) + ee = e + do l = 1, size(this%u, 3) + ll = l + do k = 1, size(this%u, 2) + kk = k + do j = 1, size(this%u, 1) + jj =j + call this%compute_pw_(this%u(j,k,l,e), & this%v(j,k,l,e), & this%w(j,k,l,e), & jj, kk, ll, ee, t) - end do - end do - end do - end do + end do + end do + end do + end do class default call neko_error('Incorrect source type in pointwise eval driver!') end select - + end subroutine pointwise_eval_driver - + end module fluid_user_source_term diff --git a/src/fluid/fluid_volflow.f90 b/src/fluid/fluid_volflow.f90 index 47980232b2f..c328d83c1e6 100644 --- a/src/fluid/fluid_volflow.f90 +++ b/src/fluid/fluid_volflow.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,46 +21,46 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! module fluid_volflow use operators use num_types - use mathops + use mathops use krylov, only : ksp_t, ksp_monitor_t use precon use dofmap @@ -76,17 +76,16 @@ module fluid_volflow use json_module, only : json_file use json_utils, only: json_get use scratch_registry, only : scratch_registry_t - use bc, only : bc_list_t, bc_list_apply, bc_list_apply_vector, & - bc_list_apply_scalar + use bc, only : bc_list_t, bc_list_apply_vector, bc_list_apply_scalar use ax_product, only : ax_t implicit none private - + !> Defines volume flow type, public :: fluid_volflow_t integer :: flow_dir !< these two should be moved to params - logical :: avflow - real(kind=rp) :: flow_rate + logical :: avflow + real(kind=rp) :: flow_rate real(kind=rp) :: dtlag = 0d0 real(kind=rp) :: bdlag = 0d0 !< Really quite pointless since we do not vary the timestep type(field_t) :: u_vol, v_vol, w_vol, p_vol @@ -130,7 +129,7 @@ subroutine fluid_vol_flow_init(this, dm_Xh, params) end if this%scratch = scratch_registry_t(dm_Xh, 3, 1) - + end subroutine fluid_vol_flow_init subroutine fluid_vol_flow_free(this) @@ -142,7 +141,7 @@ subroutine fluid_vol_flow_free(this) call this%p_vol%free() call this%scratch%free() - + end subroutine fluid_vol_flow_free !> Compute flow adjustment @@ -172,11 +171,11 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, & type(ksp_monitor_t) :: ksp_result type(field_t), pointer :: ta1, ta2, ta3 integer :: temp_indices(3) - + call this%scratch%request_field(ta1, temp_indices(1)) call this%scratch%request_field(ta2, temp_indices(2)) call this%scratch%request_field(ta3, temp_indices(3)) - + associate(msh => c_Xh%msh, p_vol => this%p_vol, & u_vol => this%u_vol, v_vol => this%v_vol, w_vol => this%w_vol) @@ -201,32 +200,32 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, & c_Xh%h2(i,1,1,1) = 0.0_rp end do end if - c_Xh%ifh2 = .false. + c_Xh%ifh2 = .false. ! Compute pressure - + if (this%flow_dir .eq. 1) then call cdtp(p_res%x, c_Xh%h1, c_Xh%drdx, c_Xh%dsdx, c_Xh%dtdx, c_Xh) end if - + if (this%flow_dir .eq. 2) then call cdtp(p_res%x, c_Xh%h1, c_Xh%drdy, c_Xh%dsdy, c_Xh%dtdy, c_Xh) end if - + if (this%flow_dir .eq. 3) then call cdtp(p_res%x, c_Xh%h1, c_Xh%drdz, c_Xh%dsdz, c_Xh%dtdz, c_Xh) end if - call gs_Xh%op(p_res, GS_OP_ADD) + call gs_Xh%op(p_res, GS_OP_ADD) call bc_list_apply_scalar(bclst_dp, p_res%x, n) call pc_prs%update() ksp_result = ksp_prs%solve(Ax, p_vol, p_res%x, n, & - c_Xh, bclst_dp, gs_Xh, prs_max_iter) + c_Xh, bclst_dp, gs_Xh, prs_max_iter) ! Compute velocity - + call opgrad(u_res%x, v_res%x, w_res%x, p_vol%x, c_Xh) - + if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. & (NEKO_BCKND_OPENCL .eq. 1)) then call device_opchsign(u_res%x_d, v_res%x_d, w_res%x_d, msh%gdim, n) @@ -241,12 +240,12 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, & end if call bc_list_apply_vector(bclst_vel_res,& ta1%x, ta2%x, ta3%x, n) - + ! add forcing if (NEKO_BCKND_DEVICE .eq. 1) then if (this%flow_dir .eq. 1) then - call device_add2(u_res%x_d, ta1%x_d, n) + call device_add2(u_res%x_d, ta1%x_d, n) else if (this%flow_dir .eq. 2) then call device_add2(v_res%x_d, ta2%x_d, n) else if (this%flow_dir .eq. 3) then @@ -254,7 +253,7 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, & end if else if (this%flow_dir .eq. 1) then - call add2(u_res%x, ta1%x, n) + call add2(u_res%x, ta1%x, n) else if (this%flow_dir .eq. 2) then call add2(v_res%x, ta2%x, n) else if (this%flow_dir .eq. 3) then @@ -273,19 +272,19 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, & end if c_Xh%ifh2 = .true. - call gs_Xh%op(u_res, GS_OP_ADD) - call gs_Xh%op(v_res, GS_OP_ADD) - call gs_Xh%op(w_res, GS_OP_ADD) + call gs_Xh%op(u_res, GS_OP_ADD) + call gs_Xh%op(v_res, GS_OP_ADD) + call gs_Xh%op(w_res, GS_OP_ADD) - call bc_list_apply_vector(bclst_vel_res,& + call bc_list_apply_vector(bclst_vel_res,& u_res%x, v_res%x, w_res%x, n) - call pc_vel%update() + call pc_vel%update() - ksp_result = ksp_vel%solve(Ax, u_vol, u_res%x, n, & + ksp_result = ksp_vel%solve(Ax, u_vol, u_res%x, n, & c_Xh, bclst_du, gs_Xh, vel_max_iter) - ksp_result = ksp_vel%solve(Ax, v_vol, v_res%x, n, & + ksp_result = ksp_vel%solve(Ax, v_vol, v_res%x, n, & c_Xh, bclst_dv, gs_Xh, vel_max_iter) - ksp_result = ksp_vel%solve(Ax, w_vol, w_res%x, n, & + ksp_result = ksp_vel%solve(Ax, w_vol, w_res%x, n, & c_Xh, bclst_dw, gs_Xh, vel_max_iter) if (NEKO_BCKND_DEVICE .eq. 1) then @@ -293,12 +292,12 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, & this%base_flow = & device_glsc2(u_vol%x_d, c_Xh%B_d, n) / this%domain_length end if - + if (this%flow_dir .eq. 2) then this%base_flow = & device_glsc2(v_vol%x_d, c_Xh%B_d, n) / this%domain_length end if - + if (this%flow_dir .eq. 3) then this%base_flow = & device_glsc2(w_vol%x_d, c_Xh%B_d, n) / this%domain_length @@ -307,16 +306,16 @@ subroutine fluid_vol_flow_compute(this, u_res, v_res, w_res, p_res, & if (this%flow_dir .eq. 1) then this%base_flow = glsc2(u_vol%x, c_Xh%B, n) / this%domain_length end if - + if (this%flow_dir .eq. 2) then this%base_flow = glsc2(v_vol%x, c_Xh%B, n) / this%domain_length end if - + if (this%flow_dir .eq. 3) then this%base_flow = glsc2(w_vol%x, c_Xh%B, n) / this%domain_length end if end if - end associate + end associate call this%scratch%relinquish_field(temp_indices) end subroutine fluid_vol_flow_compute @@ -353,34 +352,34 @@ subroutine fluid_vol_flow(this, u, v, w, p, u_res, v_res, w_res, p_res, & integer :: n, ierr type(field_t), pointer :: ta1, ta2, ta3 integer :: temp_indices(3) - + associate(u_vol => this%u_vol, v_vol => this%v_vol, & w_vol => this%w_vol, p_vol => this%p_vol) - + n = c_Xh%dof%size() ! If either dt or the backwards difference coefficient change, ! then recompute base flow solution corresponding to unit forcing: - + ifcomp = 0.0_rp if (dt .ne. this%dtlag .or. ext_bdf%diffusion_coeffs(1) .ne. this%bdlag) then ifcomp = 1.0_rp end if - + this%dtlag = dt this%bdlag = ext_bdf%diffusion_coeffs(1) call MPI_Allreduce(MPI_IN_PLACE, ifcomp, 1, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) - + if (ifcomp .gt. 0d0) then call this%compute(u_res, v_res, w_res, p_res, & ext_bdf, gs_Xh, c_Xh, rho, mu, ext_bdf%diffusion_coeffs(1), dt, & bclst_dp, bclst_du, bclst_dv, bclst_dw, bclst_vel_res, & - Ax, ksp_vel, ksp_prs, pc_prs, pc_vel, prs_max_iter, vel_max_iter) + Ax, ksp_prs, ksp_vel, pc_prs, pc_vel, prs_max_iter, vel_max_iter) end if - + if (NEKO_BCKND_DEVICE .eq. 1) then if (this%flow_dir .eq. 1) then current_flow = & @@ -408,10 +407,10 @@ subroutine fluid_vol_flow(this, u, v, w, p, u_res, v_res, w_res, p_res, & else flow_rate = this%flow_rate endif - - delta_flow = flow_rate - current_flow + + delta_flow = flow_rate - current_flow scale = delta_flow / this%base_flow - + if (NEKO_BCKND_DEVICE .eq. 1) then call device_add2s2(u%x_d, u_vol%x_d, scale, n) call device_add2s2(v%x_d, v_vol%x_d, scale, n) @@ -424,7 +423,7 @@ subroutine fluid_vol_flow(this, u, v, w, p, u_res, v_res, w_res, p_res, & call add2s2(p%x, p_vol%x, scale, n) end if end associate - + end subroutine fluid_vol_flow diff --git a/src/fluid/mean_flow.f90 b/src/fluid/mean_flow.f90 index fb55c7ae976..ea55da9a089 100644 --- a/src/fluid/mean_flow.f90 +++ b/src/fluid/mean_flow.f90 @@ -64,7 +64,7 @@ subroutine mean_flow_init(this, u, v, w, p) call this%v%init(v) call this%w%init(w) call this%p%init(p) - + end subroutine mean_flow_init @@ -78,7 +78,7 @@ subroutine mean_flow_free(this) call this%p%free() end subroutine mean_flow_free - + !> Resets a mean flow field subroutine mean_flow_reset(this) class(mean_flow_t), intent(inout) :: this @@ -89,5 +89,5 @@ subroutine mean_flow_reset(this) call this%p%reset() end subroutine mean_flow_reset - + end module mean_flow diff --git a/src/fluid/mean_sqr_flow.f90 b/src/fluid/mean_sqr_flow.f90 index e16200f491f..e7f5c942d54 100644 --- a/src/fluid/mean_sqr_flow.f90 +++ b/src/fluid/mean_sqr_flow.f90 @@ -46,9 +46,9 @@ module mean_sqr_flow procedure, pass(this) :: init => mean_sqr_flow_init procedure, pass(this) :: free => mean_sqr_flow_free end type mean_sqr_flow_t - + contains - + !> Initialize a mean squared flow field subroutine mean_sqr_flow_init(this, u, v, w, p) class(mean_sqr_flow_t), intent(inout) :: this @@ -63,7 +63,7 @@ subroutine mean_sqr_flow_init(this, u, v, w, p) call this%vv%init(v) call this%ww%init(w) call this%pp%init(p) - + end subroutine mean_sqr_flow_init @@ -77,5 +77,5 @@ subroutine mean_sqr_flow_free(this) call this%pp%free() end subroutine mean_sqr_flow_free - + end module mean_sqr_flow diff --git a/src/fluid/pnpn_res.f90 b/src/fluid/pnpn_res.f90 index b3187fcb55e..1d6ab091f28 100644 --- a/src/fluid/pnpn_res.f90 +++ b/src/fluid/pnpn_res.f90 @@ -32,7 +32,7 @@ ! !> Defines Pressure and velocity residuals in the Pn-Pn formulation module pnpn_residual - use gather_scatter, only : gs_t + use gather_scatter, only : gs_t use ax_product, only : ax_t use field, only : field_t use coefs, only : coef_t @@ -40,14 +40,13 @@ module pnpn_residual use space, only : space_t use mesh, only : mesh_t use num_types, only : rp - use scratch_registry, only : scratch_registry_t implicit none private - + !> Abstract type to compute pressure residual type, public, abstract :: pnpn_prs_res_t contains - procedure(prs_res), nopass, deferred :: compute + procedure(prs_res), nopass, deferred :: compute end type pnpn_prs_res_t !> Abstract type to compute velocity residual @@ -55,7 +54,7 @@ module pnpn_residual contains procedure(vel_res), nopass, deferred :: compute end type pnpn_vel_res_t - + abstract interface subroutine prs_res(p, p_res, u, v, w, u_e, v_e, w_e, f_x, f_y, f_z, c_xh,& gs_Xh, bc_prs_surface, bc_sym_surface, Ax, bd, dt, mu, rho) @@ -89,13 +88,13 @@ subroutine vel_res(Ax, u, v, w, u_res, v_res, w_res, & import Ax_t import gs_t import facet_normal_t - import space_t + import space_t import coef_t import mesh_t import rp class(ax_t), intent(in) :: Ax type(mesh_t), intent(inout) :: msh - type(space_t), intent(inout) :: Xh + type(space_t), intent(inout) :: Xh type(field_t), intent(inout) :: p, u, v, w type(field_t), intent(inout) :: u_res, v_res, w_res type(field_t), intent(inout) :: f_x, f_y, f_z @@ -108,5 +107,5 @@ subroutine vel_res(Ax, u, v, w, u_res, v_res, w_res, & end subroutine vel_res end interface - + end module pnpn_residual diff --git a/src/fluid/pnpn_res_fctry.f90 b/src/fluid/pnpn_res_fctry.f90 index ba19353875c..cf583253271 100644 --- a/src/fluid/pnpn_res_fctry.f90 +++ b/src/fluid/pnpn_res_fctry.f90 @@ -43,7 +43,7 @@ module pnpn_res_fctry public :: pnpn_prs_res_t, pnpn_vel_res_t, & pnpn_prs_res_factory, pnpn_vel_res_factory - + contains subroutine pnpn_prs_res_factory(prs_res) @@ -53,7 +53,7 @@ subroutine pnpn_prs_res_factory(prs_res) deallocate(prs_res) end if - + if (NEKO_BCKND_SX .eq. 1) then allocate(pnpn_prs_res_sx_t::prs_res) else if (NEKO_BCKND_DEVICE .eq. 1) then @@ -61,9 +61,9 @@ subroutine pnpn_prs_res_factory(prs_res) else allocate(pnpn_prs_res_cpu_t::prs_res) end if - + end subroutine pnpn_prs_res_factory - + subroutine pnpn_vel_res_factory(vel_res) class(pnpn_vel_res_t), allocatable, intent(inout) :: vel_res @@ -78,8 +78,8 @@ subroutine pnpn_vel_res_factory(vel_res) else allocate(pnpn_vel_res_cpu_t::vel_res) end if - - + + end subroutine pnpn_vel_res_factory - + end module pnpn_res_fctry diff --git a/src/gs/bcknd/cpu/gs_cpu.f90 b/src/gs/bcknd/cpu/gs_cpu.f90 index 6e26096372b..b95605f095b 100644 --- a/src/gs/bcknd/cpu/gs_cpu.f90 +++ b/src/gs/bcknd/cpu/gs_cpu.f90 @@ -47,9 +47,9 @@ module gs_cpu procedure, pass(this) :: gather => gs_gather_cpu procedure, pass(this) :: scatter => gs_scatter_cpu end type gs_cpu_t - + contains - + !> Dummy backend initialisation subroutine gs_cpu_init(this, nlocal, nshared, nlcl_blks, nshrd_blks) class(gs_cpu_t), intent(inout) :: this @@ -78,7 +78,7 @@ subroutine gs_gather_cpu(this, v, m, o, dg, u, n, gd, nb, b, op, shrd) integer, intent(in) :: o integer, intent(in) :: op logical, intent(in) :: shrd - + select case(op) case (GS_OP_ADD) call gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b) @@ -89,9 +89,9 @@ subroutine gs_gather_cpu(this, v, m, o, dg, u, n, gd, nb, b, op, shrd) case (GS_OP_MAX) call gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b) end select - + end subroutine gs_gather_cpu - + !> Gather kernel for addition of data !! \f$ v(dg(i)) = v(dg(i)) + u(gd(i)) \f$ subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b) @@ -115,9 +115,9 @@ subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b) tmp = tmp + u(gd(k + j)) end do v(dg(k + 1)) = tmp - k = k + blk_len + k = k + blk_len end do - + if (o .lt. 0) then do i = abs(o), m v(dg(i)) = u(gd(i)) @@ -128,7 +128,7 @@ subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b) v(dg(i)) = tmp end do end if - + end subroutine gs_gather_kernel_add !> Gather kernel for multiplication of data @@ -145,18 +145,18 @@ subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b) integer, intent(in) :: o integer :: i, j, k, blk_len real(kind=rp) :: tmp - + k = 0 do i = 1, nb blk_len = b(i) - tmp = u(gd(k + 1)) + tmp = u(gd(k + 1)) do j = 2, blk_len tmp = tmp * u(gd(k + j)) end do v(dg(k + 1)) = tmp - k = k + blk_len + k = k + blk_len end do - + if (o .lt. 0) then do i = abs(o), m v(dg(i)) = u(gd(i)) @@ -167,9 +167,9 @@ subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b) v(dg(i)) = tmp end do end if - + end subroutine gs_gather_kernel_mul - + !> Gather kernel for minimum of data !! \f$ v(dg(i)) = \min(v(dg(i)), u(gd(i))) \f$ subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b) @@ -193,9 +193,9 @@ subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b) tmp = min(tmp, u(gd(k + j))) end do v(dg(k + 1)) = tmp - k = k + blk_len + k = k + blk_len end do - + if (o .lt. 0) then do i = abs(o), m v(dg(i)) = u(gd(i)) @@ -206,7 +206,7 @@ subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b) v(dg(i)) = tmp end do end if - + end subroutine gs_gather_kernel_min !> Gather kernel for maximum of data @@ -232,9 +232,9 @@ subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b) tmp = max(tmp, u(gd(k + j))) end do v(dg(k + 1)) = tmp - k = k + blk_len + k = k + blk_len end do - + if (o .lt. 0) then do i = abs(o), m v(dg(i)) = u(gd(i)) @@ -245,7 +245,7 @@ subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b) v(dg(i)) = tmp end do end if - + end subroutine gs_gather_kernel_max !> Scatter kernel @todo Make the kernel abstract @@ -261,7 +261,7 @@ subroutine gs_scatter_cpu(this, v, m, dg, u, n, gd, nb, b, shrd, event) integer, dimension(nb), intent(inout) :: b logical, intent(in) :: shrd type(c_ptr) :: event - + call gs_scatter_kernel(v, m, dg, u, n, gd, nb, b) end subroutine gs_scatter_cpu @@ -278,7 +278,7 @@ subroutine gs_scatter_kernel(v, m, dg, u, n, gd, nb, b) integer, dimension(nb), intent(inout) :: b integer :: i, j, k, blk_len real(kind=rp) :: tmp - + k = 0 do i = 1, nb blk_len = b(i) diff --git a/src/gs/bcknd/device/cuda/gs_kernels.h b/src/gs/bcknd/device/cuda/gs_kernels.h index cf0d81af850..ad7539b6905 100644 --- a/src/gs/bcknd/device/cuda/gs_kernels.h +++ b/src/gs/bcknd/device/cuda/gs_kernels.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __GS_GS_KERNELS__ +#define __GS_GS_KERNELS__ + /** * Device gather kernel for addition of data * \f$ v(dg(i)) = v(dg(i)) + u(gd(i)) \f$ @@ -283,3 +286,5 @@ __global__ void gs_unpack_add_kernel(T * __restrict__ u, u[idx-1] += val; } } + +#endif // __GS_GS_KERNELS__ \ No newline at end of file diff --git a/src/gs/bcknd/device/gs_device.F90 b/src/gs/bcknd/device/gs_device.F90 index f87fc0592ed..5f6ab705985 100644 --- a/src/gs/bcknd/device/gs_device.F90 +++ b/src/gs/bcknd/device/gs_device.F90 @@ -35,14 +35,14 @@ module gs_device use neko_config use num_types use gs_bcknd - use device + use device use gs_ops use utils use, intrinsic :: iso_c_binding, only : c_ptr, c_int, C_NULL_PTR, & c_associated implicit none private - + !> Gather-scatter backend for offloading devices type, public, extends(gs_bcknd_t) :: gs_device_t integer, allocatable :: local_blk_off(:) !< Local block offset @@ -57,7 +57,7 @@ module gs_device type(c_ptr) :: shared_blk_len_d = C_NULL_PTR!< Dev. ptr shared n-f blocks type(c_ptr) :: local_blk_off_d = C_NULL_PTR !< Dev. ptr local blk offset type(c_ptr) :: shared_blk_off_d = C_NULL_PTR!< Dev. ptr shared blk offset - integer :: nlocal + integer :: nlocal integer :: nshared logical :: shared_on_host !< Shared points are handled on host contains @@ -131,7 +131,7 @@ end subroutine opencl_scatter_kernel #endif contains - + !> Accelerator backend initialisation subroutine gs_device_init(this, nlocal, nshared, nlcl_blks, nshrd_blks) class(gs_device_t), intent(inout) :: this @@ -165,7 +165,7 @@ subroutine gs_device_init(this, nlocal, nshared, nlcl_blks, nshrd_blks) call device_event_create(this%scatter_event, 2) this%gs_stream = glb_cmd_queue - + end subroutine gs_device_init !> Dummy backend deallocation @@ -214,7 +214,7 @@ subroutine gs_device_free(this) if (c_associated(this%gather_event)) then call device_event_destroy(this%gather_event) end if - + if (c_associated(this%scatter_event)) then call device_event_destroy(this%scatter_event) end if @@ -222,7 +222,7 @@ subroutine gs_device_free(this) if (c_associated(this%gs_stream)) then this%gs_stream = C_NULL_PTR end if - + end subroutine gs_device_free !> Gather kernel @@ -243,7 +243,7 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd) type(c_ptr) :: u_d u_d = device_get_ptr(u) - + if (.not. shrd) then associate(v_d=>this%local_gs_d, dg_d=>this%local_dof_gs_d, & gd_d=>this%local_gs_dof_d, b_d=>this%local_blk_len_d, & @@ -256,17 +256,20 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd) if (.not. c_associated(dg_d)) then call device_map(dg, dg_d, m) - call device_memcpy(dg, dg_d, m, HOST_TO_DEVICE, strm=strm) + call device_memcpy(dg, dg_d, m, HOST_TO_DEVICE, & + sync=.false., strm=strm) end if if (.not. c_associated(gd_d)) then call device_map(gd, gd_d, m) - call device_memcpy(gd, gd_d, m, HOST_TO_DEVICE, strm=strm) + call device_memcpy(gd, gd_d, m, HOST_TO_DEVICE, & + sync=.false., strm=strm) end if if (.not. c_associated(b_d)) then call device_map(b, b_d, nb) - call device_memcpy(b, b_d, nb, HOST_TO_DEVICE, strm=strm) + call device_memcpy(b, b_d, nb, HOST_TO_DEVICE, & + sync=.false., strm=strm) end if if (.not. c_associated(bo_d)) then @@ -275,9 +278,10 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd) do i = 2, nb bo(i) = bo(i - 1) + b(i - 1) end do - call device_memcpy(bo, bo_d, nb, HOST_TO_DEVICE, strm=strm) + call device_memcpy(bo, bo_d, nb, HOST_TO_DEVICE, & + sync=.false., strm=strm) end if - + #ifdef HAVE_HIP call hip_gather_kernel(v_d, m, o, dg_d, u_d, n, gd_d, & nb, b_d, bo_d, op, strm) @@ -290,7 +294,7 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd) #else call neko_error('No device backend configured') #endif - + end associate else if (shrd) then associate(v_d=>this%shared_gs_d, dg_d=>this%shared_dof_gs_d, & @@ -304,17 +308,20 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd) if (.not. c_associated(dg_d)) then call device_map(dg, dg_d, m) - call device_memcpy(dg, dg_d, m, HOST_TO_DEVICE, strm=strm) + call device_memcpy(dg, dg_d, m, HOST_TO_DEVICE, & + sync=.false., strm=strm) end if if (.not. c_associated(gd_d)) then call device_map(gd, gd_d, m) - call device_memcpy(gd, gd_d, m, HOST_TO_DEVICE, strm=strm) + call device_memcpy(gd, gd_d, m, HOST_TO_DEVICE, & + sync=.false., strm=strm) end if if (.not. c_associated(b_d)) then call device_map(b, b_d, nb) - call device_memcpy(b, b_d, nb, HOST_TO_DEVICE, strm=strm) + call device_memcpy(b, b_d, nb, HOST_TO_DEVICE, & + sync=.false., strm=strm) end if if (.not. c_associated(bo_d)) then @@ -323,11 +330,12 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd) do i = 2, nb bo(i) = bo(i - 1) + b(i - 1) end do - call device_memcpy(bo, bo_d, nb, HOST_TO_DEVICE, strm=strm) + call device_memcpy(bo, bo_d, nb, HOST_TO_DEVICE, & + sync=.false., strm=strm) end if - -#ifdef HAVE_HIP + +#ifdef HAVE_HIP call hip_gather_kernel(v_d, m, o, dg_d, u_d, n, gd_d, & nb, b_d, bo_d, op, strm) #elif HAVE_CUDA @@ -340,11 +348,12 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd) call neko_error('No device backend configured') #endif - call device_event_record(this%gather_event, strm) - + call device_event_record(this%gather_event, strm) + if (this%shared_on_host) then if (this%nshared .eq. m) then - call device_memcpy(v, v_d, m, DEVICE_TO_HOST,strm=strm) + call device_memcpy(v, v_d, m, DEVICE_TO_HOST, & + sync=.true., strm=strm) end if end if @@ -352,7 +361,7 @@ subroutine gs_gather_device(this, v, m, o, dg, u, n, gd, nb, b, op, shrd) end if end subroutine gs_gather_device - + !> Scatter kernel subroutine gs_scatter_device(this, v, m, dg, u, n, gd, nb, b, shrd, event) integer, intent(in) :: m @@ -390,9 +399,10 @@ subroutine gs_scatter_device(this, v, m, dg, u, n, gd, nb, b, shrd, event) bo_d=>this%shared_blk_off_d, strm=>this%gs_stream) if (this%shared_on_host) then - call device_memcpy(v, v_d, m, HOST_TO_DEVICE, strm=strm) + call device_memcpy(v, v_d, m, HOST_TO_DEVICE, & + sync=.false., strm=strm) end if - + #ifdef HAVE_HIP call hip_scatter_kernel(v_d, m, dg_d, u_d, n, gd_d, nb, b_d, bo_d, strm) #elif HAVE_CUDA @@ -407,8 +417,8 @@ subroutine gs_scatter_device(this, v, m, dg, u, n, gd, nb, b, shrd, event) call device_event_record(event, strm) else call device_sync(strm) - end if - + end if + end associate end if diff --git a/src/gs/bcknd/device/gs_device_mpi.F90 b/src/gs/bcknd/device/gs_device_mpi.F90 index 2cb6320662b..e6cb4c55b58 100644 --- a/src/gs/bcknd/device/gs_device_mpi.F90 +++ b/src/gs/bcknd/device/gs_device_mpi.F90 @@ -65,7 +65,7 @@ module gs_device_mpi type(c_ptr), allocatable :: stream(:) type(c_ptr), allocatable :: event(:) integer :: nb_strtgy - type(c_ptr) :: send_event = C_NULL_PTR + type(c_ptr) :: send_event = C_NULL_PTR contains procedure, pass(this) :: init => gs_device_mpi_init procedure, pass(this) :: free => gs_device_mpi_free @@ -94,7 +94,7 @@ subroutine hip_gs_unpack(u_d, op, buf_d, dof_d, offset, n, stream) & type(c_ptr), value :: u_d, buf_d, dof_d, stream end subroutine hip_gs_unpack end interface -#elif HAVE_CUDA +#elif HAVE_CUDA interface subroutine cuda_gs_pack(u_d, buf_d, dof_d, offset, n, stream) & bind(c, name='cuda_gs_pack') @@ -230,28 +230,28 @@ subroutine gs_device_mpi_buf_init(this, pe_order, dof_stack, mark_dupes) ! %array() breaks on cray select type (arr => dof_stack(pe_order(i))%data) type is (integer) - do j = 1, this%ndofs(i) - k = this%offset(i) + j - if (mark_dupes) then - if (doftable%get(arr(j), dupe) .eq. 0) then - if (dofs(dupe) .gt. 0) then - dofs(dupe) = -dofs(dupe) - marked = marked + 1 - end if - dofs(k) = -arr(j) - marked = marked + 1 - else - call doftable%set(arr(j), k) - dofs(k) = arr(j) - end if - else - dofs(k) = arr(j) - end if - end do + do j = 1, this%ndofs(i) + k = this%offset(i) + j + if (mark_dupes) then + if (doftable%get(arr(j), dupe) .eq. 0) then + if (dofs(dupe) .gt. 0) then + dofs(dupe) = -dofs(dupe) + marked = marked + 1 + end if + dofs(k) = -arr(j) + marked = marked + 1 + else + call doftable%set(arr(j), k) + dofs(k) = arr(j) + end if + else + dofs(k) = arr(j) + end if + end do end select end do - call device_memcpy(dofs, this%dof_d, total, HOST_TO_DEVICE) + call device_memcpy(dofs, this%dof_d, total, HOST_TO_DEVICE, sync=.false.) deallocate(dofs) call doftable%free() @@ -268,7 +268,7 @@ subroutine gs_device_mpi_buf_free(this) if (c_associated(this%buf_d)) call device_free(this%buf_d) if (c_associated(this%dof_d)) call device_free(this%dof_d) - end subroutine + end subroutine gs_device_mpi_buf_free !> Initialise MPI based communication method subroutine gs_device_mpi_init(this, send_pe, recv_pe) @@ -319,7 +319,7 @@ subroutine gs_device_mpi_free(this) deallocate(this%stream) end if #endif - + end subroutine gs_device_mpi_free !> Post non-blocking send operations @@ -353,16 +353,16 @@ subroutine gs_device_mpi_nbsend(this, u, n, deps, strm) #endif call device_sync(strm) - + do i = 1, size(this%send_pe) call device_mpi_isend(this%send_buf%buf_d, & rp*this%send_buf%offset(i), & rp*this%send_buf%ndofs(i), this%send_pe(i), & this%send_buf%reqs, i) end do - + else - + do i = 1, size(this%send_pe) call device_stream_wait_event(this%stream(i), deps, 0) #ifdef HAVE_HIP @@ -383,7 +383,7 @@ subroutine gs_device_mpi_nbsend(this, u, n, deps, strm) call neko_error('gs_device_mpi: no backend') #endif end do - + ! Consider adding a poll loop here once we have device_query in place do i = 1, size(this%send_pe) call device_sync(this%stream(i)) @@ -393,7 +393,7 @@ subroutine gs_device_mpi_nbsend(this, u, n, deps, strm) this%send_buf%reqs, i) end do end if - + end subroutine gs_device_mpi_nbsend !> Post non-blocking receive operations @@ -443,12 +443,12 @@ subroutine gs_device_mpi_nbwait(this, u, n, op, strm) ! Syncing here seems to prevent some race condition call device_sync(strm) - + else do while(device_mpi_waitany(size(this%recv_pe), & this%recv_buf%reqs, done_req) .ne. 0) - + #ifdef HAVE_HIP call hip_gs_unpack(u_d, op, & this%recv_buf%buf_d, & @@ -456,7 +456,7 @@ subroutine gs_device_mpi_nbwait(this, u, n, op, strm) this%recv_buf%offset(done_req), & this%recv_buf%ndofs(done_req), & this%stream(done_req)) -#elif HAVE_CUDA +#elif HAVE_CUDA call cuda_gs_unpack(u_d, op, & this%recv_buf%buf_d, & this%recv_buf%dof_d, & @@ -468,7 +468,7 @@ subroutine gs_device_mpi_nbwait(this, u, n, op, strm) #endif call device_event_record(this%event(done_req), this%stream(done_req)) end do - + call device_mpi_waitall(size(this%send_pe), this%send_buf%reqs) ! Sync non-blocking streams @@ -476,7 +476,7 @@ subroutine gs_device_mpi_nbwait(this, u, n, op, strm) call device_stream_wait_event(strm, & this%event(done_req), 0) end do - + end if end subroutine gs_device_mpi_nbwait diff --git a/src/gs/bcknd/device/hip/gs_kernels.h b/src/gs/bcknd/device/hip/gs_kernels.h index 0825e4e1c85..2858171a48b 100644 --- a/src/gs/bcknd/device/hip/gs_kernels.h +++ b/src/gs/bcknd/device/hip/gs_kernels.h @@ -32,6 +32,9 @@ POSSIBILITY OF SUCH DAMAGE. */ +#ifndef __GS_GS_KERNELS__ +#define __GS_GS_KERNELS__ + /** * Device gather kernel for addition of data * \f$ v(dg(i)) = v(dg(i)) + u(gd(i)) \f$ @@ -74,7 +77,7 @@ __global__ void gather_kernel_add(T * __restrict__ v, } } } - + } /** @@ -91,7 +94,7 @@ __global__ void gather_kernel_mul(T * __restrict__ v, const int * __restrict__ gd, const int nb, const int * __restrict__ b, - const int * __restrict__ bo) { + const int * __restrict__ bo) { const int idx = blockIdx.x * blockDim.x + threadIdx.x; const int str = blockDim.x * gridDim.x; @@ -136,7 +139,7 @@ __global__ void gather_kernel_min(T * __restrict__ v, const int * __restrict__ gd, const int nb, const int * __restrict__ b, - const int * __restrict__ bo) { + const int * __restrict__ bo) { const int idx = blockIdx.x * blockDim.x + threadIdx.x; const int str = blockDim.x * gridDim.x; @@ -164,7 +167,7 @@ __global__ void gather_kernel_min(T * __restrict__ v, } } } - + } /** @@ -181,7 +184,7 @@ __global__ void gather_kernel_max(T * __restrict__ v, const int * __restrict__ gd, const int nb, const int * __restrict__ b, - const int * __restrict__ bo) { + const int * __restrict__ bo) { const int idx = blockIdx.x * blockDim.x + threadIdx.x; const int str = blockDim.x * gridDim.x; @@ -195,7 +198,7 @@ __global__ void gather_kernel_max(T * __restrict__ v, } v[dg[k] - 1] = tmp; } - + if (o < 0) { for (int i = ((abs(o) - 1) + idx); i < m ; i += str) { v[dg[i] - 1] = u[gd[i] - 1]; @@ -209,7 +212,7 @@ __global__ void gather_kernel_max(T * __restrict__ v, } } } - + } /** @@ -226,25 +229,25 @@ __global__ void scatter_kernel(T * __restrict__ v, const int nb, const int *__restrict__ b, const int *__restrict__ bo) { - + const int idx = blockIdx.x * blockDim.x + threadIdx.x; const int str = blockDim.x * gridDim.x; - + for (int i = idx; i < nb; i += str) { const int blk_len = b[i]; const int k = bo[i]; T tmp = v[dg[k] - 1]; for (int j = 0; j < blk_len; j++) { u[gd[k + j] - 1] = tmp; - } + } } const int facet_offset = bo[nb - 1] + b[nb - 1]; - + for (int i = ((facet_offset - 1) + idx); i < m; i += str) { u[gd[i] - 1] = v[dg[i] - 1]; } - + } template< typename T > @@ -281,3 +284,5 @@ __global__ void gs_unpack_add_kernel(T * __restrict__ u, u[idx-1] += val; } } + +#endif // __GS_GS_KERNELS__ \ No newline at end of file diff --git a/src/gs/bcknd/device/opencl/gs_kernels.cl b/src/gs/bcknd/device/opencl/gs_kernels.cl index 9d978821f25..334b2922dc5 100644 --- a/src/gs/bcknd/device/opencl/gs_kernels.cl +++ b/src/gs/bcknd/device/opencl/gs_kernels.cl @@ -32,6 +32,10 @@ POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef __GS_GS_KERNELS__ +#define __GS_GS_KERNELS__ + /** * Device gather kernel for addition of data * \f$ v(dg(i)) = v(dg(i)) + u(gd(i)) \f$ @@ -241,3 +245,5 @@ __kernel void scatter_kernel(__global real * __restrict__ v, } } + +#endif // __GS_GS_KERNELS__ \ No newline at end of file diff --git a/src/gs/bcknd/sx/gs_sx.f90 b/src/gs/bcknd/sx/gs_sx.f90 index 58d68215d7d..3f5535d5818 100644 --- a/src/gs/bcknd/sx/gs_sx.f90 +++ b/src/gs/bcknd/sx/gs_sx.f90 @@ -51,9 +51,9 @@ module gs_sx procedure, pass(this) :: gather => gs_gather_sx procedure, pass(this) :: scatter => gs_scatter_sx end type gs_sx_t - + contains - + !> SX backend initialisation subroutine gs_sx_init(this, nlocal, nshared, nlcl_blks, nshrd_blks) class(gs_sx_t), intent(inout) :: this @@ -61,7 +61,7 @@ subroutine gs_sx_init(this, nlocal, nshared, nlcl_blks, nshrd_blks) integer, intent(in) :: nshared integer, intent(in) :: nlcl_blks integer, intent(in) :: nshrd_blks - + call this%free() this%nlocal = nlocal @@ -69,7 +69,7 @@ subroutine gs_sx_init(this, nlocal, nshared, nlcl_blks, nshrd_blks) allocate(this%local_wrk(nlocal)) allocate(this%shared_wrk(nshared)) - + end subroutine gs_sx_init !> SX backend deallocation @@ -86,7 +86,7 @@ subroutine gs_sx_free(this) this%nlocal = 0 this%nshared = 0 - + end subroutine gs_sx_free !> Gather kernel @@ -131,9 +131,9 @@ subroutine gs_gather_sx(this, v, m, o, dg, u, n, gd, nb, b, op, shrd) end select end associate end if - + end subroutine gs_gather_sx - + !> Gather kernel for addition of data !! \f$ v(dg(i)) = v(dg(i)) + u(gd(i)) \f$ subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b, w) @@ -150,7 +150,7 @@ subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b, w) integer :: i real(kind=rp) :: tmp - v = 0d0 + v = 0d0 do i = 1, abs(o) - 1 w(i) = u(gd(i)) end do @@ -158,7 +158,7 @@ subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b, w) do i = 1, abs(o) - 1 v(dg(i)) = v(dg(i)) + w(i) end do - + if (o .lt. 0) then do i = abs(o), m v(dg(i)) = u(gd(i)) @@ -169,7 +169,7 @@ subroutine gs_gather_kernel_add(v, m, o, dg, u, n, gd, nb, b, w) v(dg(i)) = tmp end do end if - + end subroutine gs_gather_kernel_add !> Gather kernel for multiplication of data @@ -187,7 +187,7 @@ subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b, w) integer, intent(in) :: o integer :: i real(kind=rp) :: tmp - + do i = 1, abs(o) - 1 w(i) = u(gd(i)) end do @@ -195,7 +195,7 @@ subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b, w) do i = 1, abs(o) - 1 v(dg(i)) = v(dg(i)) * w(i) end do - + if (o .lt. 0) then do i = abs(o), m v(dg(i)) = u(gd(i)) @@ -206,9 +206,9 @@ subroutine gs_gather_kernel_mul(v, m, o, dg, u, n, gd, nb, b, w) v(dg(i)) = tmp end do end if - + end subroutine gs_gather_kernel_mul - + !> Gather kernel for minimum of data !! \f$ v(dg(i)) = \min(v(dg(i)), u(gd(i))) \f$ subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b, w) @@ -232,7 +232,7 @@ subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b, w) do i = 1, abs(o) - 1 v(dg(i)) = min(v(dg(i)), w(i)) end do - + if (o .lt. 0) then do i = abs(o), m v(dg(i)) = u(gd(i)) @@ -243,7 +243,7 @@ subroutine gs_gather_kernel_min(v, m, o, dg, u, n, gd, nb, b, w) v(dg(i)) = tmp end do end if - + end subroutine gs_gather_kernel_min !> Gather kernel for maximum of data @@ -269,7 +269,7 @@ subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b, w) do i = 1, abs(o) - 1 v(dg(i)) = max(v(dg(i)), w(i)) end do - + if (o .lt. 0) then do i = abs(o), m v(dg(i)) = u(gd(i)) @@ -280,7 +280,7 @@ subroutine gs_gather_kernel_max(v, m, o, dg, u, n, gd, nb, b, w) v(dg(i)) = tmp end do end if - + end subroutine gs_gather_kernel_max !> Scatter kernel @todo Make the kernel abstract @@ -296,7 +296,7 @@ subroutine gs_scatter_sx(this, v, m, dg, u, n, gd, nb, b, shrd, event) integer, dimension(nb), intent(inout) :: b logical, intent(in) :: shrd type(c_ptr) :: event - + if (.not. shrd) then call gs_scatter_kernel(v, m, dg, u, n, gd, nb, b, this%local_wrk) else if (shrd) then @@ -317,7 +317,7 @@ subroutine gs_scatter_kernel(v, m, dg, u, n, gd, nb, b, w) integer, dimension(m), intent(inout) :: gd integer, dimension(nb), intent(inout) :: b integer :: i - + !NEC$ IVDEP do i = 1, m w(i) = v(dg(i)) @@ -327,7 +327,7 @@ subroutine gs_scatter_kernel(v, m, dg, u, n, gd, nb, b, w) do i = 1, m u(gd(i)) = w(i) end do - + end subroutine gs_scatter_kernel end module gs_sx diff --git a/src/gs/gather_scatter.f90 b/src/gs/gather_scatter.f90 index 6b8473a994e..5665d55c830 100644 --- a/src/gs/gather_scatter.f90 +++ b/src/gs/gather_scatter.f90 @@ -84,7 +84,7 @@ module gather_scatter end type gs_t public :: GS_OP_ADD, GS_OP_MUL, GS_OP_MIN, GS_OP_MAX - + contains !> Initialize a gather-scatter kernel @@ -107,9 +107,9 @@ subroutine gs_init(gs, dofmap, bcknd) call gs%free() call neko_log%section('Gather-Scatter') - + gs%dofmap => dofmap - + ! Here one could use some heuristic or autotuning to select comm method, ! such as only using device MPI when there is enough data. !use_device_mpi = NEKO_DEVICE_MPI .and. gs%nshared .gt. 20000 @@ -130,17 +130,17 @@ subroutine gs_init(gs, dofmap, bcknd) glb_nlocal = int(gs%nlocal, i8) glb_nshared = int(gs%nshared, i8) - + if (pe_rank .eq. 0) then call MPI_Reduce(MPI_IN_PLACE, glb_nlocal, 1, & MPI_INTEGER8, MPI_SUM, 0, NEKO_COMM, ierr) - + call MPI_Reduce(MPI_IN_PLACE, glb_nshared, 1, & MPI_INTEGER8, MPI_SUM, 0, NEKO_COMM, ierr) else call MPI_Reduce(glb_nlocal, glb_nlocal, 1, & MPI_INTEGER8, MPI_SUM, 0, NEKO_COMM, ierr) - + call MPI_Reduce(glb_nshared, glb_nshared, 1, & MPI_INTEGER8, MPI_SUM, 0, NEKO_COMM, ierr) end if @@ -149,7 +149,7 @@ subroutine gs_init(gs, dofmap, bcknd) call neko_log%message(log_buf) write(log_buf, '(A,I12)') 'Avg. external: ', glb_nshared/pe_size call neko_log%message(log_buf) - + if (present(bcknd)) then bcknd_ = bcknd else @@ -185,7 +185,7 @@ subroutine gs_init(gs, dofmap, bcknd) write(log_buf, '(A)') 'Backend : ' // trim(bcknd_str) call neko_log%message(log_buf) - + call gs%bcknd%init(gs%nlocal, gs%nshared, gs%nlocal_blks, gs%nshared_blks) @@ -201,14 +201,15 @@ subroutine gs_init(gs, dofmap, bcknd) select type(c => gs%comm) type is (gs_device_mpi_t) call get_environment_variable("NEKO_GS_STRTGY", env_strtgy, env_len) - if (env_len .eq. 0) then + if (env_len .eq. 0) then allocate(tmp(dofmap%size())) call device_map(tmp, tmp_d, dofmap%size()) tmp = 1.0_rp - call device_memcpy(tmp, tmp_d, dofmap%size(), HOST_TO_DEVICE) + call device_memcpy(tmp, tmp_d, dofmap%size(), & + HOST_TO_DEVICE, sync=.false.) call gs_op_vector(gs, tmp, dofmap%size(), GS_OP_ADD) - - do i = 1, size(strtgy) + + do i = 1, size(strtgy) c%nb_strtgy = strtgy(i) call device_sync call MPI_Barrier(NEKO_COMM) @@ -218,13 +219,13 @@ subroutine gs_init(gs, dofmap, bcknd) end do strtgy_time(i) = (MPI_Wtime() - strtgy_time(i)) / 100d0 end do - + call device_deassociate(tmp) call device_free(tmp_d) deallocate(tmp) - + c%nb_strtgy = strtgy(minloc(strtgy_time, 1)) - + avg_strtgy = minloc(strtgy_time, 1) call MPI_Allreduce(MPI_IN_PLACE, avg_strtgy, 1, & MPI_INTEGER, MPI_SUM, NEKO_COMM) @@ -239,22 +240,22 @@ subroutine gs_init(gs, dofmap, bcknd) if (i .lt. 1 .or. i .gt. 4) then call neko_error('Invalid gs sync strtgy') end if - + c%nb_strtgy = strtgy(i) avg_strtgy = i write(log_buf, '(A,B0.2,A)') 'Env. strtgy : [', & strtgy(avg_strtgy),']' end if - - call neko_log%message(log_buf) + + call neko_log%message(log_buf) end select end if end if call neko_log%end_section() - + end subroutine gs_init !> Deallocate a gather-scatter kernel @@ -266,7 +267,7 @@ subroutine gs_free(gs) if (allocated(gs%local_gs)) then deallocate(gs%local_gs) end if - + if (allocated(gs%local_dof_gs)) then deallocate(gs%local_dof_gs) end if @@ -282,7 +283,7 @@ subroutine gs_free(gs) if (allocated(gs%shared_gs)) then deallocate(gs%shared_gs) end if - + if (allocated(gs%shared_dof_gs)) then deallocate(gs%shared_dof_gs) end if @@ -311,7 +312,7 @@ subroutine gs_free(gs) call gs%comm%free() deallocate(gs%comm) end if - + end subroutine gs_free !> Setup mapping of dofs to gather-scatter operations @@ -339,24 +340,24 @@ subroutine gs_init_mapping(gs) !>@note this might be a bit overkill, !!but having many collisions makes the init take too long. call sdm%init(dofmap%size(), i) - + call local_dof%init() call dof_local%init() call local_face_dof%init() - call face_dof_local%init() - + call face_dof_local%init() + call shared_dof%init() call dof_shared%init() - + call shared_face_dof%init() - call face_dof_shared%init() + call face_dof_shared%init() ! ! Setup mapping for dofs points ! - + max_id = 0 max_sid = 0 do i = 1, msh%nelv @@ -496,7 +497,7 @@ subroutine gs_init_mapping(gs) id = linear_index(j, ly, 1, i, lx, ly, lz) call dof_shared%push(id) end do - + else do j = 2, lx - 1 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, 1, i), max_id) @@ -595,7 +596,7 @@ subroutine gs_init_mapping(gs) id = linear_index(1, 1, l, i, lx, ly, lz) call dof_shared%push(id) end do - else + else do l = 2, lz - 1 id = gs_mapping_add_dof(dm, dofmap%dof(1, 1, l, i), max_id) call local_dof%push(id) @@ -603,7 +604,7 @@ subroutine gs_init_mapping(gs) call dof_local%push(id) end do end if - + if (dofmap%shared_dof(lx, 1, 2, i)) then do l = 2, lz - 1 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, 1, l, i), max_sid) @@ -635,14 +636,14 @@ subroutine gs_init_mapping(gs) call dof_local%push(id) end do end if - + if (dofmap%shared_dof(lx, ly, 2, i)) then do l = 2, lz - 1 id = gs_mapping_add_dof(sdm, dofmap%dof(lx, ly, l, i), max_sid) call shared_dof%push(id) id = linear_index(lx, ly, l, i, lx, ly, lz) call dof_shared%push(id) - end do + end do else do l = 2, lz - 1 id = gs_mapping_add_dof(dm, dofmap%dof(lx, ly, l, i), max_id) @@ -688,7 +689,7 @@ subroutine gs_init_mapping(gs) id = linear_index(j, ly, 1, i, lx, ly, lz) call face_dof_shared%push(id) end do - + else do j = 2, lx - 1 id = gs_mapping_add_dof(dm, dofmap%dof(j, ly, 1, i), max_id) @@ -738,7 +739,7 @@ subroutine gs_init_mapping(gs) end if end if end do - else + else do i = 1, msh%nelv ! Facets in x-direction (s, t)-plane @@ -785,7 +786,7 @@ subroutine gs_init_mapping(gs) end do end if end if - + ! Facets in y-direction (r, t)-plane if (msh%facet_neigh(3, i) .ne. 0) then if (dofmap%shared_dof(2, 1, 2, i)) then @@ -830,7 +831,7 @@ subroutine gs_init_mapping(gs) end do end if end if - + ! Facets in z-direction (r, s)-plane if (msh%facet_neigh(5, i) .ne. 0) then if (dofmap%shared_dof(2, 2, 1, i)) then @@ -877,13 +878,13 @@ subroutine gs_init_mapping(gs) end if end do end if - + call dm%free() - + gs%nlocal = local_dof%size() + local_face_dof%size() gs%local_facet_offset = local_dof%size() + 1 - + ! Finalize local dof to gather-scatter index allocate(gs%local_dof_gs(gs%nlocal)) @@ -941,15 +942,15 @@ subroutine gs_init_mapping(gs) end do end select call face_dof_local%free() - + call gs_qsort_dofmap(gs%local_dof_gs, gs%local_gs_dof, & gs%nlocal, 1, gs%nlocal) - + call gs_find_blks(gs%local_dof_gs, gs%local_blk_len, & gs%nlocal_blks, gs%nlocal, gs%local_facet_offset) - + ! Allocate buffer for local gs-ops - allocate(gs%local_gs(gs%nlocal)) + allocate(gs%local_gs(gs%nlocal)) gs%nshared = shared_dof%size() + shared_face_dof%size() gs%shared_facet_offset = shared_dof%size() + 1 @@ -983,11 +984,11 @@ subroutine gs_init_mapping(gs) end do end select call shared_face_dof%free() - + ! Finalize shared gather-scatter index to dof allocate(gs%shared_gs_dof(gs%nshared)) - ! Add dofs on points and edges + ! Add dofs on points and edges ! We should use the %array() procedure, which works great for ! GNU, Intel and NEC, but it breaks horribly on Cray when using @@ -1022,9 +1023,9 @@ subroutine gs_init_mapping(gs) call gs_find_blks(gs%shared_dof_gs, gs%shared_blk_len, & gs%nshared_blks, gs%nshared, gs%shared_facet_offset) end if - + contains - + !> Register a unique dof function gs_mapping_add_dof(map_, dof, max_id) result(id) type(htable_i8_t), intent(inout) :: map_ @@ -1037,7 +1038,7 @@ function gs_mapping_add_dof(map_, dof, max_id) result(id) call map_%set(dof, max_id) id = max_id end if - + end function gs_mapping_add_dof !> Sort the dof lists based on the dof to gather-scatter list @@ -1052,16 +1053,16 @@ recursive subroutine gs_qsort_dofmap(dg, gd, n, lo, hi) j = hi + 1 pivot = dg((lo + hi) / 2) do - do + do i = i + 1 if (dg(i) .ge. pivot) exit end do - - do + + do j = j - 1 if (dg(j) .le. pivot) exit end do - + if (i .lt. j) then tmp = dg(i) dg(i) = dg(j) @@ -1076,10 +1077,10 @@ recursive subroutine gs_qsort_dofmap(dg, gd, n, lo, hi) else exit end if - end do + end do if (lo .lt. j) call gs_qsort_dofmap(dg, gd, n, lo, j) if (i .lt. hi) call gs_qsort_dofmap(dg, gd, n, i, hi) - + end subroutine gs_qsort_dofmap !> Find blocks sharing dofs in non-facet data @@ -1092,7 +1093,7 @@ subroutine gs_find_blks(dg, blk_len, nblks, n, m) integer :: i, j integer :: id, count type(stack_i4_t), target :: blks - + call blks%init() i = 1 do while( i .lt. m) @@ -1116,7 +1117,7 @@ subroutine gs_find_blks(dg, blk_len, nblks, n, m) end do end select call blks%free() - + end subroutine gs_find_blks end subroutine gs_init_mapping @@ -1135,7 +1136,7 @@ subroutine gs_schedule(gs) integer :: nshared_unique nshared_unique = gs%shared_dofs%num_entries() - + call it%init(gs%shared_dofs) allocate(send_buf(nshared_unique)) i = 1 @@ -1146,7 +1147,7 @@ subroutine gs_schedule(gs) call send_pe%init() call recv_pe%init() - + ! ! Schedule exchange of shared dofs @@ -1163,7 +1164,7 @@ subroutine gs_schedule(gs) do i = 1, size(gs%dofmap%msh%neigh_order) src = modulo(pe_rank - gs%dofmap%msh%neigh_order(i) + pe_size, pe_size) dst = modulo(pe_rank + gs%dofmap%msh%neigh_order(i), pe_size) - + if (gs%dofmap%msh%neigh(src)) then call MPI_Irecv(recv_buf, max_recv, MPI_INTEGER8, & src, 0, NEKO_COMM, recv_req, ierr) @@ -1177,7 +1178,7 @@ subroutine gs_schedule(gs) if (gs%dofmap%msh%neigh(src)) then call MPI_Wait(recv_req, status, ierr) call MPI_Get_count(status, MPI_INTEGER8, n_recv, ierr) - + do j = 1, n_recv shared_flg(j) = gs%shared_dofs%get(recv_buf(j), shared_gs_id) if (shared_flg(j) .eq. 0) then @@ -1185,7 +1186,7 @@ subroutine gs_schedule(gs) call gs%comm%recv_dof(src)%push(shared_gs_id) end if end do - + if (gs%comm%recv_dof(src)%size() .gt. 0) then call recv_pe%push(src) end if @@ -1194,7 +1195,7 @@ subroutine gs_schedule(gs) if (gs%dofmap%msh%neigh(dst)) then call MPI_Wait(send_req, MPI_STATUS_IGNORE, ierr) call MPI_Irecv(recv_flg, max_recv, MPI_INTEGER2, & - dst, 0, NEKO_COMM, recv_req, ierr) + dst, 0, NEKO_COMM, recv_req, ierr) end if if (gs%dofmap%msh%neigh(src)) then @@ -1205,10 +1206,10 @@ subroutine gs_schedule(gs) if (gs%dofmap%msh%neigh(dst)) then call MPI_Wait(recv_req, status, ierr) call MPI_Get_count(status, MPI_INTEGER2, n_recv, ierr) - + do j = 1, n_recv if (recv_flg(j) .eq. 0) then - tmp = gs%shared_dofs%get(send_buf(j), shared_gs_id) + tmp = gs%shared_dofs%get(send_buf(j), shared_gs_id) !> @todo don't touch others data... call gs%comm%send_dof(dst)%push(shared_gs_id) end if @@ -1222,18 +1223,18 @@ subroutine gs_schedule(gs) if (gs%dofmap%msh%neigh(src)) then call MPI_Wait(send_req, MPI_STATUS_IGNORE, ierr) end if - + end do call gs%comm%init(send_pe, recv_pe) - + call send_pe%free() call recv_pe%free() deallocate(send_buf) deallocate(recv_flg) deallocate(shared_flg) - !This arrays seems to take massive amounts of memory... + !This arrays seems to take massive amounts of memory... call gs%shared_dofs%free() end subroutine gs_schedule @@ -1244,21 +1245,21 @@ subroutine gs_op_fld(gs, u, op, event) type(field_t), intent(inout) :: u type(c_ptr), optional, intent(inout) :: event integer :: n, op - + n = u%msh%nelv * u%Xh%lx * u%Xh%ly * u%Xh%lz if (present(event)) then call gs_op_vector(gs, u%x, n, op, event) else call gs_op_vector(gs, u%x, n, op) end if - + end subroutine gs_op_fld - + !> Gather-scatter operation on a rank 4 array subroutine gs_op_r4(gs, u, n, op, event) class(gs_t), intent(inout) :: gs integer, intent(in) :: n - real(kind=rp), dimension(:,:,:,:), intent(inout) :: u + real(kind=rp), contiguous, dimension(:,:,:,:), intent(inout) :: u type(c_ptr), optional, intent(inout) :: event integer :: op @@ -1267,9 +1268,9 @@ subroutine gs_op_r4(gs, u, n, op, event) else call gs_op_vector(gs, u, n, op) end if - + end subroutine gs_op_r4 - + !> Gather-scatter operation on a vector @a u with op @a op subroutine gs_op_vector(gs, u, n, op, event) class(gs_t), intent(inout) :: gs @@ -1277,31 +1278,31 @@ subroutine gs_op_vector(gs, u, n, op, event) real(kind=rp), dimension(n), intent(inout) :: u type(c_ptr), optional, intent(inout) :: event integer :: m, l, op, lo, so - + lo = gs%local_facet_offset so = -gs%shared_facet_offset m = gs%nlocal l = gs%nshared - call profiler_start_region("gather-scatter") + call profiler_start_region("gather-scatter", 5) ! Gather shared dofs if (pe_size .gt. 1) then - call profiler_start_region("gs_nbrecv") + call profiler_start_region("gs_nbrecv", 13) call gs%comm%nbrecv() call profiler_end_region - call profiler_start_region("gs_gather_shared") + call profiler_start_region("gs_gather_shared", 14) call gs%bcknd%gather(gs%shared_gs, l, so, gs%shared_dof_gs, u, n, & gs%shared_gs_dof, gs%nshared_blks, gs%shared_blk_len, op, .true.) call profiler_end_region - call profiler_start_region("gs_nbsend") + call profiler_start_region("gs_nbsend", 6) call gs%comm%nbsend(gs%shared_gs, l, & gs%bcknd%gather_event, gs%bcknd%gs_stream) call profiler_end_region - + end if - + ! Gather-scatter local dofs - call profiler_start_region("gs_local") + call profiler_start_region("gs_local", 12) call gs%bcknd%gather(gs%local_gs, m, lo, gs%local_dof_gs, u, n, & gs%local_gs_dof, gs%nlocal_blks, gs%local_blk_len, op, .false.) call gs%bcknd%scatter(gs%local_gs, m, gs%local_dof_gs, u, n, & @@ -1309,10 +1310,10 @@ subroutine gs_op_vector(gs, u, n, op, event) call profiler_end_region ! Scatter shared dofs if (pe_size .gt. 1) then - call profiler_start_region("gs_nbwait") + call profiler_start_region("gs_nbwait", 7) call gs%comm%nbwait(gs%shared_gs, l, op, gs%bcknd%gs_stream) call profiler_end_region - call profiler_start_region("gs_scatter_shared") + call profiler_start_region("gs_scatter_shared", 15) if (present(event)) then call gs%bcknd%scatter(gs%shared_gs, l,& gs%shared_dof_gs, u, n, & @@ -1328,7 +1329,7 @@ subroutine gs_op_vector(gs, u, n, op, event) end if call profiler_end_region - + end subroutine gs_op_vector - + end module gather_scatter diff --git a/src/gs/gs_bcknd.f90 b/src/gs/gs_bcknd.f90 index 8d65a6a2dbe..efad3707b0e 100644 --- a/src/gs/gs_bcknd.f90 +++ b/src/gs/gs_bcknd.f90 @@ -39,7 +39,7 @@ module gs_bcknd integer, public, parameter :: GS_BCKND_CPU = 1, GS_BCKND_SX = 2, & GS_BCKND_DEV = 3 - + !> Gather-scatter backend type, public, abstract :: gs_bcknd_t type(c_ptr) :: gather_event = C_NULL_PTR @@ -76,7 +76,7 @@ end subroutine gs_backend_free !! \f$ v(dg(i)) = op(v(dg(i)), u(gd(i)) \f$ abstract interface subroutine gs_gather(this, v, m, o, dg, u, n, gd, nb, b, op, shrd) - import gs_bcknd_t + import gs_bcknd_t import rp integer, intent(in) :: m integer, intent(in) :: n @@ -89,10 +89,10 @@ subroutine gs_gather(this, v, m, o, dg, u, n, gd, nb, b, op, shrd) integer, dimension(nb), intent(inout) :: b integer, intent(in) :: o integer, intent(in) :: op - logical, intent(in) :: shrd + logical, intent(in) :: shrd end subroutine gs_gather end interface - + !> Abstract interface for the Scatter kernel !! \f$ u(gd(i) = v(dg(i)) \f$ abstract interface @@ -103,7 +103,7 @@ subroutine gs_scatter(this, v, m, dg, u, n, gd, nb, b, shrd, event) integer, intent(in) :: m integer, intent(in) :: n integer, intent(in) :: nb - class(gs_bcknd_t), intent(inout) :: this + class(gs_bcknd_t), intent(inout) :: this real(kind=rp), dimension(m), intent(inout) :: v integer, dimension(m), intent(inout) :: dg real(kind=rp), dimension(n), intent(inout) :: u @@ -114,5 +114,5 @@ subroutine gs_scatter(this, v, m, dg, u, n, gd, nb, b, shrd, event) end subroutine gs_scatter end interface - + end module gs_bcknd diff --git a/src/gs/gs_comm.f90 b/src/gs/gs_comm.f90 index 398c78ac423..f3c573e823e 100644 --- a/src/gs/gs_comm.f90 +++ b/src/gs/gs_comm.f90 @@ -35,7 +35,7 @@ module gs_comm use num_types, only : rp use comm, only : pe_size use stack, only : stack_i4_t - use, intrinsic :: iso_c_binding + use, intrinsic :: iso_c_binding implicit none private @@ -63,10 +63,10 @@ module gs_comm abstract interface subroutine gs_comm_init(this, send_pe, recv_pe) import gs_comm_t - import stack_i4_t + import stack_i4_t class(gs_comm_t), intent(inout) :: this type(stack_i4_t), intent(inout) :: send_pe - type(stack_i4_t), intent(inout) :: recv_pe + type(stack_i4_t), intent(inout) :: recv_pe end subroutine gs_comm_init end interface @@ -132,7 +132,7 @@ subroutine init_dofs(this) call this%send_dof(i)%init() call this%recv_dof(i)%init() end do - + end subroutine init_dofs subroutine free_dofs(this) @@ -152,7 +152,7 @@ subroutine free_dofs(this) end do deallocate(this%recv_dof) end if - + end subroutine free_dofs subroutine init_order(this, send_pe, recv_pe) @@ -161,7 +161,7 @@ subroutine init_order(this, send_pe, recv_pe) type(stack_i4_t), intent(inout) :: recv_pe integer, pointer :: sp(:) integer :: i - + allocate(this%send_pe(send_pe%size())) sp => send_pe%array() @@ -175,7 +175,7 @@ subroutine init_order(this, send_pe, recv_pe) do i = 1, recv_pe%size() this%recv_pe(i) = sp(i) end do - + end subroutine init_order subroutine free_order(this) @@ -188,7 +188,7 @@ subroutine free_order(this) if (allocated(this%recv_pe)) then deallocate(this%recv_pe) end if - + end subroutine free_order - + end module gs_comm diff --git a/src/gs/gs_mpi.f90 b/src/gs/gs_mpi.f90 index c4e4c84e485..a6e358be1ab 100644 --- a/src/gs/gs_mpi.f90 +++ b/src/gs/gs_mpi.f90 @@ -30,7 +30,7 @@ ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -!> Defines MPI gather-scatter communication +!> Defines MPI gather-scatter communication module gs_mpi use neko_config use num_types @@ -38,18 +38,18 @@ module gs_mpi use gs_ops use stack, only : stack_i4_t use comm - use, intrinsic :: iso_c_binding + use, intrinsic :: iso_c_binding !$ use omp_lib implicit none private - + !> MPI buffer for non-blocking operations type, private :: gs_comm_mpi_t type(MPI_Status) :: status type(MPI_Request) :: request logical :: flag real(kind=rp), allocatable :: data(:) - end type gs_comm_mpi_t + end type gs_comm_mpi_t !> Gather-scatter communication using MPI type, public, extends(gs_comm_t) :: gs_mpi_t @@ -69,10 +69,10 @@ module gs_mpi subroutine gs_mpi_init(this, send_pe, recv_pe) class(gs_mpi_t), intent(inout) :: this type(stack_i4_t), intent(inout) :: send_pe - type(stack_i4_t), intent(inout) :: recv_pe + type(stack_i4_t), intent(inout) :: recv_pe integer, pointer :: sp(:), rp(:) integer :: i - + call this%init_order(send_pe, recv_pe) allocate(this%send_buf(send_pe%size())) @@ -116,7 +116,7 @@ subroutine gs_mpi_free(this) call this%free_order() call this%free_dofs() - + end subroutine gs_mpi_free !> Post non-blocking send operations @@ -130,8 +130,8 @@ subroutine gs_nbsend_mpi(this, u, n, deps, strm) integer , pointer :: sp(:) thrdid = 0 - !$ thrdid = omp_get_thread_num() - +!$ thrdid = omp_get_thread_num() + do i = 1, size(this%send_pe) dst = this%send_pe(i) sp => this%send_dof(dst)%array() @@ -157,7 +157,7 @@ subroutine gs_nbrecv_mpi(this) thrdid = 0 !$ thrdid = omp_get_thread_num() - + do i = 1, size(this%recv_pe) ! We should not need this extra associate block, ant it works ! great without it for GNU, Intel, NEC and Cray, but throws an @@ -169,13 +169,13 @@ subroutine gs_nbrecv_mpi(this) end associate this%recv_buf(i)%flag = .false. end do - + end subroutine gs_nbrecv_mpi !> Wait for non-blocking operations subroutine gs_nbwait_mpi(this, u, n, op, strm) class(gs_mpi_t), intent(inout) :: this - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: u type(c_ptr), intent(inout) :: strm integer :: i, j, src, ierr @@ -185,7 +185,7 @@ subroutine gs_nbwait_mpi(this, u, n, op, strm) nreqs = size(this%recv_pe) - do while (nreqs .gt. 0) + do while (nreqs .gt. 0) do i = 1, size(this%recv_pe) if (.not. this%recv_buf(i)%flag) then call MPI_Test(this%recv_buf(i)%request, this%recv_buf(i)%flag, & @@ -223,7 +223,7 @@ subroutine gs_nbwait_mpi(this, u, n, op, strm) end do nreqs = size(this%send_pe) - do while (nreqs .gt. 0) + do while (nreqs .gt. 0) do i = 1, size(this%send_pe) if (.not. this%send_buf(i)%flag) then call MPI_Test(this%send_buf(i)%request, this%send_buf(i)%flag, & @@ -234,5 +234,5 @@ subroutine gs_nbwait_mpi(this, u, n, op, strm) end do end subroutine gs_nbwait_mpi - + end module gs_mpi diff --git a/src/gs/gs_ops.f90 b/src/gs/gs_ops.f90 index 1b5afa6a093..c7afde7c5dc 100644 --- a/src/gs/gs_ops.f90 +++ b/src/gs/gs_ops.f90 @@ -35,5 +35,5 @@ module gs_ops integer, public, parameter :: GS_OP_ADD = 1, GS_OP_MUL = 2, & GS_OP_MIN = 3, GS_OP_MAX = 4 - + end module gs_ops diff --git a/src/io/chkp_file.f90 b/src/io/chkp_file.f90 index a45c535244b..0cf37306457 100644 --- a/src/io/chkp_file.f90 +++ b/src/io/chkp_file.f90 @@ -35,7 +35,7 @@ module chkp_file use generic_file use field_series - use checkpoint + use checkpoint use num_types use field use dofmap, only: dofmap_t @@ -65,7 +65,7 @@ module chkp_file end type chkp_file_t contains - + !> Write a Neko checkpoint subroutine chkp_file_write(this, data, t) class(chkp_file_t), intent(inout) :: this @@ -98,7 +98,7 @@ subroutine chkp_file_write(this, data, t) else time = 0d0 end if - + select type(data) type is (chkp_t) @@ -108,16 +108,16 @@ subroutine chkp_file_write(this, data, t) .not. associated(data%p) ) then call neko_error('Checkpoint not initialized') end if - + u => data%u v => data%v w => data%w p => data%p msh => u%msh - + optional_fields = 0 - if (associated(data%ulag)) then + if (associated(data%ulag)) then ulag => data%ulag vlag => data%vlag wlag => data%wlag @@ -126,23 +126,23 @@ subroutine chkp_file_write(this, data, t) else write_lag = .false. end if - - if (associated(data%s)) then + + if (associated(data%s)) then s => data%s write_scalar = .true. optional_fields = optional_fields + 2 else write_scalar = .false. - end if + end if - if (associated(data%tlag)) then + if (associated(data%tlag)) then tlag => data%tlag dtlag => data%dtlag write_dtlag = .true. optional_fields = optional_fields + 4 else write_dtlag = .false. - end if + end if write_abvel = .false. if (associated(data%abx1)) then abx1 => data%abx1 @@ -166,7 +166,7 @@ subroutine chkp_file_write(this, data, t) call neko_error('Invalid data') end select - + suffix_pos = filename_suffix_pos(this%fname) write(id_str, '(i5.5)') this%counter fname = trim(this%fname(1:suffix_pos-1))//id_str//'.chkp' @@ -174,7 +174,7 @@ subroutine chkp_file_write(this, data, t) dof_offset = int(msh%offset_el, i8) * int(u%Xh%lx * u%Xh%ly * u%Xh%lz, i8) n_glb_dofs = int(u%Xh%lx * u%Xh%ly * u%Xh%lz, i8) * int(msh%glb_nelv, i8) - + call MPI_File_open(NEKO_COMM, trim(fname), & MPI_MODE_WRONLY + MPI_MODE_CREATE, MPI_INFO_NULL, fh, ierr) call MPI_File_write_all(fh, msh%glb_nelv, 1, MPI_INTEGER, status, ierr) @@ -182,21 +182,22 @@ subroutine chkp_file_write(this, data, t) call MPI_File_write_all(fh, u%Xh%lx, 1, MPI_INTEGER, status, ierr) call MPI_File_write_all(fh, optional_fields, 1, MPI_INTEGER, status, ierr) call MPI_File_write_all(fh, time, 1, MPI_DOUBLE_PRECISION, status, ierr) - - + + ! ! Dump mandatory checkpoint data ! - + + byte_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8) byte_offset = byte_offset + & dof_offset * int(MPI_REAL_PREC_SIZE, i8) call MPI_File_write_at_all(fh, byte_offset,u%x, u%dof%size(), & MPI_REAL_PRECISION, status, ierr) - mpi_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8) + mpi_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8) mpi_offset = mpi_offset +& n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) - + byte_offset = mpi_offset + & dof_offset * int(MPI_REAL_PREC_SIZE, i8) call MPI_File_write_at_all(fh, byte_offset, v%x, v%dof%size(), & @@ -259,10 +260,10 @@ subroutine chkp_file_write(this, data, t) end associate mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) end do - + end if - if (write_scalar) then + if (write_scalar) then byte_offset = mpi_offset + & dof_offset * int(MPI_REAL_PREC_SIZE, i8) call MPI_File_write_at_all(fh, byte_offset, s%x, p%dof%size(), & @@ -323,7 +324,7 @@ subroutine chkp_file_write(this, data, t) end associate mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) end do - + byte_offset = mpi_offset + & dof_offset * int(MPI_REAL_PREC_SIZE, i8) call MPI_File_write_at_all(fh, byte_offset, abs1%x, abx1%dof%size(), & @@ -335,13 +336,13 @@ subroutine chkp_file_write(this, data, t) MPI_REAL_PRECISION, status, ierr) mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) end if - + call MPI_File_close(fh, ierr) this%counter = this%counter + 1 - + end subroutine chkp_file_write - + !> Load a checkpoint from file subroutine chkp_file_read(this, data) class(chkp_file_t) :: this @@ -375,10 +376,11 @@ subroutine chkp_file_read(this, data) real(kind=rp) :: center_x, center_y, center_z integer :: i, e type(dofmap_t) :: dof + + call this%check_exists() - select type(data) - type is (chkp_t) + type is (chkp_t) if ( .not. associated(data%u) .or. & .not. associated(data%v) .or. & @@ -386,7 +388,7 @@ subroutine chkp_file_read(this, data) .not. associated(data%p) ) then call neko_error('Checkpoint not initialized') end if - + u => data%u v => data%v w => data%w @@ -399,9 +401,9 @@ subroutine chkp_file_read(this, data) else !< The checkpoint was written on the same mesh msh => u%msh this%mesh2mesh = .false. - end if + end if - if (associated(data%ulag)) then + if (associated(data%ulag)) then ulag => data%ulag vlag => data%vlag wlag => data%wlag @@ -410,13 +412,13 @@ subroutine chkp_file_read(this, data) read_lag = .false. end if - if (associated(data%s)) then + if (associated(data%s)) then s => data%s read_scalar = .true. else read_scalar = .false. end if - if (associated(data%dtlag)) then + if (associated(data%dtlag)) then dtlag => data%dtlag tlag => data%tlag read_dtlag = .true. @@ -442,13 +444,13 @@ subroutine chkp_file_read(this, data) end if chkp => data - + class default call neko_error('Invalid data') end select - + call MPI_File_open(NEKO_COMM, trim(this%fname), & MPI_MODE_RDONLY, MPI_INFO_NULL, fh, ierr) call MPI_File_read_all(fh, glb_nelv, 1, MPI_INTEGER, status, ierr) @@ -509,23 +511,23 @@ subroutine chkp_file_read(this, data) deallocate(y_coord) deallocate(z_coord) else - call this%space_interp%init(this%sim_Xh, this%chkp_Xh) + call this%space_interp%init(this%sim_Xh, this%chkp_Xh) end if dof_offset = int(msh%offset_el, i8) * int(this%chkp_Xh%lxyz, i8) - n_glb_dofs = int(this%chkp_Xh%lxyz, i8) * int(msh%glb_nelv, i8) - + n_glb_dofs = int(this%chkp_Xh%lxyz, i8) * int(msh%glb_nelv, i8) + ! ! Read mandatory checkpoint data ! - + byte_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8) byte_offset = byte_offset + & dof_offset * int(MPI_REAL_PREC_SIZE, i8) call this%read_field(fh, byte_offset, u%x, nel) - mpi_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8) + mpi_offset = 4_i8 * int(MPI_INTEGER_SIZE,i8) + int(MPI_DOUBLE_PRECISION_SIZE,i8) mpi_offset = mpi_offset +& n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) - + byte_offset = mpi_offset + & dof_offset * int(MPI_REAL_PREC_SIZE, i8) call this%read_field(fh, byte_offset, v%x, nel) @@ -559,7 +561,7 @@ subroutine chkp_file_read(this, data) call this%read_field(fh, byte_offset, vlag%lf(i)%x, nel) mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) end do - + do i = 1, wlag%size() byte_offset = mpi_offset + & dof_offset * int(MPI_REAL_PREC_SIZE, i8) @@ -568,21 +570,21 @@ subroutine chkp_file_read(this, data) end do end if - if (read_scalar) then + if (read_scalar) then byte_offset = mpi_offset + & dof_offset * int(MPI_REAL_PREC_SIZE, i8) call this%read_field(fh, byte_offset, s%x, nel) mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) end if - if (read_dtlag .and. have_dtlag .eq. 1) then + if (read_dtlag .and. have_dtlag .eq. 1) then call MPI_File_read_at_all(fh, mpi_offset, tlag, 10, MPI_REAL_PRECISION, status, ierr) mpi_offset = mpi_offset + 10_i8 * int(MPI_REAL_PREC_SIZE, i8) call MPI_File_read_at_all(fh, mpi_offset, dtlag, 10, MPI_REAL_PRECISION, status, ierr) mpi_offset = mpi_offset + 10_i8 * int(MPI_REAL_PREC_SIZE, i8) end if - if (read_abvel .and. have_abvel .eq. 1) then + if (read_abvel .and. have_abvel .eq. 1) then byte_offset = mpi_offset + & dof_offset * int(MPI_REAL_PREC_SIZE, i8) call this%read_field(fh, byte_offset, abx1%x, nel) @@ -608,7 +610,7 @@ subroutine chkp_file_read(this, data) call this%read_field(fh, byte_offset, abz2%x, nel) mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) end if - if (read_scalarlag .and. have_scalarlag .eq. 1) then + if (read_scalarlag .and. have_scalarlag .eq. 1) then do i = 1, slag%size() byte_offset = mpi_offset + & dof_offset * int(MPI_REAL_PREC_SIZE, i8) @@ -625,7 +627,7 @@ subroutine chkp_file_read(this, data) mpi_offset = mpi_offset + n_glb_dofs * int(MPI_REAL_PREC_SIZE, i8) end if - call MPI_File_close(fh, ierr) + call MPI_File_close(fh, ierr) call this%global_interp%free() call this%space_interp%free() @@ -643,7 +645,7 @@ subroutine chkp_read_field(this, fh, byte_offset, x, nel) type(MPI_Status) :: status integer :: ierr - allocate(read_array(this%chkp_Xh%lxyz*nel)) + allocate(read_array(this%chkp_Xh%lxyz*nel)) call rzero(read_array,this%chkp_xh%lxyz*nel) call MPI_File_read_at_all(fh, byte_offset, read_array, & @@ -657,5 +659,5 @@ subroutine chkp_read_field(this, fh, byte_offset, x, nel) end if deallocate(read_array) end subroutine chkp_read_field - + end module chkp_file diff --git a/src/io/chkp_output.f90 b/src/io/chkp_output.f90 index ec765ec35b1..8302348eeda 100644 --- a/src/io/chkp_output.f90 +++ b/src/io/chkp_output.f90 @@ -34,7 +34,7 @@ module chkp_output use checkpoint, only : chkp_t use output - use num_types, only : rp + use num_types, only : rp implicit none type, public, extends(output_t) :: chkp_output_t @@ -46,7 +46,7 @@ module chkp_output interface chkp_output_t module procedure chkp_output_init end interface chkp_output_t - + contains function chkp_output_init(chkp, name, path) result(this) @@ -66,7 +66,7 @@ function chkp_output_init(chkp, name, path) result(this) fname = 'fluid.chkp' end if - call output_init(this, fname) + call this%init_base(fname) this%chkp => chkp end function chkp_output_init @@ -79,5 +79,5 @@ subroutine chkp_output_sample(this, t) call this%file_%write(this%chkp, t) end subroutine chkp_output_sample - + end module chkp_output diff --git a/src/io/csv_file.f90 b/src/io/csv_file.f90 old mode 100755 new mode 100644 index d177a624a87..3bfca41158f --- a/src/io/csv_file.f90 +++ b/src/io/csv_file.f90 @@ -68,29 +68,32 @@ subroutine csv_file_write(this, data, t) real(kind=rp), intent(in), optional :: t real(kind=rp) :: time - type(vector_t), pointer :: vec => null() - type(matrix_t), pointer :: mat => null() + type(vector_t), pointer :: vec + type(matrix_t), pointer :: mat + + nullify(vec) + nullify(mat) select type(data) type is (vector_t) if (.not. allocated(data%x)) then call neko_error("Vector is not allocated! Use & - &vector%init() to associate your array & - &with a vector_t object") + &vector%init() to associate your array & + &with a vector_t object") end if vec => data type is (matrix_t) if (.not. allocated(data%x)) then call neko_error("Matrix is not allocated! Use & - &matrix%init() to associate your array & - &with a matrix_t object") + &matrix%init() to associate your array & + &with a matrix_t object") end if mat => data class default call neko_error("Invalid data. Expected vector_t or & - &matrix_t") + &matrix_t") end select ! Write is performed on rank 0 @@ -177,30 +180,35 @@ end subroutine csv_file_write_matrix subroutine csv_file_read(this, data) class(csv_file_t) :: this class(*), target, intent(inout) :: data - type(vector_t), pointer :: vec => null() - type(matrix_t), pointer :: mat => null() + type(vector_t), pointer :: vec + type(matrix_t), pointer :: mat + + call this%check_exists() + + nullify(vec) + nullify(mat) select type(data) type is (vector_t) vec => data if (.not. allocated(data%x)) then call neko_error("Vector is not allocated! Use & - &vector%init() to associate your array & - &with a vector_t object") + &vector%init() to associate your array & + &with a vector_t object") end if type is (matrix_t) mat => data if (.not. allocated(data%x)) then call neko_error("Matrix is not allocated! Use & - &matrix%init() to associate your array & - &with a matrix_t object") + &matrix%init() to associate your array & + &with a matrix_t object") end if class default call neko_error("Invalid data type for csv_file (expected: vector_t, & - &matrix_t)") + &matrix_t)") end select if (pe_rank .eq. 0) then @@ -301,7 +309,7 @@ function csv_file_count_lines(this) result(n) rewind(file_unit) n = 0 - + ! Keep reading (ierr = 0) until we reach the end (ierr != 0) do read (file_unit,*,iostat=ierr) diff --git a/src/io/data_streamer.F90 b/src/io/data_streamer.F90 index 820263b70ef..2407237f70f 100644 --- a/src/io/data_streamer.F90 +++ b/src/io/data_streamer.F90 @@ -36,24 +36,24 @@ module data_streamer use field, only: field_t use coefs, only: coef_t use utils, only: neko_warning - use device + use device use comm use neko_mpi_types use neko_config use, intrinsic :: iso_c_binding implicit none private - + !> Provides access to data streaming by interfacing with c++ !! ADIOS2 subroutines. !! @details - !! Adios2 is an API that allows for easy coupling of codes + !! Adios2 is an API that allows for easy coupling of codes !! through data streaming and gives the posibility to perform !! other IO operations such as data compression, etc. - !! This type wraps and interfaces the needed calls to allow + !! This type wraps and interfaces the needed calls to allow !! the use of the c++ routines that ultimately expose the data !! from neko to any executable that counts with a proper reader. - type, public :: data_streamer_t + type, public :: data_streamer_t !> Define if the execution is asyncrhonous integer :: if_asynch !> global element numbers @@ -65,6 +65,8 @@ module data_streamer procedure, pass(this) :: free => data_streamer_free !> Stream data procedure, pass(this) :: stream => data_streamer_stream + !> Stream back the data + procedure, pass(this) :: recieve => data_streamer_recieve end type data_streamer_t @@ -76,34 +78,20 @@ module data_streamer !! on the case. !! @param if_asynch Controls whether the asyncrhonous executions !! is to be enabled. - subroutine data_streamer_init(this, coef, if_asynch) + subroutine data_streamer_init(this, coef) class(data_streamer_t), intent(inout) :: this type(coef_t), intent(inout) :: coef - integer, intent(in) :: if_asynch - integer :: nelb, nelb2, nelv, nelgv,npts,e - - !Allocate and initialize the global element number - allocate(this%lglel(coef%msh%nelv)) - do e = 1, coef%msh%nelv - this%lglel(e) = e + coef%msh%offset_el - end do - - !Assign if the streaming is asynchronous - this%if_asynch = if_asynch + integer :: nelb, nelv, nelgv, npts, gdim !Assign the set up parameters nelv = coef%msh%nelv npts = coef%Xh%lx*coef%Xh%ly*coef%Xh%lz nelgv = coef%msh%glb_nelv nelb = coef%msh%offset_el - ! Alternative way to get nelb: - !nelb = elem_running_sum(nelv) - !nelb = nelb - nelv + gdim = coef%msh%gdim #ifdef HAVE_ADIOS2 - call fortran_adios2_setup(npts, nelv, nelb, nelgv, & - nelgv, coef%dof%x, coef%dof%y, & - coef%dof%z, if_asynch, NEKO_COMM) + call fortran_adios2_initialize(npts, nelv, nelb, nelgv, gdim, NEKO_COMM) #else call neko_warning('Is not being built with ADIOS2 support.') call neko_warning('Not able to use stream/compression functionality') @@ -117,46 +105,23 @@ end subroutine data_streamer_init subroutine data_streamer_free(this) class(data_streamer_t), intent(inout) :: this - if (allocated(this%lglel)) deallocate(this%lglel) - #ifdef HAVE_ADIOS2 call fortran_adios2_finalize() #else call neko_warning('Is not being built with ADIOS2 support.') call neko_warning('Not able to use stream/compression functionality') #endif - + end subroutine data_streamer_free - + !> streamer - !! wraps the adios2 stream function. - !! @param u velocity in x - !! @param v velocity in y - !! @param w velocity in z - !! @param p pressure - !! @param coef type - subroutine data_streamer_stream(this, u, v, w, p, coef) + !! @param fld array of shape field%x + subroutine data_streamer_stream(this, fld) class(data_streamer_t), intent(inout) :: this - type(coef_t), intent(inout) :: coef - type(field_t), intent(inout) :: u - type(field_t), intent(inout) :: v - type(field_t), intent(inout) :: w - type(field_t), intent(inout) :: p - integer :: nelv, npts - - nelv = coef%msh%nelv - npts = coef%Xh%lx*coef%Xh%ly*coef%Xh%lz - - if (NEKO_BCKND_DEVICE .eq. 1) then - ! Move the data to the CPU to be able to write it - call device_memcpy(u%x, u%x_d, nelv*npts, DEVICE_TO_HOST) - call device_memcpy(v%x, v%x_d, nelv*npts, DEVICE_TO_HOST) - call device_memcpy(w%x, w%x_d, nelv*npts, DEVICE_TO_HOST) - call device_memcpy(p%x, p%x_d, nelv*npts, DEVICE_TO_HOST, sync=.true.) - end if + real(kind=rp), dimension(:,:,:,:), intent(inout) :: fld #ifdef HAVE_ADIOS2 - call fortran_adios2_stream(this%lglel, p%x, u%x, v%x, w%x, coef%B, u%x) + call fortran_adios2_stream(fld) #else call neko_warning('Is not being built with ADIOS2 support.') call neko_warning('Not able to use stream/compression functionality') @@ -164,54 +129,47 @@ subroutine data_streamer_stream(this, u, v, w, p, coef) end subroutine data_streamer_stream - !> Supporting function to calculate the element number offset. - !! returns the number of elements that the ranks previous to the - !! present one have. - !! @param nelv number of elements in current rank. - function elem_running_sum(nelv) result(rbuff) - integer, intent(in) :: nelv - integer :: ierr, xbuff, wbuff, rbuff + !> reciever + !! @param fld array of shape field%x + subroutine data_streamer_recieve(this, fld) + class(data_streamer_t), intent(inout) :: this + real(kind=rp), dimension(:,:,:,:), intent(inout) :: fld + +#ifdef HAVE_ADIOS2 + call fortran_adios2_recieve(fld) +#else + call neko_warning('Is not being built with ADIOS2 support.') + call neko_warning('Not able to use stream/compression functionality') +#endif + + end subroutine data_streamer_recieve - xbuff = nelv ! running sum - wbuff = nelv ! working buff - rbuff = 0 ! recv buff - - call mpi_scan(xbuff, rbuff, 1, mpi_integer, mpi_sum, NEKO_COMM, ierr) - end function elem_running_sum #ifdef HAVE_ADIOS2 - - !> Interface to adios2_setup in c++. + + !> Interface to adios2_initialize in c++. !! @details This routine interfaces with c++ routine that set up adios2 !! if streaming, the global array to pair writer and reader is opened. !! @param npts number of points per element !! @param nelv number of elements in this rank !! @param nelb number of elements in ranks before this one !! @param nelgv total number of elements in velocity mesh - !! @param nelgt total number of elements in temperature mesh (not used) - !! @param x coordinates in x direction - !! @param y coordinates in y direction - !! @param z coordinates in z direction - !! @param asynch integer that indicates asynchronous execution + !! @param gdim dimension (2d or 3d) !! @param comm simulation communicator - subroutine fortran_adios2_setup(npts, nelv, nelb, nelgv, nelgt, x, y, & - z, asynch, comm) + subroutine fortran_adios2_initialize(npts, nelv, nelb, nelgv, gdim, comm) use, intrinsic :: ISO_C_BINDING implicit none - real(kind=rp), dimension(:,:,:,:), intent(inout) :: x - real(kind=rp), dimension(:,:,:,:), intent(inout) :: y - real(kind=rp), dimension(:,:,:,:), intent(inout) :: z - integer, intent(in) :: npts, nelv, nelb,nelgv, nelgt, asynch + integer, intent(in) :: npts, nelv, nelb, nelgv, gdim type(MPI_COMM) :: comm - + interface - !> C-definition is: void adios2_setup_(const int *nval, + !> C-definition is: void adios2_initialize_(const int *nval, !! const int *nelvin,const int *nelb, const int *nelgv, !! const int *nelgt, const double *xml,const double *yml, !! const double *zml, const int *if_asynchronous, !! const int *comm_int) - subroutine c_adios2_setup(npts, nelv, nelb, nelgv, nelgt, x, y, & - z, asynch, comm) bind(C,name="adios2_setup_") + subroutine c_adios2_initialize(npts, nelv, nelb, nelgv, gdim, & + comm) bind(C,name="adios2_initialize_") use, intrinsic :: ISO_C_BINDING import c_rp implicit none @@ -219,25 +177,20 @@ subroutine c_adios2_setup(npts, nelv, nelb, nelgv, nelgt, x, y, & integer(kind=C_INT) :: nelv integer(kind=C_INT) :: nelb integer(kind=C_INT) :: nelgv - integer(kind=C_INT) :: nelgt - real(kind=c_rp), intent(INOUT) :: x(*) - real(kind=c_rp), intent(INOUT) :: y(*) - real(kind=c_rp), intent(INOUT) :: z(*) - integer(kind=C_INT) :: asynch + integer(kind=C_INT) :: gdim type(*) :: comm - end subroutine c_adios2_setup + end subroutine c_adios2_initialize end interface - - call c_adios2_setup(npts, nelv, nelb, nelgv, nelgt, x, y, z, & - asynch, comm) - end subroutine fortran_adios2_setup + + call c_adios2_initialize(npts, nelv, nelb, nelgv, gdim, comm) + end subroutine fortran_adios2_initialize !> Interface to adios2_finalize in c++. !! closes any writer openned at initialization time - subroutine fortran_adios2_finalize() + subroutine fortran_adios2_finalize() use, intrinsic :: ISO_C_BINDING implicit none - + interface !> C-definition is: void adios2_finalize_() subroutine c_adios2_finalize() bind(C,name="adios2_finalize_") @@ -245,54 +198,58 @@ subroutine c_adios2_finalize() bind(C,name="adios2_finalize_") implicit none end subroutine c_adios2_finalize end interface - - call c_adios2_finalize() + + call c_adios2_finalize() end subroutine fortran_adios2_finalize !> Interface to adios2_stream in c++. !! @details This routine communicates the data to a global array that !! is accessed by a data processor. The operations do not write to disk. !! data is communicated with mpi. - !! @param lglel global element number - !! @param p pressure - !! @param u velocity in x - !! @param v velocity in y - !! @param w velocity in z - !! @param bm1 mass matrix - !! @param t temperature / (Not really used in adios2 routine) - subroutine fortran_adios2_stream(lglel, p, u, v, w, bm1, t) + !! @param fld array of shape field%x + subroutine fortran_adios2_stream(fld) use, intrinsic :: ISO_C_BINDING implicit none - integer, dimension(:), intent(inout) :: lglel - real(kind=rp), dimension(:,:,:,:), intent(inout) :: p - real(kind=rp), dimension(:,:,:,:), intent(inout) :: u - real(kind=rp), dimension(:,:,:,:), intent(inout) :: v - real(kind=rp), dimension(:,:,:,:), intent(inout) :: w - real(kind=rp), dimension(:,:,:,:), intent(inout) :: bm1 - real(kind=rp), dimension(:,:,:,:), intent(inout) :: t + real(kind=rp), dimension(:,:,:,:), intent(inout) :: fld interface - !> C-definition is: void adios2_stream_( - !! const int *lglel, const double *pr, const double *u, - !! const double *v, const double *w, const double *mass1, - !! const double *temp) - subroutine c_adios2_stream(lglel, p, u, v, w, bm1, t) & + !> C-definition is: void adios2_stream_(const double *fld) + subroutine c_adios2_stream(fld) & bind(C,name="adios2_stream_") use, intrinsic :: ISO_C_BINDING import c_rp implicit none - integer(kind=C_INT), intent(INOUT) :: lglel(*) - real(kind=c_rp), intent(INOUT) :: p(*) - real(kind=c_rp), intent(INOUT) :: u(*) - real(kind=c_rp), intent(INOUT) :: v(*) - real(kind=c_rp), intent(INOUT) :: w(*) - real(kind=c_rp), intent(INOUT) :: bm1(*) - real(kind=c_rp), intent(INOUT) :: t(*) + real(kind=c_rp), intent(INOUT) :: fld(*) end subroutine c_adios2_stream end interface - - call c_adios2_stream(lglel, p, u, v, w, bm1, t) + + call c_adios2_stream(fld) end subroutine fortran_adios2_stream + + !> Interface to adios2_recieve in ci++. + !! @details This routine communicates the data to a global array that + !! is accessed by a data processor. The operations do not write to disk. + !! data is communicated with mpi. + !! @param fld array of shape field%x + subroutine fortran_adios2_recieve(fld) + use, intrinsic :: ISO_C_BINDING + implicit none + real(kind=rp), dimension(:,:,:,:), intent(inout) :: fld + + interface + !> C-definition is: void adios2_stream_(const double *fld) + subroutine c_adios2_recieve(fld) & + bind(C,name="adios2_recieve_") + use, intrinsic :: ISO_C_BINDING + import c_rp + implicit none + real(kind=c_rp), intent(INOUT) :: fld(*) + end subroutine c_adios2_recieve + end interface + + call c_adios2_recieve(fld) + end subroutine fortran_adios2_recieve + #endif end module data_streamer diff --git a/src/io/file.f90 b/src/io/file.f90 index bc4dc6376f0..95cdb298e9f 100644 --- a/src/io/file.f90 +++ b/src/io/file.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2019-2023, The Neko Authors +! Copyright (c) 2019-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -32,20 +32,24 @@ ! !> Module for file I/O operations. module file - use utils - use generic_file - use nmsh_file - use chkp_file - use map_file - use rea_file - use re2_file - use fld_file - use fld_file_data - use vtk_file - use stl_file - use csv_file + use utils, only : neko_error, neko_warning, filename_suffix + use num_types, only : rp + use generic_file, only : generic_file_t + use nmsh_file, only : nmsh_file_t + use chkp_file, only : chkp_file_t + use map_file, only : map_file_t + use rea_file, only : rea_file_t + use re2_file, only : re2_file_t + use fld_file, only : fld_file_t + use fld_file_data, only : fld_file_data_t + use vtk_file, only : vtk_file_t + use stl_file, only : stl_file_t + use csv_file, only : csv_file_t implicit none - + + !> A wrapper around a polymorphic `generic_file_t` that handles its init. + !! This is essentially a factory for `generic_file_t` descendants additionally + !! handling special CSV file parameters (header and precision). type file_t class(generic_file_t), allocatable :: file_type contains @@ -57,8 +61,10 @@ module file procedure :: set_counter => file_set_counter !> Set a file's start counter. procedure :: set_start_counter => file_set_start_counter - !> Set a file's header + !> Set a file's header. procedure :: set_header => file_set_header + !> Set a file's output precision. + procedure :: set_precision => file_set_precision !> File operation destructor. final :: file_free end type file_t @@ -71,18 +77,20 @@ module file !> File reader/writer constructor. !! @param fname Filename. - function file_init(fname) result(this) + function file_init(fname, header, precision) result(this) character(len=*) :: fname + character(len=*), optional :: header + integer, optional :: precision type(file_t), target :: this character(len=80) :: suffix class(generic_file_t), pointer :: q - + call filename_suffix(fname, suffix) - + if (allocated(this%file_type)) then deallocate(this%file_type) end if - + if (suffix .eq. "rea") then allocate(rea_file_t::this%file_type) else if (suffix .eq. "re2") then @@ -100,13 +108,22 @@ function file_init(fname) result(this) else if (suffix .eq. "stl") then allocate(stl_file_t::this%file_type) else if (suffix .eq. "csv") then - allocate(csv_file_t::this%file_type) + allocate(csv_file_t::this%file_type) + this%file_type%serial = .true. else call neko_error('Unknown file format') end if call this%file_type%init(fname) + if (present(header)) then + call this%set_header(header) + end if + + if (present(precision)) then + call this%set_precision(precision) + end if + end function file_init !> File operation destructor. @@ -131,9 +148,9 @@ subroutine file_write(this, data, t) else call this%file_type%write(data) end if - + end subroutine file_write - + !> Read @a data from a file. !! @param data Read data. subroutine file_read(this, data) @@ -141,7 +158,7 @@ subroutine file_read(this, data) class(*), intent(inout) :: data call this%file_type%read(data) - + end subroutine file_read !> Set a file's counter. @@ -153,7 +170,7 @@ subroutine file_set_counter(this, n) class is (generic_file_t) call ft%set_counter(n) end select - + end subroutine file_set_counter !> Set a file's start counter. @@ -165,10 +182,10 @@ subroutine file_set_start_counter(this, n) class is (generic_file_t) call ft%set_start_counter(n) end select - + end subroutine file_set_start_counter - !> Set a file's header, mainly for csv_file for now. + !> Set a file's header. subroutine file_set_header(this, hd) class(file_t), intent(inout) :: this character(len=*), intent(in) :: hd @@ -185,5 +202,23 @@ subroutine file_set_header(this, hd) end subroutine file_set_header + !> Set a file's output precision. + !! @param precision Precision as defined in `num_types`. + subroutine file_set_precision(this, precision) + class(file_t), intent(inout) :: this + integer, intent(in) :: precision + + character(len=80) :: suffix + + select type(ft => this%file_type) + type is (fld_file_t) + call ft%set_precision(precision) + class default + call filename_suffix(this%file_type%fname, suffix) + call neko_warning("No precision strategy defined for " // trim(suffix) //& + " files!") + end select + + end subroutine file_set_precision end module file diff --git a/src/io/fld_file.f90 b/src/io/fld_file.f90 index 47ecd622130..626e9e10e1d 100644 --- a/src/io/fld_file.f90 +++ b/src/io/fld_file.f90 @@ -49,10 +49,11 @@ module fld_file use utils use comm use datadist + use math, only : vlmin, vlmax use neko_mpi_types implicit none private - + real(kind=dp), private, allocatable :: tmp_dp(:) real(kind=sp), private, allocatable :: tmp_sp(:) @@ -100,7 +101,7 @@ subroutine fld_file_write(this, data, t) else time = 0d0 end if - + nullify(msh) nullify(dof) nullify(Xh) @@ -114,13 +115,13 @@ subroutine fld_file_write(this, data, t) if (data%x%n .gt. 0) x%x => data%x%x if (data%y%n .gt. 0) y%x => data%y%x if (data%z%n .gt. 0) z%x => data%z%x - if (data%u%n .gt. 0) then + if (data%u%n .gt. 0) then u%x => data%u%x write_velocity = .true. end if if (data%v%n .gt. 0) v%x => data%v%x if (data%w%n .gt. 0) w%x => data%w%x - if (data%p%n .gt. 0) then + if (data%p%n .gt. 0) then p%x => data%p%x write_pressure = .true. end if @@ -131,7 +132,7 @@ subroutine fld_file_write(this, data, t) n_scalar_fields = data%n_scalars allocate(scalar_fields(n_scalar_fields)) do i = 1, n_scalar_fields - scalar_fields(i)%x => data%s(i)%x + scalar_fields(i)%x => data%s(i)%x end do nelv = data%nelv lx = data%lx @@ -140,7 +141,7 @@ subroutine fld_file_write(this, data, t) gdim = data%gdim glb_nelv = data%glb_nelv offset_el = data%offset_el - + allocate(idx(nelv)) do i = 1, nelv idx(i) = data%idx(i) @@ -173,7 +174,7 @@ subroutine fld_file_write(this, data, t) w%x => data%fields(4)%f%x(:,1,1,1) write_pressure = .true. write_velocity = .true. - case (5:99) + case (5:99) p%x => data%fields(1)%f%x(:,1,1,1) u%x => data%fields(2)%f%x(:,1,1,1) v%x => data%fields(3)%f%x(:,1,1,1) @@ -190,7 +191,7 @@ subroutine fld_file_write(this, data, t) case default call neko_error('This many fields not supported yet, fld_file') end select - dof => data%fields(1)%f%dof + dof => data%fields(1)%f%dof type is (mean_flow_t) u%x => data%u%mf%x(:,1,1,1) @@ -205,7 +206,7 @@ subroutine fld_file_write(this, data, t) v%x => data%vv%mf%x(:,1,1,1) w%x => data%ww%mf%x(:,1,1,1) p%x => data%pp%mf%x(:,1,1,1) - dof => data%pp%mf%dof + dof => data%pp%mf%dof write_pressure = .true. write_velocity = .true. class default @@ -252,14 +253,14 @@ subroutine fld_file_write(this, data, t) else allocate(tmp_sp(gdim*n)) end if - - + + ! ! Create fld header for NEKTON's multifile output ! write_mesh = (this%counter .eq. this%start_counter) - + ! Build rdcode note that for field_t, we only support scalar ! fields at the moment rdcode = ' ' @@ -299,7 +300,7 @@ subroutine fld_file_write(this, data, t) suffix_pos = filename_suffix_pos(this%fname) write(id_str, '(a,i5.5)') 'f', this%counter fname = trim(this%fname(1:suffix_pos-1))//'0.'//id_str - + call MPI_File_open(NEKO_COMM, trim(fname), & MPI_MODE_WRONLY + MPI_MODE_CREATE, MPI_INFO_NULL, fh, ierr) @@ -316,16 +317,16 @@ subroutine fld_file_write(this, data, t) mpi_offset = mpi_offset + int(glb_nelv, i8) * int(MPI_INTEGER_SIZE, i8) deallocate(idx) - + if (write_mesh) then byte_offset = mpi_offset + int(offset_el, i8) * & (int(gdim*lxyz, i8) * & int(FLD_DATA_SIZE, i8)) - + call fld_file_write_vector_field(this, fh, byte_offset, x%x, y%x, z%x, n, gdim, lxyz, nelv) mpi_offset = mpi_offset + int(glb_nelv, i8) * & - (int(gdim *lxyz, i8) * & + (int(gdim *lxyz, i8) * & int(FLD_DATA_SIZE, i8)) end if @@ -338,7 +339,7 @@ subroutine fld_file_write(this, data, t) mpi_offset = mpi_offset + int(glb_nelv, i8) * & (int(gdim * (lxyz), i8) * & int(FLD_DATA_SIZE, i8)) - + end if if (write_pressure) then @@ -365,7 +366,7 @@ subroutine fld_file_write(this, data, t) do i = 1, n_scalar_fields !Without this redundant if statement, Cray optimizes this loop to Oblivion - if (i .eq. 2) then + if (i .eq. 2) then mpi_offset = int(temp_offset,i8) + int(1_i8*glb_nelv, i8) * & (int(lxyz, i8) * & int(FLD_DATA_SIZE, i8)) @@ -378,11 +379,94 @@ subroutine fld_file_write(this, data, t) (int(lxyz, i8) * & int(FLD_DATA_SIZE, i8)) end do - + + + !> Include metadata with bounding boxes (Just copying from nek5000) + if (write_mesh) then + !The offset is: mpioff + element_off*2(min max value)*4(single precision)*gdim(dimensions) + byte_offset = int(mpi_offset,i8) + & + int(offset_el, i8) * & + int(2, i8) * & + int(MPI_REAL_SIZE, i8) * & + int(gdim, i8) + call fld_file_write_metadata_vector(this, fh, byte_offset, x%x, y%x, z%x, gdim, lxyz, nelv) + mpi_offset = int(mpi_offset,i8) + & + int(glb_nelv, i8) * & + int(2, i8) * & + int(MPI_REAL_SIZE, i8) * & + int(gdim, i8) + end if + + if (write_velocity) then + byte_offset = int(mpi_offset,i8) + & + int(offset_el, i8) * & + int(2, i8) * & + int(MPI_REAL_SIZE, i8) * & + int(gdim, i8) + call fld_file_write_metadata_vector(this, fh, byte_offset, u%x, v%x, w%x, gdim, lxyz, nelv) + mpi_offset = int(mpi_offset,i8) + & + int(glb_nelv, i8) * & + int(2, i8) * & + int(MPI_REAL_SIZE, i8) * & + int(gdim, i8) + + end if + + if (write_pressure) then + byte_offset = int(mpi_offset,i8) + & + int(offset_el, i8) * & + int(2, i8) * & + int(MPI_REAL_SIZE, i8) + call fld_file_write_metadata_scalar(this, fh, byte_offset, p%x, lxyz, nelv) + mpi_offset = int(mpi_offset,i8) + & + int(glb_nelv, i8) * & + int(2, i8) * & + int(MPI_REAL_SIZE, i8) + + end if + + if (write_temperature) then + byte_offset = int(mpi_offset,i8) + & + int(offset_el, i8) * & + int(2, i8) * & + int(MPI_REAL_SIZE, i8) + call fld_file_write_metadata_scalar(this, fh, byte_offset, tem%x, lxyz, nelv) + mpi_offset = int(mpi_offset,i8) + & + int(glb_nelv, i8) * & + int(2, i8) * & + int(MPI_REAL_SIZE, i8) + + end if + + + + temp_offset = mpi_offset + + do i = 1, n_scalar_fields + !Without this redundant if statement, Cray optimizes this loop to Oblivion + if (i .eq. 2) then + mpi_offset = int(temp_offset,i8) + & + int(1_i8*glb_nelv, i8) * & + int(2, i8) * & + int(MPI_REAL_SIZE, i8) + end if + + byte_offset = int(mpi_offset,i8) + & + int(offset_el, i8) * & + int(2, i8) * & + int(MPI_REAL_SIZE, i8) + call fld_file_write_metadata_scalar(this, fh, byte_offset, scalar_fields(i)%x, lxyz, nelv) + mpi_offset = int(mpi_offset,i8) + & + int(glb_nelv, i8) * & + int(2, i8) * & + int(MPI_REAL_SIZE, i8) + end do + + call MPI_File_sync(fh, ierr) call MPI_File_close(fh, ierr) - ! Write metadata file + ! Write metadata file if (pe_rank .eq. 0) then tslash_pos = filename_tslash_pos(this%fname) write(start_field,"(I5,A8)") this%start_counter,'.nek5000' @@ -397,11 +481,66 @@ subroutine fld_file_write(this, data, t) end if this%counter = this%counter + 1 - + if (allocated(tmp_dp)) deallocate(tmp_dp) if (allocated(tmp_sp)) deallocate(tmp_sp) end subroutine fld_file_write + subroutine fld_file_write_metadata_vector(this, fh, byte_offset, x, y, z, gdim, lxyz, nelv) + class(fld_file_t), intent(inout) :: this + type(MPI_File), intent(inout) :: fh + integer, intent(in) :: gdim, lxyz, nelv + real(kind=rp), intent(in) :: x(lxyz,nelv), y(lxyz,nelv), z(lxyz,nelv) + integer (kind=MPI_OFFSET_KIND), intent(in) :: byte_offset + integer :: i, el, j, ierr, nout + type(MPI_Status) :: status + real(kind=sp) :: buffer(2*gdim*nelv) + + j = 1 + do el=1,nelv + buffer(j+0) = real(vlmin(x(1,el),lxyz),sp) + buffer(j+1) = real(vlmax(x(1,el),lxyz),sp) + buffer(j+2) = real(vlmin(y(1,el),lxyz) ,sp) + buffer(j+3) = real(vlmax(y(1,el),lxyz),sp) + j = j + 4 + buffer(j+0) = real(vlmin(z(1,el),lxyz) ,sp) + buffer(j+1) = real(vlmax(z(1,el),lxyz),sp) + j = j + 2 + enddo + + ! write out data + nout = 2*gdim*nelv + + call MPI_File_write_at_all(fh, byte_offset, buffer, nout, & + MPI_REAL, status, ierr) + + end subroutine fld_file_write_metadata_vector + + subroutine fld_file_write_metadata_scalar(this, fh, byte_offset, x, lxyz, nelv) + class(fld_file_t), intent(inout) :: this + type(MPI_File), intent(inout) :: fh + integer, intent(in) :: lxyz, nelv + real(kind=rp), intent(in) :: x(lxyz,nelv) + integer (kind=MPI_OFFSET_KIND), intent(in) :: byte_offset + integer :: i, el, j, ierr, nout + type(MPI_Status) :: status + real(kind=sp) :: buffer(2*nelv) + + j = 1 + do el=1,nelv + buffer(j+0) = real(vlmin(x(1,el),lxyz),sp) + buffer(j+1) = real(vlmax(x(1,el),lxyz),sp) + j = j + 2 + enddo + + ! write out data + nout = 2*nelv + + call MPI_File_write_at_all(fh, byte_offset, buffer, nout, & + MPI_REAL, status, ierr) + + end subroutine fld_file_write_metadata_scalar + subroutine fld_file_write_field(this, fh, byte_offset, p, n) class(fld_file_t), intent(inout) :: this type(MPI_File), intent(inout) :: fh @@ -415,7 +554,7 @@ subroutine fld_file_write_field(this, fh, byte_offset, p, n) do i = 1, n tmp_dp(i) = real(p(i),dp) end do - + call MPI_File_write_at_all(fh, byte_offset, tmp_dp, n, & MPI_DOUBLE_PRECISION, status, ierr) else @@ -425,9 +564,9 @@ subroutine fld_file_write_field(this, fh, byte_offset, p, n) call MPI_File_write_at_all(fh, byte_offset, tmp_sp, n, & MPI_REAL, status, ierr) end if - + end subroutine fld_file_write_field - + subroutine fld_file_write_vector_field(this, fh, byte_offset, x, y, z, n, gdim, lxyz, nelv) class(fld_file_t), intent(inout) :: this type(MPI_File), intent(inout) :: fh @@ -436,7 +575,7 @@ subroutine fld_file_write_vector_field(this, fh, byte_offset, x, y, z, n, gdim, integer (kind=MPI_OFFSET_KIND), intent(in) :: byte_offset integer :: i, el, j, ierr type(MPI_Status) :: status - + if (this%dp_precision) then i = 1 do el = 1, nelv @@ -481,7 +620,7 @@ subroutine fld_file_write_vector_field(this, fh, byte_offset, x, y, z, n, gdim, end subroutine fld_file_write_vector_field - + !> Load a field from a NEKTON fld file subroutine fld_file_read(this, data) class(fld_file_t) :: this @@ -497,234 +636,238 @@ subroutine fld_file_read(this, data) integer (kind=MPI_OFFSET_KIND) :: mpi_offset, byte_offset integer :: lx, ly, lz, glb_nelv, counter, lxyz integer :: FLD_DATA_SIZE, n_scalars, n, nd - real(kind=rp) :: time + real(kind=rp) :: time real(kind=sp) :: temp type(linear_dist_t) :: dist real(kind=sp), parameter :: test_pattern = 6.54321 character :: rdcode(10),temp_str(4) + select type(data) type is (fld_file_data_t) - call filename_chsuffix(this%fname, meta_fname,'nek5000') - - inquire(file=trim(meta_fname), exist=meta_file) - if (meta_file .and. data%meta_nsamples .eq. 0) then - if (pe_rank .eq. 0) then - open(unit=9, file=trim(meta_fname)) - read(9, fmt='(A)') string - read(string(14:),fmt='(A)') string - string = trim(string) - data%fld_series_fname = string(:scan(trim(string), '%')-1) - data%fld_series_fname = trim(data%fld_series_fname)//'0' - read(9, fmt='(A)') string - read(string(scan(string,':')+1:),*) data%meta_start_counter - read(9, fmt='(A)') string - read(string(scan(string,':')+1:),*) data%meta_nsamples - - close(9) - write(*,*) 'Reading meta file for fld series' - write(*,*) 'Name: ', trim(data%fld_series_fname) - write(*,*) 'Start counter: ', data%meta_start_counter, 'Nsamples: ', data%meta_nsamples - end if - call MPI_Bcast(data%fld_series_fname, 1024, MPI_CHARACTER, 0, NEKO_COMM, ierr) - call MPI_Bcast(data%meta_start_counter, 1, MPI_INTEGER, 0, NEKO_COMM, ierr) - call MPI_Bcast(data%meta_nsamples, 1, MPI_INTEGER, 0, NEKO_COMM, ierr) - if(this%counter .eq. 0) this%counter = data%meta_start_counter - end if - - if (meta_file) then - write(id_str, '(a,i5.5)') 'f', this%counter - fname = trim(data%fld_series_fname)//'.'//id_str - if (this%counter .ge. data%meta_nsamples+data%meta_start_counter) then - call neko_error('Trying to read more fld files than exist') - end if - else - suffix_pos = filename_suffix_pos(this%fname) - write(id_str, '(a,i5.5)') 'f', this%counter - fname = trim(this%fname(1:suffix_pos-1))//'.'//id_str - end if - call MPI_File_open(NEKO_COMM, trim(fname), & + call filename_chsuffix(this%fname, meta_fname,'nek5000') + + inquire(file=trim(meta_fname), exist=meta_file) + if (meta_file .and. data%meta_nsamples .eq. 0) then + if (pe_rank .eq. 0) then + open(unit=9, file=trim(meta_fname)) + read(9, fmt='(A)') string + read(string(14:),fmt='(A)') string + string = trim(string) + data%fld_series_fname = string(:scan(trim(string), '%')-1) + data%fld_series_fname = trim(data%fld_series_fname)//'0' + read(9, fmt='(A)') string + read(string(scan(string,':')+1:),*) data%meta_start_counter + read(9, fmt='(A)') string + read(string(scan(string,':')+1:),*) data%meta_nsamples + + close(9) + write(*,*) 'Reading meta file for fld series' + write(*,*) 'Name: ', trim(data%fld_series_fname) + write(*,*) 'Start counter: ', data%meta_start_counter, 'Nsamples: ', data%meta_nsamples + end if + call MPI_Bcast(data%fld_series_fname, 1024, MPI_CHARACTER, 0, NEKO_COMM, ierr) + call MPI_Bcast(data%meta_start_counter, 1, MPI_INTEGER, 0, NEKO_COMM, ierr) + call MPI_Bcast(data%meta_nsamples, 1, MPI_INTEGER, 0, NEKO_COMM, ierr) + if(this%counter .eq. 0) this%counter = data%meta_start_counter + end if + + if (meta_file) then + write(id_str, '(a,i5.5)') 'f', this%counter + fname = trim(data%fld_series_fname)//'.'//id_str + if (this%counter .ge. data%meta_nsamples+data%meta_start_counter) then + call neko_error('Trying to read more fld files than exist') + end if + else + suffix_pos = filename_suffix_pos(this%fname) + write(id_str, '(a,i5.5)') 'f', this%counter + fname = trim(this%fname(1:suffix_pos-1))//'.'//id_str + end if + call MPI_File_open(NEKO_COMM, trim(fname), & MPI_MODE_RDONLY, MPI_INFO_NULL, fh, ierr) - call MPI_File_read_all(fh, hdr, 132, MPI_CHARACTER, status, ierr) - !This read can prorbably be done wihtout the temp variables, temp_str, i, j - - read(hdr, 1) temp_str,FLD_DATA_SIZE, lx, ly, lz, glb_nelv, glb_nelv,& + + if (ierr .ne. 0) call neko_error("Could not read "//trim(fname)) + + call MPI_File_read_all(fh, hdr, 132, MPI_CHARACTER, status, ierr) + !This read can prorbably be done wihtout the temp variables, temp_str, i, j + + read(hdr, 1) temp_str,FLD_DATA_SIZE, lx, ly, lz, glb_nelv, glb_nelv,& time, counter, i, j, (rdcode(i),i=1,10) -1 format(4a,1x,i1,1x,i2,1x,i2,1x,i2,1x,i10,1x,i10,1x,e20.13,& +1 format(4a,1x,i1,1x,i2,1x,i2,1x,i2,1x,i10,1x,i10,1x,e20.13,& 1x,i9,1x,i6,1x,i6,1x,10a) - if (data%nelv .eq. 0) then - dist = linear_dist_t(glb_nelv, pe_rank, pe_size, NEKO_COMM) - data%nelv = dist%num_local() - data%offset_el = dist%start_idx() - end if - data%lx = lx - data%ly = ly - data%lz = lz - data%glb_nelv = glb_nelv - data%t_counter = counter - data%time = time - lxyz = lx * ly * lz - n = lxyz * data%nelv - - if (lz .eq. 1) then - data%gdim = 2 - else - data%gdim = 3 - end if - - - if (this%dp_precision) then - FLD_DATA_SIZE = MPI_DOUBLE_PRECISION_SIZE - else - FLD_DATA_SIZE = MPI_REAL_SIZE - end if - if (this%dp_precision) then - allocate(tmp_dp(data%gdim*n)) - else - allocate(tmp_sp(data%gdim*n)) - end if - - - i = 1 - read_mesh = .false. - read_velocity = .false. - read_pressure = .false. - read_temp = .false. - if (rdcode(i) .eq. 'X') then - read_mesh = .true. - if (data%x%n .ne. n) call data%x%init(n) - if (data%y%n .ne. n) call data%y%init(n) - if (data%z%n .ne. n) call data%z%init(n) - i = i + 1 - end if - if (rdcode(i) .eq. 'U') then - read_velocity = .true. - if (data%u%n .ne. n) call data%u%init(n) - if (data%v%n .ne. n) call data%v%init(n) - if (data%w%n .ne. n) call data%w%init(n) - i = i + 1 - end if - if (rdcode(i) .eq. 'P') then - read_pressure = .true. - if (data%p%n .ne. n) call data%p%init(n) - i = i + 1 - end if - if (rdcode(i) .eq. 'T') then - read_temp = .true. - if (data%t%n .ne. n) call data%t%init(n) - i = i + 1 - end if - n_scalars = 0 - if (rdcode(i) .eq. 'S') then - i = i + 1 - read(rdcode(i),*) n_scalars - n_scalars = n_scalars*10 - i = i + 1 - read(rdcode(i),*) j - n_scalars = n_scalars+j - i = i + 1 - if (allocated(data%s)) then - if (data%n_scalars .ne. n_scalars) then - do j = 1, data%n_scalars - call data%s(j)%free() - end do - deallocate(data%s) - data%n_scalars = n_scalars - allocate(data%s(n_scalars)) - do j = 1, data%n_scalars - call data%s(j)%init(n) - end do - end if - else - data%n_scalars = n_scalars - allocate(data%s(data%n_scalars)) - do j = 1, data%n_scalars - call data%s(j)%init(n) - end do - end if - i = i + 1 - end if - - mpi_offset = 132 * MPI_CHARACTER_SIZE - call MPI_File_read_at_all(fh, mpi_offset, temp, 1, & + if (data%nelv .eq. 0) then + dist = linear_dist_t(glb_nelv, pe_rank, pe_size, NEKO_COMM) + data%nelv = dist%num_local() + data%offset_el = dist%start_idx() + end if + data%lx = lx + data%ly = ly + data%lz = lz + data%glb_nelv = glb_nelv + data%t_counter = counter + data%time = time + lxyz = lx * ly * lz + n = lxyz * data%nelv + + if (lz .eq. 1) then + data%gdim = 2 + else + data%gdim = 3 + end if + + + if (FLD_DATA_SIZE .eq. MPI_DOUBLE_PRECISION_SIZE) then + this%dp_precision = .true. + else + this%dp_precision = .false. + end if + if (this%dp_precision) then + allocate(tmp_dp(data%gdim*n)) + else + allocate(tmp_sp(data%gdim*n)) + end if + + + i = 1 + read_mesh = .false. + read_velocity = .false. + read_pressure = .false. + read_temp = .false. + if (rdcode(i) .eq. 'X') then + read_mesh = .true. + if (data%x%n .ne. n) call data%x%init(n) + if (data%y%n .ne. n) call data%y%init(n) + if (data%z%n .ne. n) call data%z%init(n) + i = i + 1 + end if + if (rdcode(i) .eq. 'U') then + read_velocity = .true. + if (data%u%n .ne. n) call data%u%init(n) + if (data%v%n .ne. n) call data%v%init(n) + if (data%w%n .ne. n) call data%w%init(n) + i = i + 1 + end if + if (rdcode(i) .eq. 'P') then + read_pressure = .true. + if (data%p%n .ne. n) call data%p%init(n) + i = i + 1 + end if + if (rdcode(i) .eq. 'T') then + read_temp = .true. + if (data%t%n .ne. n) call data%t%init(n) + i = i + 1 + end if + n_scalars = 0 + if (rdcode(i) .eq. 'S') then + i = i + 1 + read(rdcode(i),*) n_scalars + n_scalars = n_scalars*10 + i = i + 1 + read(rdcode(i),*) j + n_scalars = n_scalars+j + i = i + 1 + if (allocated(data%s)) then + if (data%n_scalars .ne. n_scalars) then + do j = 1, data%n_scalars + call data%s(j)%free() + end do + deallocate(data%s) + data%n_scalars = n_scalars + allocate(data%s(n_scalars)) + do j = 1, data%n_scalars + call data%s(j)%init(n) + end do + end if + else + data%n_scalars = n_scalars + allocate(data%s(data%n_scalars)) + do j = 1, data%n_scalars + call data%s(j)%init(n) + end do + end if + i = i + 1 + end if + + mpi_offset = 132 * MPI_CHARACTER_SIZE + call MPI_File_read_at_all(fh, mpi_offset, temp, 1, & MPI_REAL, status, ierr) - if (temp .ne. test_pattern) then - call neko_error('Incorrect format for fld file, test pattern does not match.') - end if - mpi_offset = mpi_offset + MPI_REAL_SIZE - - - if (allocated(data%idx)) then - if (size(data%idx) .ne. data%nelv) then - deallocate(data%idx) - allocate(data%idx(data%nelv)) - end if - else - allocate(data%idx(data%nelv)) - end if - - byte_offset = mpi_offset + & + if (temp .ne. test_pattern) then + call neko_error('Incorrect format for fld file, test pattern does not match.') + end if + mpi_offset = mpi_offset + MPI_REAL_SIZE + + + if (allocated(data%idx)) then + if (size(data%idx) .ne. data%nelv) then + deallocate(data%idx) + allocate(data%idx(data%nelv)) + end if + else + allocate(data%idx(data%nelv)) + end if + + byte_offset = mpi_offset + & int(data%offset_el, i8) * int(MPI_INTEGER_SIZE, i8) - call MPI_File_read_at_all(fh, byte_offset, data%idx, data%nelv, & + call MPI_File_read_at_all(fh, byte_offset, data%idx, data%nelv, & MPI_INTEGER, status, ierr) - - mpi_offset = mpi_offset + int(data%glb_nelv, i8) * int(MPI_INTEGER_SIZE, i8) - - if (read_mesh) then - byte_offset = mpi_offset + int(data%offset_el, i8) * & + + mpi_offset = mpi_offset + int(data%glb_nelv, i8) * int(MPI_INTEGER_SIZE, i8) + + if (read_mesh) then + byte_offset = mpi_offset + int(data%offset_el, i8) * & (int(data%gdim*lxyz, i8) * & int(FLD_DATA_SIZE, i8)) - call fld_file_read_vector_field(this, fh, byte_offset, data%x, data%y, data%z, data) - mpi_offset = mpi_offset + int(data%glb_nelv, i8) * & - (int(data%gdim *lxyz, i8) * & + call fld_file_read_vector_field(this, fh, byte_offset, data%x, data%y, data%z, data) + mpi_offset = mpi_offset + int(data%glb_nelv, i8) * & + (int(data%gdim *lxyz, i8) * & int(FLD_DATA_SIZE, i8)) - end if + end if - if (read_velocity) then - byte_offset = mpi_offset + int(data%offset_el, i8) * & + if (read_velocity) then + byte_offset = mpi_offset + int(data%offset_el, i8) * & (int(data%gdim*lxyz, i8) * & int(FLD_DATA_SIZE, i8)) - call fld_file_read_vector_field(this, fh, byte_offset, data%u, data%v, data%w, data) - mpi_offset = mpi_offset + int(data%glb_nelv, i8) * & - (int(data%gdim *lxyz, i8) * & + call fld_file_read_vector_field(this, fh, byte_offset, data%u, data%v, data%w, data) + mpi_offset = mpi_offset + int(data%glb_nelv, i8) * & + (int(data%gdim *lxyz, i8) * & int(FLD_DATA_SIZE, i8)) - end if + end if - if (read_pressure) then - byte_offset = mpi_offset + int(data%offset_el, i8) * & + if (read_pressure) then + byte_offset = mpi_offset + int(data%offset_el, i8) * & (int(lxyz, i8) * & int(FLD_DATA_SIZE, i8)) - call fld_file_read_field(this, fh, byte_offset, data%p, data) - mpi_offset = mpi_offset + int(data%glb_nelv, i8) * & - (int(lxyz, i8) * & + call fld_file_read_field(this, fh, byte_offset, data%p, data) + mpi_offset = mpi_offset + int(data%glb_nelv, i8) * & + (int(lxyz, i8) * & int(FLD_DATA_SIZE, i8)) - end if + end if - if (read_temp) then - byte_offset = mpi_offset + int(data%offset_el, i8) * & + if (read_temp) then + byte_offset = mpi_offset + int(data%offset_el, i8) * & (int(lxyz, i8) * & int(FLD_DATA_SIZE, i8)) - call fld_file_read_field(this, fh, byte_offset, data%t, data) - mpi_offset = mpi_offset + int(data%glb_nelv, i8) * & - (int(lxyz, i8) * & + call fld_file_read_field(this, fh, byte_offset, data%t, data) + mpi_offset = mpi_offset + int(data%glb_nelv, i8) * & + (int(lxyz, i8) * & int(FLD_DATA_SIZE, i8)) - end if + end if - do i = 1, n_scalars - byte_offset = mpi_offset + int(data%offset_el, i8) * & + do i = 1, n_scalars + byte_offset = mpi_offset + int(data%offset_el, i8) * & (int(lxyz, i8) * & int(FLD_DATA_SIZE, i8)) - call fld_file_read_field(this, fh, byte_offset, data%s(i), data) - mpi_offset = mpi_offset + int(data%glb_nelv, i8) * & - (int(lxyz, i8) * & + call fld_file_read_field(this, fh, byte_offset, data%s(i), data) + mpi_offset = mpi_offset + int(data%glb_nelv, i8) * & + (int(lxyz, i8) * & int(FLD_DATA_SIZE, i8)) - end do + end do - this%counter = this%counter + 1 + this%counter = this%counter + 1 - if (allocated(tmp_dp)) deallocate(tmp_dp) - if (allocated(tmp_sp)) deallocate(tmp_sp) - class default + if (allocated(tmp_dp)) deallocate(tmp_dp) + if (allocated(tmp_sp)) deallocate(tmp_sp) + class default call neko_error('Currently we only read into fld_file_data_t, please use that data structure instead.') end select @@ -749,15 +892,15 @@ subroutine fld_file_read_field(this, fh, byte_offset, x, fld_data) call MPI_File_read_at_all(fh, byte_offset, tmp_sp, n, & MPI_REAL, status, ierr) end if - + if (this%dp_precision) then do i = 1, n - x%x(i) = tmp_dp(i) - end do + x%x(i) = tmp_dp(i) + end do else do i = 1, n - x%x(i) = tmp_sp(i) - end do + x%x(i) = tmp_sp(i) + end do end if @@ -785,21 +928,21 @@ subroutine fld_file_read_vector_field(this, fh, byte_offset, x, y, z, fld_data) MPI_REAL, status, ierr) end if - + if (this%dp_precision) then i = 1 do e = 1, fld_data%nelv do j = 1, lxyz - x%x((e-1)*lxyz+j) = tmp_dp(i) + x%x((e-1)*lxyz+j) = tmp_dp(i) i = i +1 end do do j = 1, lxyz - y%x((e-1)*lxyz+j) = tmp_dp(i) + y%x((e-1)*lxyz+j) = tmp_dp(i) i = i +1 end do if (fld_data%gdim .eq. 3) then do j = 1, lxyz - z%x((e-1)*lxyz+j) = tmp_dp(i) + z%x((e-1)*lxyz+j) = tmp_dp(i) i = i +1 end do end if @@ -808,27 +951,27 @@ subroutine fld_file_read_vector_field(this, fh, byte_offset, x, y, z, fld_data) i = 1 do e = 1, fld_data%nelv do j = 1, lxyz - x%x((e-1)*lxyz+j) = tmp_sp(i) + x%x((e-1)*lxyz+j) = tmp_sp(i) i = i +1 end do do j = 1, lxyz - y%x((e-1)*lxyz+j) = tmp_sp(i) + y%x((e-1)*lxyz+j) = tmp_sp(i) i = i +1 end do if (fld_data%gdim .eq. 3) then do j = 1, lxyz - z%x((e-1)*lxyz+j) = tmp_sp(i) + z%x((e-1)*lxyz+j) = tmp_sp(i) i = i +1 end do end if - end do + end do end if end subroutine fld_file_read_vector_field subroutine fld_file_set_precision(this, precision) class(fld_file_t) :: this - integer, intent(inout) :: precision + integer, intent(in) :: precision if (precision .eq. dp) then this%dp_precision = .true. @@ -837,8 +980,8 @@ subroutine fld_file_set_precision(this, precision) else call neko_error('Invalid precision') end if - + end subroutine fld_file_set_precision - + end module fld_file diff --git a/src/io/fld_file_data.f90 b/src/io/fld_file_data.f90 index d6b70ed9432..093863048fc 100644 --- a/src/io/fld_file_data.f90 +++ b/src/io/fld_file_data.f90 @@ -10,7 +10,7 @@ module fld_file_data use vector, only : vector_t, vector_ptr_t implicit none private - + type, public :: fld_file_data_t type(vector_t) :: x !< x-coords type(vector_t) :: y !< y-coords @@ -46,7 +46,7 @@ module fld_file_data end type fld_file_data_t contains - + !> Initialise a fld_file_data object with nelv elements with a offset_nel subroutine fld_file_data_init(this, nelv, offset_el) class(fld_file_data_t), intent(inout) :: this @@ -54,24 +54,24 @@ subroutine fld_file_data_init(this, nelv, offset_el) call this%free() if (present(nelv)) this%nelv = nelv if (present(offset_el)) this%offset_el = offset_el - + end subroutine fld_file_data_init !> Get number of fields in this fld file function fld_file_data_size(this) result(i) class(fld_file_data_t) :: this integer :: i i = 0 - if(this%u%n .gt. 0) i = i + 1 - if(this%v%n .gt. 0) i = i + 1 - if(this%w%n .gt. 0) i = i + 1 - if(this%p%n .gt. 0) i = i + 1 - if(this%t%n .gt. 0) i = i + 1 + if(this%u%n .gt. 0) i = i + 1 + if(this%v%n .gt. 0) i = i + 1 + if(this%w%n .gt. 0) i = i + 1 + if(this%p%n .gt. 0) i = i + 1 + if(this%t%n .gt. 0) i = i + 1 i = i + this%n_scalars end function fld_file_data_size !> Get a list with pointers to the fields in the fld file - subroutine fld_file_get_list(this, ptr_list, n) + subroutine fld_file_get_list(this, ptr_list, n) class(fld_file_data_t), target, intent(in) :: this integer, intent(in) :: n integer :: i, j @@ -80,23 +80,23 @@ subroutine fld_file_get_list(this, ptr_list, n) if(this%u%n .gt. 0) then ptr_list(i)%v => this%u i = i + 1 - end if + end if if(this%v%n .gt. 0) then ptr_list(i)%v => this%v i = i + 1 - end if + end if if(this%w%n .gt. 0) then ptr_list(i)%v => this%w i = i + 1 - end if + end if if(this%p%n .gt. 0) then ptr_list(i)%v => this%p i = i + 1 - end if + end if if(this%t%n .gt. 0) then ptr_list(i)%v => this%t i = i + 1 - end if + end if do j = 1, this%n_scalars ptr_list(i)%v => this%s(j) i = i +1 @@ -154,14 +154,14 @@ subroutine fld_file_data_free(this) call this%p%free() call this%t%free() if (allocated(this%s)) then - do i = 1, this%n_scalars - call this%s(i)%free() - end do + do i = 1, this%n_scalars + call this%s(i)%free() + end do end if this%n_scalars = 0 this%time = 0.0 - this%glb_nelv = 0 - this%nelv = 0 + this%glb_nelv = 0 + this%nelv = 0 this%offset_el = 0 this%lx = 0 this%ly = 0 diff --git a/src/io/fld_file_output.f90 b/src/io/fld_file_output.f90 new file mode 100644 index 00000000000..4a43fe8fb03 --- /dev/null +++ b/src/io/fld_file_output.f90 @@ -0,0 +1,107 @@ +! Copyright (c) 2020-2023, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Implements `fld_file_output_t`. +module fld_file_output + use num_types, only : rp + use field_list, only : field_list_t + use neko_config, only : NEKO_BCKND_DEVICE + use device, only : device_memcpy, DEVICE_TO_HOST + use output, only : output_t + implicit none + private + + !> A simple output saving a list of fields to a .fld file. + type, public, extends(output_t) :: fld_file_output_t + ! The list of fields to save. + type(field_list_t) :: fields + contains + ! Constructor. + procedure, pass(this) :: init => fld_file_output_init + ! Writes the data. + procedure, pass(this) :: sample => fld_file_output_sample + end type fld_file_output_t + +contains + + !> Constructor. + !! @param precision the precison of the reals in the file. + !! @param name The base name of the files. + !! @param name The number of field pointers to preallocate in the field list. + !! @param path Optional path to the write folder. + subroutine fld_file_output_init(this, precision, name, nfields, path) + class(fld_file_output_t), intent (inout) :: this + integer, intent(in) :: precision + character(len=*), intent(in) :: name + character(len=*), intent(in), optional :: path + integer, intent(in) :: nfields + character(len=1024) :: fname + + if (present(path)) then + fname = trim(path) // trim(name) // '.fld' + else + fname = trim(name) // '.fld' + end if + + call this%init_base(fname, precision) + + if (allocated(this%fields%fields)) then + deallocate(this%fields%fields) + end if + + allocate(this%fields%fields(nfields)) + + end subroutine fld_file_output_init + + !> Writes the data. + !! @param t The time value. + subroutine fld_file_output_sample(this, t) + class(fld_file_output_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer :: i + + if (NEKO_BCKND_DEVICE .eq. 1) then + associate(fields => this%fields%fields) + do i = 1, size(fields) + call device_memcpy(fields(i)%f%x, fields(i)%f%x_d, & + fields(i)%f%dof%size(), DEVICE_TO_HOST, & + sync=(i .eq. size(fields))) ! Sync on the last field + end do + end associate + + end if + + call this%file_%write(this%fields, t) + + end subroutine fld_file_output_sample + +end module fld_file_output diff --git a/src/io/fluid_output.f90 b/src/io/fluid_output.f90 index 8b130be5b0a..d08fc4bf8d5 100644 --- a/src/io/fluid_output.f90 +++ b/src/io/fluid_output.f90 @@ -32,12 +32,13 @@ ! !> Defines an output for a fluid module fluid_output + use num_types, only : rp use fluid_scheme, only : fluid_scheme_t use scalar_scheme, only : scalar_scheme_t use field_list, only : field_list_t use neko_config use device - use output + use output, only : output_t implicit none !> Fluid output @@ -53,7 +54,8 @@ module fluid_output contains - function fluid_output_init(fluid, scalar, name, path) result(this) + function fluid_output_init(precision, fluid, scalar, name, path) result(this) + integer, intent(inout) :: precision class(fluid_scheme_t), intent(in), target :: fluid class(scalar_scheme_t), intent(in), optional, target :: scalar character(len=*), intent(in), optional :: name @@ -67,11 +69,11 @@ function fluid_output_init(fluid, scalar, name, path) result(this) fname = trim(name) // '.fld' else if (present(path)) then fname = trim(path) // 'field.fld' - else + else fname = 'field.fld' end if - call output_init(this, fname) + call this%init_base(fname, precision) if (allocated(this%fluid%fields)) then deallocate(this%fluid%fields) @@ -91,7 +93,7 @@ function fluid_output_init(fluid, scalar, name, path) result(this) if (present(scalar)) then this%fluid%fields(5)%f => scalar%s end if - + end function fluid_output_init !> Sample a fluid solution at time @a t @@ -106,14 +108,14 @@ subroutine fluid_output_sample(this, t) do i = 1, size(fields) call device_memcpy(fields(i)%f%x, fields(i)%f%x_d, & fields(i)%f%dof%size(), DEVICE_TO_HOST, & - sync=(i .eq. size(fields))) + sync=(i .eq. size(fields))) ! Sync on the last field end do end associate end if - + call this%file_%write(this%fluid, t) end subroutine fluid_output_sample - + end module fluid_output diff --git a/src/io/fluid_stats_output.f90 b/src/io/fluid_stats_output.f90 index 222c4dc168d..46bf3fba3f8 100644 --- a/src/io/fluid_stats_output.f90 +++ b/src/io/fluid_stats_output.f90 @@ -52,7 +52,7 @@ module fluid_stats_output end interface fluid_stats_output_t contains - + function fluid_stats_output_init(stats, T_begin, name, path) result(this) type(fluid_stats_t), intent(in), target :: stats real(kind=rp), intent(in) :: T_begin @@ -71,7 +71,7 @@ function fluid_stats_output_init(stats, T_begin, name, path) result(this) fname = 'stats.fld' end if - call output_init(this, fname) + call this%init_base(fname) this%stats => stats this%T_begin = T_begin end function fluid_stats_output_init @@ -82,21 +82,21 @@ subroutine fluid_stats_output_sample(this, t) real(kind=rp), intent(in) :: t integer :: i associate (out_fields => this%stats%stat_fields%fields) - if (t .ge. this%T_begin) then - call this%stats%make_strong_grad() - if ( NEKO_BCKND_DEVICE .eq. 1) then - do i = 1, size(out_fields) - call device_memcpy(out_fields(i)%f%x, out_fields(i)%f%x_d,& + if (t .ge. this%T_begin) then + call this%stats%make_strong_grad() + if ( NEKO_BCKND_DEVICE .eq. 1) then + do i = 1, size(out_fields) + call device_memcpy(out_fields(i)%f%x, out_fields(i)%f%x_d,& out_fields(i)%f%dof%size(), DEVICE_TO_HOST, & - sync=(i .eq. size(out_fields))) - end do - end if - call this%file_%write(this%stats%stat_fields, t) - call this%stats%reset() - end if + sync=(i .eq. size(out_fields))) ! Sync on last field + end do + end if + call this%file_%write(this%stats%stat_fields, t) + call this%stats%reset() + end if end associate end subroutine fluid_stats_output_sample - + end module fluid_stats_output diff --git a/src/io/format/map.f90 b/src/io/format/map.f90 index 973c6e9cab6..386b229a4a6 100644 --- a/src/io/format/map.f90 +++ b/src/io/format/map.f90 @@ -24,7 +24,7 @@ subroutine map_init_mesh(m, msh) m%nel = msh%nelv m%nlv = msh%npts - + call map_init_common(m) end subroutine map_init_mesh @@ -45,11 +45,11 @@ end subroutine map_init_nel_nelv subroutine map_init_common(m) type(map_t), intent(inout) :: m - + allocate(m%imap(m%nel)) - + allocate(m%vertex(m%nlv, m%nel)) - + end subroutine map_init_common subroutine map_free(m) @@ -63,5 +63,5 @@ subroutine map_free(m) deallocate(m%vertex) end if end subroutine map_free - + end module map diff --git a/src/io/format/nmsh.f90 b/src/io/format/nmsh.f90 index 068be507f1c..37e09bdd755 100644 --- a/src/io/format/nmsh.f90 +++ b/src/io/format/nmsh.f90 @@ -14,7 +14,7 @@ module nmsh integer :: v_idx !< Vertex id (global) real(kind=dp), dimension(3) :: v_xyz ! Vertex coordinates end type nmsh_vertex_t - + !> Neko quad element data type, public, extends(nmsh_t) :: nmsh_quad_t type(nmsh_vertex_t), dimension(4) :: v !< List of vertices @@ -29,19 +29,19 @@ module nmsh type, public :: nmsh_zone_t integer :: e !< Element id (global) integer :: f !< Facet number - integer :: p_e !< Perio dic connection (element) + integer :: p_e !< Perio dic connection (element) integer :: p_f !< Periodic connection (facet) integer, dimension(4) :: glb_pt_ids !< Global point ids integer :: type !< Zone type end type nmsh_zone_t - - !> Neko curve data + + !> Neko curve data type, public :: nmsh_curve_el_t integer :: e !< Element id (global) real(kind=dp), dimension(5,12) :: curve_data !< Save 6 values for each edge integer, dimension(12) :: type !< type of curve for each edge end type nmsh_curve_el_t - + end module nmsh diff --git a/src/io/format/re2.f90 b/src/io/format/re2.f90 index 7aed51a8bd5..8bcce6151aa 100644 --- a/src/io/format/re2.f90 +++ b/src/io/format/re2.f90 @@ -15,14 +15,14 @@ module re2 end type re2v1_t !> NEKTON re2 element data (3d) (version 1) - type, public, extends(re2v1_t) :: re2v1_xyz_t + type, public, extends(re2v1_t) :: re2v1_xyz_t real(kind=sp), dimension(8) :: x real(kind=sp), dimension(8) :: y real(kind=sp), dimension(8) :: z end type re2v1_xyz_t !> NEKTON re2 element data (2d) (version 1) - type, public, extends(re2v1_t) :: re2v1_xy_t + type, public, extends(re2v1_t) :: re2v1_xy_t real(kind=sp), dimension(4) :: x real(kind=sp), dimension(4) :: y end type re2v1_xy_t @@ -34,7 +34,7 @@ module re2 real(kind=sp), dimension(5) :: point character(len=4) :: type end type re2v1_curve_t - + !> NEKTON re2 bc data (version 1) type, public :: re2v1_bc_t integer :: elem @@ -49,14 +49,14 @@ module re2 end type re2v2_t !> NEKTON re2 element data (3d) (version 2) - type, public, extends(re2v2_t) :: re2v2_xyz_t + type, public, extends(re2v2_t) :: re2v2_xyz_t real(kind=dp), dimension(8) :: x real(kind=dp), dimension(8) :: y real(kind=dp), dimension(8) :: z end type re2v2_xyz_t !> NEKTON re2 element data (2d) (version 2) - type, public, extends(re2v1_t) :: re2v2_xy_t + type, public, extends(re2v1_t) :: re2v2_xy_t real(kind=dp), dimension(4) :: x real(kind=dp), dimension(4) :: y end type re2v2_xy_t @@ -68,7 +68,7 @@ module re2 real(kind=dp), dimension(5) :: point character(len=8) :: type end type re2v2_curve_t - + !> NEKTON re2 bc data (version 2) type, public :: re2v2_bc_t real(kind=dp) :: elem diff --git a/src/io/format/rea.f90 b/src/io/format/rea.f90 index 28a6987be73..3aab159d0cd 100644 --- a/src/io/format/rea.f90 +++ b/src/io/format/rea.f90 @@ -16,17 +16,17 @@ module rea character(len=3), allocatable :: cbc(:,:) end type rea_t - + public :: rea_free contains - + !> Free a NEKTON session data subroutine rea_free(r) type(rea_t), intent(inout) :: r call r%msh%free() - + if (allocated(r%params)) then deallocate(r%params) end if diff --git a/src/io/format/stl.f90 b/src/io/format/stl.f90 index e7f63397a07..c709484dd62 100644 --- a/src/io/format/stl.f90 +++ b/src/io/format/stl.f90 @@ -9,14 +9,14 @@ module stl character(len=80) :: hdr integer :: ntri end type stl_hdr_t - + !> Defines a STL triangle type, public :: stl_triangle_t real(kind=sp) :: n(3) real(kind=sp) :: v1(3) real(kind=sp) :: v2(3) real(kind=sp) :: v3(3) - integer(kind=i2) :: attrib + integer(kind=i2) :: attrib end type stl_triangle_t - + end module stl diff --git a/src/io/generic_file.f90 b/src/io/generic_file.f90 index 374908048c4..da26a09581e 100644 --- a/src/io/generic_file.f90 +++ b/src/io/generic_file.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2019-2022, The Neko Authors +! Copyright (c) 2019-2023, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -33,23 +33,27 @@ module generic_file use num_types implicit none - + !> A generic file handler. type, abstract :: generic_file_t character(len=1024) :: fname integer :: counter integer :: start_counter = 0 + !> File format is serial + logical :: serial = .false. contains !> Generic file constructor. procedure :: init => generic_file_init !< Write method. - procedure(generic_file_write), deferred :: write + procedure(generic_file_write), deferred :: write !< Read method. - procedure(generic_file_read), deferred :: read + procedure(generic_file_read), deferred :: read !> Set the file counter to @a n. procedure :: set_counter => generic_file_set_counter !> Set the file start counter to @a n. procedure :: set_start_counter => generic_file_set_start_counter + !> Ensure the file exists + procedure :: check_exists => generic_file_check_exists end type generic_file_t abstract interface @@ -61,7 +65,7 @@ subroutine generic_file_write(this, data, t) real(kind=rp), intent(in), optional :: t end subroutine generic_file_write end interface - + abstract interface subroutine generic_file_read(this, data) import :: generic_file_t @@ -71,31 +75,58 @@ end subroutine generic_file_read end interface contains - + !> Generic file constructor. !! @param fname Filename. subroutine generic_file_init(this, fname) class(generic_file_t) :: this character(len=*) :: fname - + this%fname = fname this%counter = 0 - + end subroutine generic_file_init !> Set the file counter to @a n. subroutine generic_file_set_counter(this, n) class(generic_file_t), intent(inout) :: this integer, intent(in) :: n - this%counter = n + this%counter = n end subroutine generic_file_set_counter !> Set the file start counter to @a n. subroutine generic_file_set_start_counter(this, n) class(generic_file_t), intent(inout) :: this integer, intent(in) :: n - this%start_counter = n + this%start_counter = n end subroutine generic_file_set_start_counter + !> check if the file exists + subroutine generic_file_check_exists(this) + use utils, only: neko_error + use comm, only: pe_rank, NEKO_COMM + use mpi_f08 + implicit none + + class(generic_file_t), intent(in) :: this + logical :: file_exists + integer :: neko_mpi_ierr + + file_exists = .false. + + if (pe_rank .eq. 0 .or. this%serial) then + ! Stop if the file does not exist + inquire(file=this%fname, exist=file_exists) + end if + if (.not. this%serial) then + call MPI_Bcast(file_exists, 1, MPI_LOGICAL, 0, NEKO_COMM, neko_mpi_ierr) + end if + + if (.not. file_exists) then + call neko_error('File does not exist: '//trim(this%fname)) + end if + + end subroutine generic_file_check_exists + end module generic_file diff --git a/src/io/map_file.f90 b/src/io/map_file.f90 index 19f69128a87..8cdd8e1c120 100644 --- a/src/io/map_file.f90 +++ b/src/io/map_file.f90 @@ -55,7 +55,9 @@ subroutine map_file_read(this, data) class(*), target, intent(inout) :: data type(map_t), pointer :: nm integer :: j, k, neli, nnzi, ierr - + + call this%check_exists() + select type(data) type is (map_t) nm => data @@ -71,13 +73,13 @@ subroutine map_file_read(this, data) read(10, *) neli, nnzi !> @todo Check if neli matches map%nel - + do j = 1, nm%nel read(10, *) nm%imap(j),(nm%vertex(k, j), k=1,nm%nlv) end do - + close(unit=10) - + end subroutine map_file_read subroutine map_file_write(this, data, t) @@ -86,5 +88,5 @@ subroutine map_file_write(this, data, t) real(kind=rp), intent(in), optional :: t call neko_error("Not implemented yet!") end subroutine map_file_write - + end module map_file diff --git a/src/io/mean_flow_output.f90 b/src/io/mean_flow_output.f90 index 97508241354..e3dd8f7f970 100644 --- a/src/io/mean_flow_output.f90 +++ b/src/io/mean_flow_output.f90 @@ -34,7 +34,7 @@ module mean_flow_output use mean_flow, only : mean_flow_t use num_types - use device + use device use output implicit none private @@ -51,7 +51,7 @@ module mean_flow_output end interface mean_flow_output_t contains - + function mean_flow_output_init(mf, T_begin, name, path) result(this) type(mean_flow_t), intent(in), target ::mf real(kind=rp), intent(in) :: T_begin @@ -70,7 +70,7 @@ function mean_flow_output_init(mf, T_begin, name, path) result(this) fname = 'mean_field.fld' end if - call output_init(this, fname) + call this%init_base(fname) this%mf => mf this%T_begin = T_begin end function mean_flow_output_init @@ -81,16 +81,20 @@ subroutine mean_flow_output_sample(this, t) real(kind=rp), intent(in) :: t if (t .ge. this%T_begin) then - call device_memcpy(this%mf%p%mf%x, this%mf%p%mf%x_d, this%mf%p%mf%dof%size(), DEVICE_TO_HOST) - call device_memcpy(this%mf%u%mf%x, this%mf%u%mf%x_d, this%mf%p%mf%dof%size(), DEVICE_TO_HOST) - call device_memcpy(this%mf%v%mf%x, this%mf%v%mf%x_d, this%mf%p%mf%dof%size(), DEVICE_TO_HOST) - call device_memcpy(this%mf%w%mf%x, this%mf%w%mf%x_d, this%mf%p%mf%dof%size(), DEVICE_TO_HOST, sync=.true.) + call device_memcpy(this%mf%p%mf%x, this%mf%p%mf%x_d, this%mf%p%mf%dof%size(), & + DEVICE_TO_HOST, sync=.false.) + call device_memcpy(this%mf%u%mf%x, this%mf%u%mf%x_d, this%mf%p%mf%dof%size(), & + DEVICE_TO_HOST, sync=.false.) + call device_memcpy(this%mf%v%mf%x, this%mf%v%mf%x_d, this%mf%p%mf%dof%size(), & + DEVICE_TO_HOST, sync=.false.) + call device_memcpy(this%mf%w%mf%x, this%mf%w%mf%x_d, this%mf%p%mf%dof%size(), & + DEVICE_TO_HOST, sync=.true.) call this%file_%write(this%mf, t) call this%mf%reset() end if end subroutine mean_flow_output_sample - + end module mean_flow_output diff --git a/src/io/mean_sqr_flow_output.f90 b/src/io/mean_sqr_flow_output.f90 index 8ea02626a95..04e9492529a 100644 --- a/src/io/mean_sqr_flow_output.f90 +++ b/src/io/mean_sqr_flow_output.f90 @@ -50,7 +50,7 @@ module mean_sqr_flow_output end interface mean_sqr_flow_output_t contains - + function mean_sqr_flow_output_init(msqrf, T_begin, name, path) result(this) type(mean_sqr_flow_t), intent(in), target ::msqrf real(kind=rp), intent(in) :: T_begin @@ -58,7 +58,7 @@ function mean_sqr_flow_output_init(msqrf, T_begin, name, path) result(this) character(len=*), intent(in), optional :: path type(mean_sqr_flow_output_t) :: this character(len=1024) :: fname - + if (present(name) .and. present(path)) then fname = trim(path) // trim(name) // '.fld' else if (present(name)) then @@ -69,7 +69,7 @@ function mean_sqr_flow_output_init(msqrf, T_begin, name, path) result(this) fname = 'mean_sqr_field.fld' end if - call output_init(this, fname) + call this%init_base(fname) this%msqrf => msqrf this%T_begin = T_begin end function mean_sqr_flow_output_init @@ -84,7 +84,7 @@ subroutine mean_sqr_flow_output_sample(this, t) end if end subroutine mean_sqr_flow_output_sample - + end module mean_sqr_flow_output diff --git a/src/io/nek_adios2.cpp b/src/io/nek_adios2.cpp index 9c5d0b2c92d..451ec534494 100644 --- a/src/io/nek_adios2.cpp +++ b/src/io/nek_adios2.cpp @@ -3,358 +3,91 @@ #include #include +// Global Adios2 variables adios2::ADIOS adios; -adios2::IO io; -adios2::IO ior; -adios2::IO io_head; adios2::IO io_asynchronous; -adios2::Engine writer; -adios2::Engine writer_head; -adios2::Engine readr; adios2::Engine writer_st; -adios2::Variable x; -adios2::Variable y; -adios2::Variable z; -adios2::Variable lglelw; -adios2::Variable lglelw_st; -adios2::Variable p; -adios2::Variable vx; -adios2::Variable vy; -adios2::Variable vz; -adios2::Variable bm1; -adios2::Variable p_st; -adios2::Variable vx_st; -adios2::Variable vy_st; -adios2::Variable vz_st; -adios2::Variable bm1_st; -adios2::Variable vxr; -adios2::Variable vyr; -adios2::Variable vzr; -adios2::Variable prr; -adios2::Variable lglelr; -std::vector vVXr; -std::vector vVYr; -std::vector vVZr; -std::vector vVPrr; -std::vector vVLGLELr; -// adios2::Variable t; -double dataTime = 0.0; -std::clock_t startT; -std::clock_t elapsedT; +adios2::Engine reader_st; +adios2::Variable f2py_field; +adios2::Variable py2f_field; +// Global C variables int rank, size; -int ifile; -int ifilew; -int ifstream; -int decide_stream_global; - - -extern "C" void adios2_setup_( - const int *nval, - const int *nelvin, - const int *nelb, - const int *nelgv, - const int *nelgt, - const double *xml, - const double *yml, - const double *zml, - const int *if_asynchronous, +int reader_start; +int reader_count; + +extern "C" void adios2_initialize_( + const int *lxyz, + const int *nelv, + const int *offset_el, + const int *glb_nelv, + const int *gdim, const int *comm_int ){ - std::string configFile="adios2_config/config.xml"; MPI_Comm comm = MPI_Comm_f2c(*comm_int); - adios = adios2::ADIOS(configFile, comm); + adios = adios2::ADIOS(comm); MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); - // Compressor writer. - io = adios.DeclareIO("writer"); - // Mesh writer. - io_head = adios.DeclareIO("writer0"); - // Asynchronous writer. - io_asynchronous = adios.DeclareIO("writerISMPI"); - // Compressor reader - ior = adios.DeclareIO("inputReader"); + // Asynchronous IO. + io_asynchronous = adios.DeclareIO("streamIO"); + io_asynchronous.SetEngine("SST"); - // Determine if asyncrhonous operation will be needed for this set up - unsigned int decide_stream = static_cast((*if_asynchronous)); - decide_stream_global = decide_stream; - // Number of elements in my rank. - unsigned int nelv = static_cast((*nelvin)); - + unsigned int nel = static_cast((*nelv)); // Determine where my rank writes in the global array according to number of element in previous ranks - unsigned int start = static_cast(*nelb); - start *= static_cast(*nval); - + unsigned int start = static_cast(*offset_el); + start *= static_cast(*lxyz); // n is count, i.e number of entries in the array in my rank - unsigned int n = static_cast (*nval) * nelv; + unsigned int n = static_cast (*lxyz) * nel; // gn is the total size of the arrays, not per io rank - unsigned int gn = static_cast((*nelgv)*(*nval)); - std::cout << rank << ": " << gn << ", " << start << "," << n << std::endl; - - // Create the adios2 variables for writer that depend on the current start and n - p = io.DefineVariable("P_OUT", {gn}, {start}, {n}); - vx = io.DefineVariable("VX_OUT", {gn}, {start}, {n}); - vy = io.DefineVariable("VY_OUT", {gn}, {start}, {n}); - vz = io.DefineVariable("VZ_OUT", {gn}, {start}, {n}); - bm1 = io.DefineVariable("BM1_OUT", {gn}, {start}, {n}); - - // Create the adios2 variables for writer0 - x = io_head.DefineVariable("X", {gn}, {start}, {n}); - y = io_head.DefineVariable("Y", {gn}, {start}, {n}); - z = io_head.DefineVariable("Z", {gn}, {start}, {n}); + unsigned int gn = static_cast((*glb_nelv)*(*lxyz)); + + // Assign to global variables + reader_start = start; + reader_count = n; // If the process is asynchronous, define the relevant variables for writer_st - if (decide_stream == 1){ - p_st = io_asynchronous.DefineVariable("P", {gn}, {start}, {n}); - vx_st = io_asynchronous.DefineVariable("VX", {gn}, {start}, {n}); - vy_st = io_asynchronous.DefineVariable("VY", {gn}, {start}, {n}); - vz_st = io_asynchronous.DefineVariable("VZ", {gn}, {start}, {n}); - bm1_st = io_asynchronous.DefineVariable("BM1", {gn}, {start}, {n}); - } - - - // Do everything again for the global indices - nelv = static_cast((*nelvin)); - start = static_cast(*nelb); - n = static_cast (nelv); - gn = static_cast((*nelgv)); - // Define variable for compression writer - lglelw = io.DefineVariable("LGLEL_OUT", {gn}, {start}, {n}); - // Define variable for asyncrhonous writet - if (decide_stream == 1){ - lglelw_st = io_asynchronous.DefineVariable("LGLEL", {gn}, {start}, {n}); - } - - // Write the mesh information only once (Currently commented out). - //writer_head = io_head.Open("geo.bp", adios2::Mode::Write); - //writer_head.Put(x, xml); - //writer_head.Put(y, yml); - //writer_head.Put(z, yml); - //writer_head.Close(); - //if(!rank) - //std::cout << "geo.bp written" << std::endl; + f2py_field = io_asynchronous.DefineVariable("f2py_field", {gn}, {start}, {n}); // If asyncrhonous execution, open the global array - if (decide_stream == 1){ - std::cout << "create global array" << std::endl; - writer_st = io_asynchronous.Open("globalArray", adios2::Mode::Write); - } + std::cout << "create global array" << std::endl; + writer_st = io_asynchronous.Open("globalArray_f2py", adios2::Mode::Write); + reader_st = io_asynchronous.Open("globalArray_py2f", adios2::Mode::Read); - // Initialize global variables for writing. This could be done in global definition - ifile = 0 ; - ifilew = 0 ; + // Put necesary information in a header stream + writer_st.BeginStep(); + adios2::Variable hdr_elems = io_asynchronous.DefineVariable("global_elements"); + adios2::Variable hdr_lxyz = io_asynchronous.DefineVariable("points_per_element"); + adios2::Variable hdr_gdim = io_asynchronous.DefineVariable("problem_dimension"); + if( rank == 0 ) + { + writer_st.Put(hdr_elems, static_cast (*glb_nelv)); + writer_st.Put(hdr_lxyz, static_cast (*lxyz)); + writer_st.Put(hdr_gdim, static_cast (*gdim)); + } + writer_st.EndStep(); } -extern "C" void adios2_update_( - const int *lglel, - const double *pr, - const double *u, - const double *v, - const double *w, - const double *temp -){ - startT = std::clock(); - ifilew=ifilew+1; - std:: string fileName= "out.f0000"+ std::to_string(ifilew) +".bp"; - writer = io.Open(fileName, adios2::Mode::Write); - // Begin a step of the writer - writer.BeginStep(); - writer.Put(p, pr); - writer.Put(vx, u); - writer.Put(vy, v); - writer.Put(vz, w); - writer.Put(lglelw, lglel); - writer.EndStep(); - writer.Close(); - dataTime += (std::clock() - startT) / (double) CLOCKS_PER_SEC; +extern "C" void adios2_finalize_(){ + std::cout << "Close global arrays" << std::endl; + writer_st.Close(); + reader_st.Close(); + } extern "C" void adios2_stream_( - const int *lglel, - const double *pr, - const double *u, - const double *v, - const double *w, - const double *mass1, - const double *temp + const double *field ){ - startT = std::clock(); - // Begin a step of the writer writer_st.BeginStep(); - writer_st.Put(p_st, pr); - writer_st.Put(vx_st, u); - writer_st.Put(vy_st, v); - writer_st.Put(vz_st, w); - writer_st.Put(bm1_st, mass1); - writer_st.Put(lglelw_st, lglel); + writer_st.Put(f2py_field, field); writer_st.EndStep(); - dataTime += (std::clock() - startT) / (double) CLOCKS_PER_SEC; -} -extern "C" void adios2_read_( - int *lglelrr, - double *pr, - double *v, - double *u, - double *w, - const int *nval, - const int *nelvin, - const int *nelb, - const int *nelgv, - const int *nelgt, - const int *comm_int, - char *fname -){ - startT = std::clock(); - std::string configFile="adios2_config/config.xml"; - MPI_Comm comm = MPI_Comm_f2c(*comm_int); - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); - - // See how much of the file my rank needs to read - unsigned int nelv = static_cast((*nelvin)); - unsigned int start = static_cast(*nelb); - start *= static_cast(*nval); - unsigned int n = static_cast (*nval) * nelv; - unsigned int gn = static_cast((*nelgv)*(*nval)); - - // See how much of the file my rank needs to read for the global indices - unsigned int nelv3 = static_cast((*nelvin)); - unsigned int start3 = static_cast(*nelb); - unsigned int n3 = static_cast (nelv); - unsigned int gn3 = static_cast((*nelgv)); - - // Write some debuffing info - if (rank==0){ - std::cout << "what adios is getting:"<< std::endl; - std::cout << "nvals:"<< static_cast (*nval) << std::endl; - std::cout << "nelgv:"<< static_cast (*nelgv) << std::endl; - std::cout << "nelgt:"<< static_cast (*nelgt) << std::endl; - - std::cout << "what adios is calculating:"<< std::endl; - std::cout << "nelv:"<< nelv << std::endl; - std::cout << "start:"<< start << std::endl; - std::cout << "count:"<< n << std::endl; - std::cout << "total number of entries:"<< gn << std::endl; - } - - - // Read the file with factory name - ifile=ifile+1; - // Make a new name every time the reader is called - std:: string fileName= "out.f0000"+ std::to_string(ifile) +".bp"; - startT = std::clock(); - readr = ior.Open(fileName,adios2::Mode::Read); - - int step = 1; - bool firstStep = true; - std::cout << "CAREFULL, YOU ARE SAYING EVERY STEP IS THE FIRST, SO IT ALLOCATES EVERY TIME" << std::endl; - - // Check if the variables that we are requesting are in the file - vxr = ior.InquireVariable("VX_OUT"); - if (!vxr) - { - std::cout << "Error: NO variable VX_OUT found. Unable to proceed. " << std::endl; - } - - vyr = ior.InquireVariable("VY_OUT"); - if (!vyr) - { - std::cout << "Error: NO variable VY_OUT found. Unable to proceed. " << std::endl; - } - - vzr = ior.InquireVariable("VZ_OUT"); - if (!vzr) - { - std::cout << "Error: NO variable VZ_OUT found. Unable to proceed. " << std::endl; - } - - prr = ior.InquireVariable("P_OUT"); - if (!prr) - { - std::cout << "Error: NO variable P_OUT found. Unable to proceed. " << std::endl; - } - lglelr = ior.InquireVariable("LGLEL_OUT"); - if (!lglelr) - { - std::cout << "Error: NO variable LGLEL_OUT found. Unable to proceed. " << std::endl; - } - - elapsedT = (std::clock() - startT) / (double) CLOCKS_PER_SEC; - std::cout << "rank: " << rank << " open and inquire: " << elapsedT << "s." << std::endl; - startT = std::clock(); - // Allocate variables if it is the first step - if (firstStep) - { - readr.LockReaderSelections(); - vVXr.resize(n); - vVYr.resize(n); - vVZr.resize(n); - vVPrr.resize(n); - vVLGLELr.resize(n3); - firstStep=false; - } - - elapsedT = (std::clock() - startT) / (double) CLOCKS_PER_SEC; - std::cout << "rank: " << rank << " resize vector: " << elapsedT << "s." << std::endl; - startT = std::clock(); - - // Select in the adios file the sections to read - vxr.SetSelection({{start}, {n}}); - vyr.SetSelection({{start}, {n}}); - vzr.SetSelection({{start}, {n}}); - prr.SetSelection({{start}, {n}}); - lglelr.SetSelection({{start3}, {n3}}); - - elapsedT = (std::clock() - startT) / (double) CLOCKS_PER_SEC; - std::cout << "rank: " << rank << " set selection: " << elapsedT << "s." << std::endl; - startT = std::clock(); - - // get the data that has been selected - readr.Get(vxr,vVXr.data()); - readr.Get(vyr,vVYr.data()); - readr.Get(vzr,vVZr.data()); - readr.Get(prr,vVPrr.data()); - readr.Get(lglelr,vVLGLELr.data()); - - elapsedT = (std::clock() - startT) / (double) CLOCKS_PER_SEC; - std::cout << "rank: " << rank << " get data: " << elapsedT << "s." << std::endl; - startT = std::clock(); - - // Close the reader - readr.Close(); - - elapsedT = (std::clock() - startT) / (double) CLOCKS_PER_SEC; - std::cout << "rank: " << rank << " Close: " << elapsedT << "s." << std::endl; - startT = std::clock(); - - //copy the data from the buffer to the array that is used in nek/neko - if (rank==0){ - std::cout <<"Copying data into nek vector " << std::endl; - } - - for (int i=0; i("py2f_field"); + py2f_field.SetSelection({{reader_start}, {reader_count}}); + reader_st.Get(py2f_field, field); + reader_st.EndStep(); } diff --git a/src/io/nmsh_file.f90 b/src/io/nmsh_file.f90 index cc83ba8a5e5..48e5f765f36 100644 --- a/src/io/nmsh_file.f90 +++ b/src/io/nmsh_file.f90 @@ -45,11 +45,11 @@ module nmsh_file use mpi_f08 use logger implicit none - + private !> Specifices the maximum number of elements any rank is allowed to write (for nmsh). !! Needed in order to generate large meshes where an individual write might exceed 2GB. - integer, parameter :: max_write_nel = 8000000 + integer, parameter :: max_write_nel = 8000000 !> Interface for Neko nmsh files type, public, extends(generic_file_t) :: nmsh_file_t contains @@ -62,7 +62,7 @@ module nmsh_file !> Load a mesh from a binary Neko nmsh file subroutine nmsh_file_read(this, data) class(nmsh_file_t) :: this - class(*), target, intent(inout) :: data + class(*), target, intent(inout) :: data type(nmsh_hex_t), allocatable :: nmsh_hex(:) type(nmsh_quad_t), allocatable :: nmsh_quad(:) type(nmsh_zone_t), allocatable :: nmsh_zone(:) @@ -75,10 +75,12 @@ subroutine nmsh_file_read(this, data) integer :: nmsh_quad_size, nmsh_hex_size, nmsh_zone_size integer :: nelv, gdim, nzones, ncurves integer :: el_idx - type(point_t) :: p(8) + type(point_t), target :: p(8) type(linear_dist_t) :: dist character(len=LOG_SIZE) :: log_buf + call this%check_exists() + select type(data) type is(mesh_t) msh => data @@ -96,155 +98,155 @@ subroutine nmsh_file_read(this, data) call MPI_File_open(NEKO_COMM, trim(this%fname), & MPI_MODE_RDONLY, MPI_INFO_NULL, fh, ierr) - + if (ierr > 0) then call neko_error('Could not open the mesh file ' // this%fname // & 'for reading!') - end if + end if call MPI_File_read_all(fh, nelv, 1, MPI_INTEGER, status, ierr) call MPI_File_read_all(fh, gdim, 1, MPI_INTEGER, status, ierr) write(log_buf,1) gdim, nelv -1 format('gdim = ', i1, ', nelements =', i9) +1 format('gdim = ', i1, ', nelements =', i9) call neko_log%message(log_buf) if (gdim .eq. 2) then call MPI_File_close(fh, ierr) call nmsh_file_read_2d(this, msh) else - - dist = linear_dist_t(nelv, pe_rank, pe_size, NEKO_COMM) - nelv = dist%num_local() - element_offset = dist%start_idx() - - call msh%init(gdim, nelv) - - call neko_log%message('Reading elements') - if (msh%gdim .eq. 2) then - allocate(nmsh_quad(msh%nelv)) - mpi_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(element_offset,i8) * int(nmsh_quad_size,i8) - call MPI_File_read_at_all(fh, mpi_offset, & + dist = linear_dist_t(nelv, pe_rank, pe_size, NEKO_COMM) + nelv = dist%num_local() + element_offset = dist%start_idx() + + call msh%init(gdim, nelv) + + call neko_log%message('Reading elements') + + if (msh%gdim .eq. 2) then + allocate(nmsh_quad(msh%nelv)) + mpi_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(element_offset,i8) * int(nmsh_quad_size,i8) + call MPI_File_read_at_all(fh, mpi_offset, & nmsh_quad, msh%nelv, MPI_NMSH_QUAD, status, ierr) - do i = 1, nelv - do j = 1, 4 - p(j) = point_t(nmsh_quad(i)%v(j)%v_xyz, nmsh_quad(i)%v(j)%v_idx) + do i = 1, nelv + do j = 1, 4 + p(j) = point_t(nmsh_quad(i)%v(j)%v_xyz, nmsh_quad(i)%v(j)%v_idx) + end do + ! swap vertices to keep symmetric vertex numbering in neko + call msh%add_element(i, p(1), p(2), p(4), p(3)) end do - ! swap vertices to keep symmetric vertex numbering in neko - call msh%add_element(i, p(1), p(2), p(4), p(3)) - end do - deallocate(nmsh_quad) - mpi_el_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(dist%num_global(),i8) * int(nmsh_quad_size,i8) - else if (msh%gdim .eq. 3) then - allocate(nmsh_hex(msh%nelv)) - mpi_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(element_offset,i8) * int(nmsh_hex_size,i8) - call MPI_File_read_at_all(fh, mpi_offset, & + deallocate(nmsh_quad) + mpi_el_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(dist%num_global(),i8) * int(nmsh_quad_size,i8) + else if (msh%gdim .eq. 3) then + allocate(nmsh_hex(msh%nelv)) + mpi_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(element_offset,i8) * int(nmsh_hex_size,i8) + call MPI_File_read_at_all(fh, mpi_offset, & nmsh_hex, msh%nelv, MPI_NMSH_HEX, status, ierr) - do i = 1, nelv - do j = 1, 8 - p(j) = point_t(nmsh_hex(i)%v(j)%v_xyz, nmsh_hex(i)%v(j)%v_idx) - end do - ! swap vertices to keep symmetric vertex numbering in neko - call msh%add_element(i, & + do i = 1, nelv + do j = 1, 8 + p(j) = point_t(nmsh_hex(i)%v(j)%v_xyz, nmsh_hex(i)%v(j)%v_idx) + end do + ! swap vertices to keep symmetric vertex numbering in neko + call msh%add_element(i, & p(1), p(2), p(4), p(3), p(5), p(6), p(8), p(7)) - end do - deallocate(nmsh_hex) - mpi_el_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(dist%num_global(),i8) * int(nmsh_hex_size,i8) - else - if (pe_rank .eq. 0) call neko_error('Invalid dimension of mesh') - end if - call neko_log%message('Reading BC/zone data') - - mpi_offset = mpi_el_offset - call MPI_File_read_at_all(fh, mpi_offset, & - nzones, 1, MPI_INTEGER, status, ierr) - if (nzones .gt. 0) then - allocate(nmsh_zone(nzones)) - - !> - !!@todo Fix the parallel reading in this part, let each rank read - !!a piece and pass the pieces around, filtering out matching zones - !!in the local mesh. - !! - mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8) + end do + deallocate(nmsh_hex) + mpi_el_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(dist%num_global(),i8) * int(nmsh_hex_size,i8) + else + if (pe_rank .eq. 0) call neko_error('Invalid dimension of mesh') + end if + call neko_log%message('Reading BC/zone data') + + mpi_offset = mpi_el_offset call MPI_File_read_at_all(fh, mpi_offset, & + nzones, 1, MPI_INTEGER, status, ierr) + if (nzones .gt. 0) then + allocate(nmsh_zone(nzones)) + + !> + !!@todo Fix the parallel reading in this part, let each rank read + !!a piece and pass the pieces around, filtering out matching zones + !!in the local mesh. + !! + mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8) + call MPI_File_read_at_all(fh, mpi_offset, & nmsh_zone, nzones, MPI_NMSH_ZONE, status, ierr) - - do i = 1, nzones - el_idx = nmsh_zone(i)%e - if (el_idx .gt. msh%offset_el .and. & - el_idx .le. msh%offset_el + msh%nelv) then - el_idx = el_idx - msh%offset_el - select case(nmsh_zone(i)%type) - case(1) - call msh%mark_wall_facet(nmsh_zone(i)%f, el_idx) - case(2) - call msh%mark_inlet_facet(nmsh_zone(i)%f, el_idx) - case(3) - call msh%mark_outlet_facet(nmsh_zone(i)%f, el_idx) - case(4) - call msh%mark_sympln_facet(nmsh_zone(i)%f, el_idx) - case(5) - call msh%mark_periodic_facet(nmsh_zone(i)%f, el_idx, & + + do i = 1, nzones + el_idx = nmsh_zone(i)%e + if (el_idx .gt. msh%offset_el .and. & + el_idx .le. msh%offset_el + msh%nelv) then + el_idx = el_idx - msh%offset_el + select case(nmsh_zone(i)%type) + case(1) + call msh%mark_wall_facet(nmsh_zone(i)%f, el_idx) + case(2) + call msh%mark_inlet_facet(nmsh_zone(i)%f, el_idx) + case(3) + call msh%mark_outlet_facet(nmsh_zone(i)%f, el_idx) + case(4) + call msh%mark_sympln_facet(nmsh_zone(i)%f, el_idx) + case(5) + call msh%mark_periodic_facet(nmsh_zone(i)%f, el_idx, & nmsh_zone(i)%p_f, nmsh_zone(i)%p_e, nmsh_zone(i)%glb_pt_ids) - case(6) - call msh%mark_outlet_normal_facet(nmsh_zone(i)%f, el_idx) - case(7) - call msh%mark_labeled_facet(nmsh_zone(i)%f, el_idx,nmsh_zone(i)%p_f) - end select - end if - end do - !Apply facets, important that marking is finished - do i = 1, nzones - el_idx = nmsh_zone(i)%e - if (el_idx .gt. msh%offset_el .and. & - el_idx .le. msh%offset_el + msh%nelv) then - el_idx = el_idx - msh%offset_el - select case(nmsh_zone(i)%type) - case(5) - call msh%apply_periodic_facet(nmsh_zone(i)%f, el_idx, & + case(6) + call msh%mark_outlet_normal_facet(nmsh_zone(i)%f, el_idx) + case(7) + call msh%mark_labeled_facet(nmsh_zone(i)%f, el_idx,nmsh_zone(i)%p_f) + end select + end if + end do + !Apply facets, important that marking is finished + do i = 1, nzones + el_idx = nmsh_zone(i)%e + if (el_idx .gt. msh%offset_el .and. & + el_idx .le. msh%offset_el + msh%nelv) then + el_idx = el_idx - msh%offset_el + select case(nmsh_zone(i)%type) + case(5) + call msh%apply_periodic_facet(nmsh_zone(i)%f, el_idx, & nmsh_zone(i)%p_f, nmsh_zone(i)%p_e, nmsh_zone(i)%glb_pt_ids) - end select - end if - end do + end select + end if + end do - deallocate(nmsh_zone) - end if - call neko_log%message('Reading deformation data') + deallocate(nmsh_zone) + end if + call neko_log%message('Reading deformation data') - mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8) - call MPI_File_read_at_all(fh, mpi_offset, & + mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8) + call MPI_File_read_at_all(fh, mpi_offset, & ncurves, 1, MPI_INTEGER, status, ierr) - if (ncurves .gt. 0) then - - allocate(nmsh_curve(ncurves)) - mpi_offset = mpi_el_offset + int(2 * MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8) - call MPI_File_read_at_all(fh, mpi_offset, & + if (ncurves .gt. 0) then + + allocate(nmsh_curve(ncurves)) + mpi_offset = mpi_el_offset + int(2 * MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8) + call MPI_File_read_at_all(fh, mpi_offset, & nmsh_curve, ncurves, MPI_NMSH_CURVE, status, ierr) - - do i = 1, ncurves - el_idx = nmsh_curve(i)%e - msh%offset_el - if (el_idx .gt. 0 .and. & - el_idx .le. msh%nelv) then - call msh%mark_curve_element(el_idx, & + + do i = 1, ncurves + el_idx = nmsh_curve(i)%e - msh%offset_el + if (el_idx .gt. 0 .and. & + el_idx .le. msh%nelv) then + call msh%mark_curve_element(el_idx, & nmsh_curve(i)%curve_data, nmsh_curve(i)%type) - end if - - end do - - deallocate(nmsh_curve) - end if + end if - call MPI_File_close(fh, ierr) - call neko_log%message('Mesh read, setting up connectivity') + end do - call msh%finalize() - call neko_log%message('Done setting up mesh and connectivity') - - call neko_log%end_section() + deallocate(nmsh_curve) + end if + + call MPI_File_close(fh, ierr) + call neko_log%message('Mesh read, setting up connectivity') + + call msh%finalize() + call neko_log%message('Done setting up mesh and connectivity') + + call neko_log%end_section() end if - + end subroutine nmsh_file_read !> Load a mesh from a binary Neko nmsh file @@ -280,16 +282,16 @@ subroutine nmsh_file_read_2d(this, msh) call MPI_File_read_all(fh, gdim, 1, MPI_INTEGER, status, ierr) write(log_buf,2) gdim -2 format('gdim = ', i1, ', no full 2d support, creating thin slab') +2 format('gdim = ', i1, ', no full 2d support, creating thin slab') call neko_log%message(log_buf) gdim = 3 - + dist = linear_dist_t(nelv, pe_rank, pe_size, NEKO_COMM) nelv = dist%num_local() element_offset = dist%start_idx() - + call msh%init(gdim, nelv) - + allocate(nmsh_quad(msh%nelv)) mpi_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(element_offset,i8) * int(nmsh_quad_size,i8) call MPI_File_read_at_all(fh, mpi_offset, & @@ -323,15 +325,15 @@ subroutine nmsh_file_read_2d(this, msh) !!@todo Fix the parallel reading in this part, let each rank read !!a piece and pass the pieces around, filtering out matching zones !!in the local mesh. - !! + !! mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8) call MPI_File_read_at_all(fh, mpi_offset, & nmsh_zone, nzones, MPI_NMSH_ZONE, status, ierr) - + do i = 1, nzones el_idx = nmsh_zone(i)%e if (el_idx .gt. msh%offset_el .and. & - el_idx .le. msh%offset_el + msh%nelv) then + el_idx .le. msh%offset_el + msh%nelv) then el_idx = el_idx - msh%offset_el select case(nmsh_zone(i)%type) case(1) @@ -370,7 +372,7 @@ subroutine nmsh_file_read_2d(this, msh) do i = 1, nzones el_idx = nmsh_zone(i)%e if (el_idx .gt. msh%offset_el .and. & - el_idx .le. msh%offset_el + msh%nelv) then + el_idx .le. msh%offset_el + msh%nelv) then el_idx = el_idx - msh%offset_el select case(nmsh_zone(i)%type) case(5) @@ -396,7 +398,7 @@ subroutine nmsh_file_read_2d(this, msh) call msh%apply_periodic_facet(5, el_idx, & 6, el_idx, glb_pt_ids%x) end do - + deallocate(nmsh_zone) end if @@ -406,32 +408,32 @@ subroutine nmsh_file_read_2d(this, msh) ncurves, 1, MPI_INTEGER, status, ierr) if (ncurves .gt. 0) then - + allocate(nmsh_curve(ncurves)) mpi_offset = mpi_el_offset + & int(2*MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8) call MPI_File_read_at_all(fh, mpi_offset, & nmsh_curve, ncurves, MPI_NMSH_CURVE, status, ierr) - - do i = 1, ncurves + + do i = 1, ncurves el_idx = nmsh_curve(i)%e - msh%offset_el if (el_idx .gt. 0 .and. & - el_idx .le. msh%nelv) then + el_idx .le. msh%nelv) then call msh%mark_curve_element(el_idx, & nmsh_curve(i)%curve_data, nmsh_curve(i)%type) end if - + end do - + deallocate(nmsh_curve) end if call MPI_File_close(fh, ierr) call msh%finalize() - + call neko_log%end_section() - + end subroutine nmsh_file_read_2d @@ -450,10 +452,10 @@ subroutine nmsh_file_write(this, data, t) integer (kind=MPI_OFFSET_KIND) :: mpi_offset, mpi_el_offset integer :: i, j, ierr, nelgv, element_offset, k integer :: nmsh_quad_size, nmsh_hex_size, nmsh_zone_size, nmsh_curve_size - integer :: nzones, ncurves + integer :: nzones, ncurves class(element_t), pointer :: ep integer(i4), dimension(8), parameter :: vcyc_to_sym = (/1, 2, 4, 3, 5, & - & 6, 8, 7/) ! cyclic to symmetric vertex mapping + & 6, 8, 7/) ! cyclic to symmetric vertex mapping select type(data) type is (mesh_t) @@ -484,7 +486,7 @@ subroutine nmsh_file_write(this, data, t) call msh%reset_periodic_ids() if (msh%gdim .eq. 2) then - allocate(nmsh_quad(msh%nelv)) + allocate(nmsh_quad(msh%nelv)) do i = 1, msh%nelv ep => msh%elements(i)%e nmsh_quad(i)%el_idx = ep%id() @@ -499,7 +501,7 @@ subroutine nmsh_file_write(this, data, t) deallocate(nmsh_quad) mpi_el_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(nelgv,i8) * int(nmsh_quad_size,i8) else if (msh%gdim .eq. 3) then - allocate(nmsh_hex(msh%nelv)) + allocate(nmsh_hex(msh%nelv)) do i = 1, msh%nelv ep => msh%elements(i)%e nmsh_hex(i)%el_idx = ep%id() @@ -516,14 +518,14 @@ subroutine nmsh_file_write(this, data, t) call MPI_File_write_at_all(fh, mpi_offset, & nmsh_HEX(i*max_write_nel+1), min(msh%nelv-i*max_write_nel,max_write_nel), MPI_NMSH_HEX, status, ierr) end do - deallocate(nmsh_hex) + deallocate(nmsh_hex) mpi_el_offset = int(2 * MPI_INTEGER_SIZE,i8) + int(nelgv,i8) * int(nmsh_hex_size,i8) - else + else call neko_error('Invalid dimension of mesh') end if nzones = msh%wall%size + msh%inlet%size + msh%outlet%size + & - msh%sympln%size + msh%periodic%size + msh%outlet_normal%size + msh%sympln%size + msh%periodic%size + msh%outlet_normal%size do i = 1, NEKO_MSH_MAX_ZLBLS nzones = nzones + msh%labeled_zones(i)%size @@ -534,9 +536,9 @@ subroutine nmsh_file_write(this, data, t) if (nzones .gt. 0) then allocate(nmsh_zone(nzones)) - + nmsh_zone(:)%type = 0 - + j = 1 do i = 1, msh%wall%size nmsh_zone(j)%e = msh%wall%facet_el(i)%x(2) + msh%offset_el @@ -590,16 +592,16 @@ subroutine nmsh_file_write(this, data, t) j = j + 1 end do end do - - + + mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8) call MPI_File_write_at_all(fh, mpi_offset, & nmsh_zone, nzones, MPI_NMSH_ZONE, status, ierr) - + deallocate(nmsh_zone) end if - - ncurves = msh%curve%size + + ncurves = msh%curve%size mpi_offset = mpi_el_offset + int(MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8) call MPI_File_write_at_all(fh, mpi_offset, & @@ -610,7 +612,7 @@ subroutine nmsh_file_write(this, data, t) do i = 1, ncurves nmsh_curve(i)%type = 0 end do - + do i = 1, ncurves nmsh_curve(i)%e = msh%curve%curve_el(i)%el_idx + msh%offset_el nmsh_curve(i)%curve_data = msh%curve%curve_el(i)%curve_data @@ -619,15 +621,15 @@ subroutine nmsh_file_write(this, data, t) mpi_offset = mpi_el_offset + int(2*MPI_INTEGER_SIZE,i8) + int(nzones,i8)*int(nmsh_zone_size,i8) call MPI_File_write_at_all(fh, mpi_offset, & nmsh_curve, ncurves, MPI_NMSH_CURVE, status, ierr) - + deallocate(nmsh_curve) end if - + call MPI_File_sync(fh, ierr) call MPI_File_close(fh, ierr) call neko_log%message('Done') end subroutine nmsh_file_write - + end module nmsh_file - + diff --git a/src/io/output.f90 b/src/io/output.f90 index a0284131cd8..41f8d94a671 100644 --- a/src/io/output.f90 +++ b/src/io/output.f90 @@ -35,12 +35,13 @@ module output use num_types, only : rp use file, only : file_t implicit none + private !> Abstract type defining an output type type, public, abstract :: output_t type(file_t) :: file_ contains - procedure, pass(this) :: init => output_init + procedure, pass(this) :: init_base => output_init procedure, pass(this) :: set_counter => output_set_counter procedure, pass(this) :: set_start_counter => output_set_start_counter procedure(output_sample), pass(this), deferred :: sample @@ -58,13 +59,20 @@ end subroutine output_sample contains - !> Output constructor - subroutine output_init(this, fname) + !> Output constructor. + !! @param fname Name of the output file. + !! @param precision Output precision (sp or dp). + subroutine output_init(this, fname, precision) class(output_t), intent(inout) :: this character(len=*), intent(inout) :: fname + integer, intent(in), optional :: precision + + if (present(precision)) then + this%file_ = file_t(fname, precision=precision) + else + this%file_ = file_t(fname) + end if - this%file_ = file_t(fname) - end subroutine output_init !> Update the output's file counter @@ -73,13 +81,13 @@ subroutine output_set_counter(this, n) integer, intent(in) :: n call this%file_%set_counter(n) end subroutine output_set_counter - + !> Update the start of output's file counter subroutine output_set_start_counter(this, n) class(output_t), intent(inout) :: this integer, intent(in) :: n call this%file_%set_start_counter(n) end subroutine output_set_start_counter - - + + end module output diff --git a/src/io/re2_file.f90 b/src/io/re2_file.f90 index 3f7933582f0..8a0077d904c 100644 --- a/src/io/re2_file.f90 +++ b/src/io/re2_file.f90 @@ -49,7 +49,7 @@ module re2_file use logger implicit none private - + !> Interface for NEKTON re2 files type, public, extends(generic_file_t) :: re2_file_t @@ -86,7 +86,9 @@ subroutine re2_file_read(this, data) integer :: re2_data_bc_size logical :: v2_format character(len=LOG_SIZE) :: log_buf - + + call this%check_exists() + select type(data) type is (mesh_t) msh => data @@ -120,7 +122,7 @@ subroutine re2_file_read(this, data) call MPI_Type_size(MPI_RE2V1_DATA_XY, re2_data_xy_size, ierr) call MPI_Type_size(MPI_RE2V1_DATA_XYZ, re2_data_xyz_size, ierr) call MPI_Type_size(MPI_RE2V1_DATA_CV, re2_data_cv_size, ierr) - call MPI_Type_size(MPI_RE2V1_DATA_BC, re2_data_bc_size, ierr) + call MPI_Type_size(MPI_RE2V1_DATA_BC, re2_data_bc_size, ierr) end if write(log_buf,1) ndim, nelv @@ -141,7 +143,7 @@ subroutine re2_file_read(this, data) call MPI_File_open(NEKO_COMM, trim(this%fname), & MPI_MODE_RDONLY, MPI_INFO_NULL, fh, ierr) - + if (ierr .ne. 0) then call neko_log%error("Can't open binary NEKTON file ") end if @@ -155,15 +157,15 @@ subroutine re2_file_read(this, data) call MPI_File_read_at_all(fh, mpi_offset, test, 1, MPI_REAL, status, ierr) mpi_offset = mpi_offset + MPI_REAL_SIZE - + if (abs(RE2_ENDIAN_TEST - test) .gt. 1e-4) then call neko_error('Invalid endian of re2 file, byte swap not implemented yet') end if call re2_file_read_points(msh, ndim, nel, dist, fh, & mpi_offset, re2_data_xy_size, re2_data_xyz_size, v2_format) - - + + ! Set offset to start of curved side data mpi_offset = RE2_HDR_SIZE * MPI_CHARACTER_SIZE + MPI_REAL_SIZE if (ndim .eq. 2) then @@ -185,7 +187,7 @@ subroutine re2_file_read(this, data) mpi_offset = mpi_offset + MPI_DOUBLE_PRECISION_SIZE call re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format) - else + else call MPI_File_read_at_all(fh, mpi_offset, ncurv, 1, MPI_INTEGER, status, ierr) mpi_offset = mpi_offset + MPI_INTEGER_SIZE call re2_file_read_curve(msh, ncurv, dist, fh, mpi_offset, v2_format) @@ -203,7 +205,7 @@ subroutine re2_file_read(this, data) call neko_log%message('Done') - + end subroutine re2_file_read subroutine re2_file_write(this, data, t) @@ -217,13 +219,13 @@ subroutine re2_file_write(this, data, t) character(len=54), parameter :: RE2_HDR_STR = 'RE2 exported by NEKO' integer :: i, j, ierr, nelgv type(MPI_Status) :: status - type(MPI_File) :: fh + type(MPI_File) :: fh integer (kind=MPI_OFFSET_KIND) :: mpi_offset integer :: element_offset integer :: re2_data_xy_size integer :: re2_data_xyz_size character(len=LOG_SIZE) :: log_buf - + select type(data) type is (mesh_t) msh => data @@ -243,7 +245,7 @@ subroutine re2_file_write(this, data, t) if (pe_rank .eq. 0) then open(unit=9,file=trim(this%fname), status='new', iostat=ierr) - write(9, '(a5,i9,i3,i9,a54)') RE2_HDR_VER, nelgv, msh%gdim,& + write(9, '(a5,i9,i3,i9,a54)') RE2_HDR_VER, nelgv, msh%gdim,& nelgv, RE2_HDR_STR close(9) end if @@ -252,7 +254,7 @@ subroutine re2_file_write(this, data, t) call MPI_File_open(NEKO_COMM, trim(this%fname), & MPI_MODE_WRONLY + MPI_MODE_CREATE, MPI_INFO_NULL, fh, ierr) mpi_offset = RE2_HDR_SIZE * MPI_CHARACTER_SIZE - + call MPI_File_write_at(fh, mpi_offset, RE2_ENDIAN_TEST, 1, & MPI_REAL, status, ierr) mpi_offset = mpi_offset + MPI_REAL_SIZE @@ -276,7 +278,7 @@ subroutine re2_file_write(this, data, t) allocate(re2_data_xyz(msh%nelv)) do i = 1, msh%nelv re2_data_xyz(i)%rgroup = 1.0 ! Not used - do j = 1, 8 + do j = 1, 8 re2_data_xyz(i)%x(j) = real(msh%elements(i)%e%pts(j)%p%x(1)) re2_data_xyz(i)%y(j) = real(msh%elements(i)%e%pts(j)%p%x(2)) re2_data_xyz(i)%z(j) = real(msh%elements(i)%e%pts(j)%p%x(3)) @@ -285,17 +287,17 @@ subroutine re2_file_write(this, data, t) mpi_offset = mpi_offset + element_offset * re2_data_xyz_size call MPI_File_write_at(fh, mpi_offset, & re2_data_xyz, msh%nelv, MPI_RE2V1_DATA_XYZ, status, ierr) - + deallocate(re2_data_xyz) else call neko_error("Invalid dimension of mesh") end if - + call MPI_FILE_close(fh, ierr) call neko_log%message('Done') !> @todo Add support for curved side data - + end subroutine re2_file_write subroutine re2_file_read_points(msh, ndim, nel, dist, fh, & @@ -320,20 +322,20 @@ subroutine re2_file_read_points(msh, ndim, nel, dist, fh, & integer :: pt_idx, nelv integer :: i, j, ierr - + nelv = dist%num_local() element_offset = dist%start_idx() call htp%init(2*nel, ndim) pt_idx = 0 if (ndim .eq. 2) then - mpi_offset = mpi_offset + element_offset * re2_data_xy_size + mpi_offset = mpi_offset + element_offset * re2_data_xy_size if (.not. v2_format) then allocate(re2v1_data_xy(nelv)) call MPI_File_read_at_all(fh, mpi_offset, & re2v1_data_xy, nelv, MPI_RE2V1_DATA_XY, status, ierr) do i = 1, nelv - do j = 1, 4 + do j = 1, 4 p(j) = point_t(real(re2v1_data_xy(i)%x(j),dp), & real(re2v1_data_xy(i)%y(j),dp), 0.0d0) call re2_file_add_point(htp, p(j), pt_idx) @@ -350,7 +352,7 @@ subroutine re2_file_read_points(msh, ndim, nel, dist, fh, & call MPI_File_read_at_all(fh, mpi_offset, & re2v2_data_xy, nelv, MPI_RE2V2_DATA_XY, status, ierr) do i = 1, nelv - do j = 1, 4 + do j = 1, 4 p(j) = point_t(re2v2_data_xy(i)%x(j), & re2v2_data_xy(i)%y(j), 0.0d0) call re2_file_add_point(htp, p(j), pt_idx) @@ -370,7 +372,7 @@ subroutine re2_file_read_points(msh, ndim, nel, dist, fh, & call MPI_File_read_at_all(fh, mpi_offset, & re2v1_data_xyz, nelv, MPI_RE2V1_DATA_XYZ, status, ierr) do i = 1, nelv - do j = 1, 8 + do j = 1, 8 p(j) = point_t(real(re2v1_data_xyz(i)%x(j),dp), & real(re2v1_data_xyz(i)%y(j),dp),& real(re2v1_data_xyz(i)%z(j),dp)) @@ -381,7 +383,7 @@ subroutine re2_file_read_points(msh, ndim, nel, dist, fh, & end if ! swap vertices to keep symmetric vertex numbering in neko call msh%add_element(i, & - p(1), p(2), p(4), p(3), p(5), p(6), p(8), p(7)) + p(1), p(2), p(4), p(3), p(5), p(6), p(8), p(7)) end do deallocate(re2v1_data_xyz) else @@ -389,7 +391,7 @@ subroutine re2_file_read_points(msh, ndim, nel, dist, fh, & call MPI_File_read_at_all(fh, mpi_offset, & re2v2_data_xyz, nelv, MPI_RE2V2_DATA_XYZ, status, ierr) do i = 1, nelv - do j = 1, 8 + do j = 1, 8 p(j) = point_t(re2v2_data_xyz(i)%x(j), & re2v2_data_xyz(i)%y(j),& re2v2_data_xyz(i)%z(j)) @@ -400,7 +402,7 @@ subroutine re2_file_read_points(msh, ndim, nel, dist, fh, & end if ! swap vertices to keep symmetric vertex numbering in neko call msh%add_element(i, & - p(1), p(2), p(4), p(3), p(5), p(6), p(8), p(7)) + p(1), p(2), p(4), p(3), p(5), p(6), p(8), p(7)) end do deallocate(re2v2_data_xyz) end if @@ -419,13 +421,13 @@ subroutine re2_file_read_curve(msh, ncurve, dist, fh, mpi_offset, v2_format) type(MPI_Status) :: status integer :: i, j, l, ierr, el_idx, id type(re2v1_curve_t), allocatable :: re2v1_data_curve(:) - type(re2v2_curve_t), allocatable :: re2v2_data_curve(:) + type(re2v2_curve_t), allocatable :: re2v2_data_curve(:) real(kind=dp), allocatable :: curve_data(:,:,:) integer, allocatable :: curve_type(:,:) logical, allocatable :: curve_element(:) character(len=1) :: chtemp logical :: curve_skip = .false. - + allocate(curve_data(5,12,msh%nelv)) allocate(curve_element(msh%nelv)) allocate(curve_type(12,msh%nelv)) @@ -438,7 +440,7 @@ subroutine re2_file_read_curve(msh, ncurve, dist, fh, mpi_offset, v2_format) end do end do end do - write(*,*) 'reading curved data' + write(*,*) 'reading curved data' if (.not. v2_format) then allocate(re2v1_data_curve(ncurve)) call MPI_File_read_at_all(fh, mpi_offset, re2v1_data_curve, ncurve, & @@ -454,39 +456,39 @@ subroutine re2_file_read_curve(msh, ncurve, dist, fh, mpi_offset, v2_format) el_idx = re2v2_data_curve(i)%elem - dist%start_idx() id = re2v2_data_curve(i)%zone chtemp = re2v2_data_curve(i)%type - do j = 1, 5 + do j = 1, 5 curve_data(j,id, el_idx) = re2v2_data_curve(i)%point(j) enddo - else + else el_idx = re2v1_data_curve(i)%elem - dist%start_idx() id = re2v1_data_curve(i)%zone chtemp = re2v1_data_curve(i)%type - do j = 1, 5 - curve_data(j,id, el_idx) = real(re2v1_data_curve(i)%point(j),dp) + do j = 1, 5 + curve_data(j,id, el_idx) = real(re2v1_data_curve(i)%point(j),dp) enddo end if - - curve_element(el_idx) = .true. + + curve_element(el_idx) = .true. !This might need to be extended select case(trim(chtemp)) case ('s') - curve_type(id,el_idx) = 1 - call neko_log%warning('curve type s not supported, treating mesh as non-curved') - curve_skip = .true. - exit + curve_type(id,el_idx) = 1 + call neko_log%warning('curve type s not supported, treating mesh as non-curved') + curve_skip = .true. + exit case ('e') - curve_type(id,el_idx) = 2 - call neko_log%warning('curve type e not supported, treating mesh as non-curved') - curve_skip = .true. - exit + curve_type(id,el_idx) = 2 + call neko_log%warning('curve type e not supported, treating mesh as non-curved') + curve_skip = .true. + exit case ('C') - curve_type(id,el_idx) = 3 + curve_type(id,el_idx) = 3 case ('m') - curve_type(id,el_idx) = 4 + curve_type(id,el_idx) = 4 case default write(*,*) chtemp, 'curve type not supported yet, treating mesh as non-curved',id, el_idx - curve_skip = .true. + curve_skip = .true. end select end do @@ -501,8 +503,8 @@ subroutine re2_file_read_curve(msh, ncurve, dist, fh, mpi_offset, v2_format) call msh%mark_curve_element(el_idx, & curve_data(1,1,el_idx), curve_type(1,el_idx)) end if - end do - end if + end do + end if deallocate(curve_data) deallocate(curve_element) @@ -525,7 +527,7 @@ subroutine re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format) integer, parameter, dimension(6) :: facet_map = (/3, 2, 4, 1, 5, 6/) logical :: periodic type(re2v1_bc_t), allocatable :: re2v1_data_bc(:) - type(re2v2_bc_t), allocatable :: re2v2_data_bc(:) + type(re2v2_bc_t), allocatable :: re2v2_data_bc(:) if (.not. v2_format) then allocate(re2v1_data_bc(nbcs)) @@ -538,7 +540,7 @@ subroutine re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format) end if periodic = .false. - + !> @todo Use element offset in parallel if (v2_format) then ! V2 format do i = 1, nbcs @@ -570,11 +572,11 @@ subroutine re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format) end if call msh%mark_labeled_facet(sym_facet, el_idx, label) case default - write (*,*) re2v2_data_bc(i)%type, 'bc type not supported yet' + write (*,*) re2v2_data_bc(i)%type, 'bc type not supported yet' write (*,*) re2v2_data_bc(i)%bc_data end select end do - + ! ! Fix periodic condition for shared nodes ! @@ -588,13 +590,13 @@ subroutine re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format) p_el_idx = int(re2v2_data_bc(i)%bc_data(1)) p_facet = facet_map(int(re2v2_data_bc(i)%bc_data(2))) call msh%create_periodic_ids(sym_facet, el_idx, & - p_facet, p_el_idx) + p_facet, p_el_idx) end select end do end do end if deallocate(re2v2_data_bc) - + else ! V! format do i = 1, nbcs el_idx = re2v1_data_bc(i)%elem - dist%start_idx() @@ -619,12 +621,12 @@ subroutine re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format) label = int(re2v1_data_bc(i)%bc_data(5)) call msh%mark_labeled_facet(sym_facet, el_idx, label) case default - write (*,*) re2v1_data_bc(i)%type, 'bc type not supported yet' + write (*,*) re2v1_data_bc(i)%type, 'bc type not supported yet' write (*,*) re2v1_data_bc(i)%bc_data end select end do - + ! ! Fix periodic condition for shared nodes ! @@ -638,15 +640,15 @@ subroutine re2_file_read_bcs(msh, nbcs, dist, fh, mpi_offset, v2_format) p_el_idx = int(re2v1_data_bc(i)%bc_data(1)) p_facet = facet_map(int(re2v1_data_bc(i)%bc_data(2))) call msh%create_periodic_ids(sym_facet, el_idx, & - p_facet, p_el_idx) + p_facet, p_el_idx) end select end do end do end if - + deallocate(re2v1_data_bc) end if - + end subroutine re2_file_read_bcs @@ -655,7 +657,7 @@ subroutine re2_file_add_point(htp, p, idx) type(point_t), intent(inout) :: p integer, intent(inout) :: idx integer :: tmp - + if (htp%get(p, tmp) .gt. 0) then idx = idx + 1 call htp%set(p, idx) @@ -663,7 +665,7 @@ subroutine re2_file_add_point(htp, p, idx) else call p%set_id(tmp) end if - + end subroutine re2_file_add_point - + end module re2_file diff --git a/src/io/rea_file.f90 b/src/io/rea_file.f90 index 23f2c44cdf0..5c617345298 100644 --- a/src/io/rea_file.f90 +++ b/src/io/rea_file.f90 @@ -37,7 +37,7 @@ module rea_file use num_types use utils use mesh - use point + use point use map use rea use re2_file @@ -88,16 +88,18 @@ subroutine rea_file_read(this, data) integer, parameter, dimension(6) :: facet_map = (/3, 2, 4, 1, 5, 6/) logical :: curve_skip = .false. character(len=LOG_SIZE) :: log_buf + + call this%check_exists() select type(data) type is (rea_t) - call rea_free(data) + call rea_free(data) msh => data%msh params => data%params cbc => data%cbc read_param = .true. read_bcs = .true. - type is (mesh_t) + type is (mesh_t) msh => data read_param = .false. read_bcs = .false. @@ -108,50 +110,50 @@ subroutine rea_file_read(this, data) if (read_param .and. read_bcs .and. pe_size .gt. 1) then call neko_error('Reading NEKTON session data only implemented in serial') end if - - + + open(unit=9,file=trim(this%fname), status='old', iostat=ierr) call neko_log%message('Reading NEKTON file ' // this%fname) - + read(9, *) read(9, *) read(9, *) ndim read(9, *) nparam - + if (.not. read_param) then ! Skip parameters do i = 1, nparam read(9, *) end do - else + else allocate(params(nparam)) do i = 1, nparam read(9, *) params(i) end do end if - + ! Skip passive scalars read(9, *) nskip do i = 1, nskip read(9, *) end do - + ! Skip logic switches read(9, *) nlogic do i = 1, nlogic read(9, *) end do - + ! Read mesh info read(9, *) read(9, *) read(9, *) nelgs,ndim, nelgv if (nelgs .lt. 0) then re2_fname = trim(this%fname(1:scan(trim(this%fname), & - '.', back=.true.)))//'re2' + '.', back=.true.)))//'re2' call re2_file%init(re2_fname) call re2_file%read(msh) - else + else write(log_buf,1) ndim, nelgv 1 format('gdim = ', i1, ', nelements =', i7) call neko_log%message(log_Buf) @@ -214,8 +216,8 @@ subroutine rea_file_read(this, data) end do call htp%free() - - read(9, *) + + read(9, *) read(9, *) ncurve allocate(curve_data(5,8,nelgv)) allocate(curve_element(nelgv)) @@ -230,20 +232,22 @@ subroutine rea_file_read(this, data) end do end do do i = 1, ncurve - read(9, *) edge, el_idx, (curve(j),j=1,5), chtemp + read(9, *) edge, el_idx, (curve(j),j=1,5), chtemp do j = 1, 5 curve_data(j,edge,el_idx) = curve(j) end do - curve_element(el_idx) = .true. + curve_element(el_idx) = .true. select case(trim(chtemp)) case ('s') - curve_type(edge,el_idx) = 1 - curve_skip = .true. + curve_type(edge,el_idx) = 1 + curve_skip = .true. case ('e') - curve_type(edge,el_idx) = 2 - curve_skip = .true. + curve_type(edge,el_idx) = 2 + curve_skip = .true. case ('C') - curve_type(edge,el_idx) = 3 + curve_type(edge,el_idx) = 3 + case ('m') + curve_type(edge,el_idx) = 4 end select end do if (curve_skip) then @@ -254,15 +258,15 @@ subroutine rea_file_read(this, data) call msh%mark_curve_element(el_idx, & curve_data(1,1,el_idx), curve_type(1,el_idx)) end if - end do + end do end if deallocate(curve_data) deallocate(curve_element) deallocate(curve_type) ! Read fluid boundary conditions - read(9,*) - read(9,*) + read(9,*) + read(9,*) if (.not. read_bcs) then ! Mark zones in the mesh allocate(cbc(6,nelgv)) allocate(bc_data(6,2*ndim,nelgv)) @@ -306,7 +310,7 @@ subroutine rea_file_read(this, data) p_el_idx = int(bc_data(2+off,j,i)) p_facet = facet_map(int(bc_data(3+off,j,i))) call msh%create_periodic_ids(sym_facet, el_idx, & - p_facet, p_el_idx) + p_facet, p_el_idx) end select end do end if @@ -321,7 +325,7 @@ subroutine rea_file_read(this, data) p_el_idx = int(bc_data(2+off,j,i)) p_facet = facet_map(int(bc_data(3+off,j,i))) call msh%create_periodic_ids(sym_facet, el_idx, & - p_facet, p_el_idx) + p_facet, p_el_idx) end select end do end if @@ -336,7 +340,7 @@ subroutine rea_file_read(this, data) p_el_idx = int(bc_data(2+off,j,i)) p_facet = facet_map(int(bc_data(3+off,j,i))) call msh%create_periodic_ids(sym_facet, el_idx, & - p_facet, p_el_idx) + p_facet, p_el_idx) end select end do end if @@ -353,11 +357,11 @@ subroutine rea_file_read(this, data) end if call msh%finalize() - + call neko_log%message('Done') close(9) endif - + end subroutine rea_file_read subroutine rea_file_write(this, data, t) @@ -371,7 +375,7 @@ subroutine rea_file_add_point(htp, p, idx) type(point_t), intent(inout) :: p integer, intent(inout) :: idx integer :: tmp - + if (htp%get(p, tmp) .gt. 0) then idx = idx + 1 call htp%set(p, idx) @@ -379,7 +383,7 @@ subroutine rea_file_add_point(htp, p, idx) else call p%set_id(tmp) end if - + end subroutine rea_file_add_point end module rea_file diff --git a/src/io/stl_file.f90 b/src/io/stl_file.f90 index 519811dbdbf..c1d4d658d49 100644 --- a/src/io/stl_file.f90 +++ b/src/io/stl_file.f90 @@ -30,17 +30,18 @@ ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -!> Stereolithography (STL) file +!> Stereolithography (STL) file module stl_file use num_types use generic_file - use tri_mesh + use tri_mesh use logger use point use neko_mpi_types use mpi_f08 + use utils, only: neko_error use comm - use stl + use stl implicit none private @@ -59,11 +60,11 @@ subroutine stl_file_write(this, data, t) real(kind=rp), intent(in), optional :: t call neko_log%error('Not implemented') end subroutine stl_file_write - + subroutine stl_file_read(this, data) class(stl_file_t) :: this class(*), target, intent(inout) :: data - type(tri_mesh_t), pointer :: tri_msh => null() + type(tri_mesh_t), pointer :: tri_msh => null() type(MPI_Status) :: status type(MPI_File) :: fh type(point_t) :: p(3) @@ -71,6 +72,8 @@ subroutine stl_file_read(this, data) type(stl_triangle_t), allocatable :: stl_tri(:) integer :: i, p_idx, ierr + call this%check_exists() + select type(data) type is(tri_mesh_t) tri_msh => data @@ -85,7 +88,7 @@ subroutine stl_file_read(this, data) if (stl_hdr%hdr(1:6) .eq. 'solid') then call neko_log%error('Invalid STL file (ASCII)') end if - + call tri_msh%init(stl_hdr%ntri) allocate(stl_tri(stl_hdr%ntri)) @@ -106,7 +109,7 @@ subroutine stl_file_read(this, data) deallocate(stl_tri) call MPI_File_close(fh, ierr) - + end subroutine stl_file_read - + end module stl_file diff --git a/src/io/vtk_file.f90 b/src/io/vtk_file.f90 index 3f64cda7ace..baae5f41c5a 100644 --- a/src/io/vtk_file.f90 +++ b/src/io/vtk_file.f90 @@ -46,7 +46,7 @@ module vtk_file use comm implicit none private - + !> Interface for legacy VTK files type, public, extends(generic_file_t) :: vtk_file_t contains @@ -105,7 +105,7 @@ subroutine vtk_file_write(this, data, t) if (associated(msh)) then write(9, fmt='(A)') 'DATASET UNSTRUCTURED_GRID' - call vtk_file_write_mesh(9, msh) + call vtk_file_write_mesh(9, msh) if (associated(mfld)) then call vtk_file_write_cell_data(9, mfld) @@ -127,13 +127,15 @@ subroutine vtk_file_write(this, data, t) else call neko_error('Invalid data') end if - + close(9) end subroutine vtk_file_write subroutine vtk_file_read(this, data) class(vtk_file_t) :: this class(*), target, intent(inout) :: data + + call neko_error('VTK file read not implemented') end subroutine vtk_file_read !> Write a mesh in legacy VTK format @@ -177,15 +179,15 @@ subroutine vtk_file_write_cell_data(unit, mfld) write(unit, fmt='(A,I8)') 'CELL_DATA', mfld%msh%nelv write(unit, fmt='(A,A,A,I8)') 'SCALARS ', trim(mfld%name), ' int', 1 write(unit, fmt='(A)') 'LOOKUP_TABLE default' - + do i = 1, mfld%msh%nelv write(unit, fmt='(I8)') mfld%data(i) end do - + end subroutine vtk_file_write_cell_data !> Write a field @a fld as point data - !! @note High-order fields will be interpolated down + !! @note High-order fields will be interpolated down !! to the low-order mesh subroutine vtk_file_write_point_data(unit, fld) integer :: unit @@ -202,12 +204,12 @@ subroutine vtk_file_write_point_data(unit, fld) write(unit, fmt='(A,I8)') 'POINT_DATA', fld%msh%mpts write(unit, fmt='(A,A,A,I8)') 'SCALARS ', trim(fld%name), ' double', 1 write(unit, fmt='(A)') 'LOOKUP_TABLE default' - + lx = fld%Xh%lx ly = fld%Xh%ly lz = fld%Xh%lz allocate(point_data(fld%msh%mpts)) - + do i = 1, fld%msh%nelv do j = 1, fld%msh%npts id(j) = fld%msh%get_local(fld%msh%elements(i)%e%pts(j)%p) @@ -216,21 +218,21 @@ subroutine vtk_file_write_point_data(unit, fld) point_data(id(1)) = real(fld%x(1,1,1,i),dp) point_data(id(2)) = real(fld%x(lx,1,1,i),dp) point_data(id(3)) = real(fld%x(1,ly,1,i),dp) - point_data(id(4)) = real(fld%x(lx,ly,1,i),dp) + point_data(id(4)) = real(fld%x(lx,ly,1,i),dp) if (fld%msh%gdim .eq. 3) then point_data(id(5)) = real(fld%x(1,1,lz,i),dp) point_data(id(6)) = real(fld%x(lx,1,lz,i),dp) point_data(id(7)) = real(fld%x(1,ly,lz,i),dp) - point_data(id(8)) = real(fld%x(lx,ly,lz,i),dp) + point_data(id(8)) = real(fld%x(lx,ly,lz,i),dp) end if end do write(unit, *) point_data - - deallocate(point_data) - + + deallocate(point_data) + end subroutine vtk_file_write_point_data !> Write xyz-coordinates of a dofmap @a dm as points @@ -258,30 +260,30 @@ subroutine vtk_file_write_dofmap_coordinates(unit, dm) do i = 1, size(dm%x) write(unit, fmt='(I8,I8)') 1,i-1 end do - - + + end subroutine vtk_file_write_dofmap_coordinates !> Write a dofmap @a dm data as point data - subroutine vtk_file_write_dofmap_data(unit, dm) + subroutine vtk_file_write_dofmap_data(unit, dm) integer :: unit type(dofmap_t), intent(inout) :: dm integer :: i, j, k, l - write(unit, fmt='(A,I8)') 'POINT_DATA', size(dm%dof) - write(unit, fmt='(A,A,A,I8)') 'SCALARS ', 'dof_id', ' integer', 1 - write(unit, fmt='(A)') 'LOOKUP_TABLE default' - - do i = 1, dm%msh%nelv - do l = 1, dm%Xh%lz - do k = 1, dm%Xh%ly - do j = 1, dm%Xh%lx - write(unit, fmt='(I8)') real(dm%dof(j,k,l,i),dp) - end do - end do - end do - end do - + write(unit, fmt='(A,I8)') 'POINT_DATA', size(dm%dof) + write(unit, fmt='(A,A,A,I8)') 'SCALARS ', 'dof_id', ' integer', 1 + write(unit, fmt='(A)') 'LOOKUP_TABLE default' + + do i = 1, dm%msh%nelv + do l = 1, dm%Xh%lz + do k = 1, dm%Xh%ly + do j = 1, dm%Xh%lx + write(unit, fmt='(I8)') real(dm%dof(j,k,l,i),dp) + end do + end do + end do + end do + write(unit, fmt='(A,A,A,I8)') 'SCALARS ', 'shared_dof', ' integer', 1 write(unit, fmt='(A)') 'LOOKUP_TABLE default' @@ -298,7 +300,7 @@ subroutine vtk_file_write_dofmap_data(unit, dm) end do end do end do - + end subroutine vtk_file_write_dofmap_data !> Write a tetrahedral mesh in legacy VTK format diff --git a/src/krylov/bcknd/cpu/bicgstab.f90 b/src/krylov/bcknd/cpu/bicgstab.f90 index ce6f691a557..a843d0bffb2 100644 --- a/src/krylov/bcknd/cpu/bicgstab.f90 +++ b/src/krylov/bcknd/cpu/bicgstab.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2021, The Neko Authors +! Copyright (c) 2021-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -41,7 +41,7 @@ module bicgstab use gather_scatter, only : gs_t, GS_OP_ADD use bc, only : bc_list_t, bc_list_apply use math, only : glsc3, rzero, copy, NEKO_EPS, add2s2, x_update, & - p_update + p_update, abscmp use utils, only : neko_error implicit none private @@ -56,24 +56,28 @@ module bicgstab real(kind=rp), allocatable :: t(:) real(kind=rp), allocatable :: v(:) contains + !> Constructor. procedure, pass(this) :: init => bicgstab_init + !> Destructor. procedure, pass(this) :: free => bicgstab_free + !> Solve the system. procedure, pass(this) :: solve => bicgstab_solve end type bicgstab_t contains - !> Initialise a standard BiCGSTAB solver - subroutine bicgstab_init(this, n, M, rel_tol, abs_tol) + !> Constructor. + subroutine bicgstab_init(this, n, max_iter, M, rel_tol, abs_tol) class(bicgstab_t), intent(inout) :: this class(pc_t), optional, intent(inout), target :: M integer, intent(in) :: n + integer, intent(in) :: max_iter real(kind=rp), optional, intent(inout) :: rel_tol real(kind=rp), optional, intent(inout) :: abs_tol - + call this%free() - + allocate(this%p(n)) allocate(this%p_hat(n)) allocate(this%r(n)) @@ -81,20 +85,20 @@ subroutine bicgstab_init(this, n, M, rel_tol, abs_tol) allocate(this%s_hat(n)) allocate(this%t(n)) allocate(this%v(n)) - if (present(M)) then + if (present(M)) then this%M => M end if if (present(rel_tol) .and. present(abs_tol)) then - call this%ksp_init(rel_tol, abs_tol) + call this%ksp_init(max_iter, rel_tol, abs_tol) else if (present(rel_tol)) then - call this%ksp_init(rel_tol=rel_tol) + call this%ksp_init(max_iter, rel_tol=rel_tol) else if (present(abs_tol)) then - call this%ksp_init(abs_tol=abs_tol) + call this%ksp_init(max_iter, abs_tol=abs_tol) else - call this%ksp_init() + call this%ksp_init(max_iter) end if - + end subroutine bicgstab_init !> Deallocate a standard BiCGSTAB solver @@ -110,7 +114,7 @@ subroutine bicgstab_free(this) if (allocated(this%r)) then deallocate(this%r) end if - + if (allocated(this%t)) then deallocate(this%t) end if @@ -118,15 +122,15 @@ subroutine bicgstab_free(this) if (allocated(this%p)) then deallocate(this%p) end if - + if (allocated(this%p_hat)) then deallocate(this%p_hat) end if - + if (allocated(this%s)) then deallocate(this%s) end if - + if (allocated(this%s_hat)) then deallocate(this%s_hat) end if @@ -135,7 +139,7 @@ subroutine bicgstab_free(this) end subroutine bicgstab_free - + !> Bi-Conjugate Gradient Stabilized method solve function bicgstab_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results) class(bicgstab_t), intent(inout) :: this @@ -151,17 +155,17 @@ function bicgstab_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r integer :: iter, max_iter real(kind=rp) :: rnorm, rtr, norm_fac, gamma real(kind=rp) :: beta, alpha, omega, rho_1, rho_2 - + if (present(niter)) then max_iter = niter else - max_iter = KSP_MAX_ITER + max_iter = this%max_iter end if norm_fac = 1.0_rp / sqrt(coef%volume) associate(r => this%r, t => this%t, s => this%s, v => this%v, p => this%p, & s_hat => this%s_hat, p_hat => this%p_hat) - + call rzero(x%x, n) call copy(r, f, n) @@ -171,22 +175,22 @@ function bicgstab_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r ksp_results%res_start = rnorm ksp_results%res_final = rnorm ksp_results%iter = 0 - if(rnorm .eq. 0.0_rp) return + if(abscmp(rnorm, 0.0_rp)) return do iter = 1, max_iter - + rho_1 = glsc3(r, coef%mult, f ,n) - + if (abs(rho_1) .lt. NEKO_EPS) then call neko_error('Bi-CGStab rho failure') end if - + if (iter .eq. 1) then - call copy(p, r, n) + call copy(p, r, n) else beta = (rho_1 / rho_2) * (alpha / omega) call p_update(p, r, v, beta, omega, n) end if - + call this%M%solve(p_hat, p, n) call Ax%compute(v, p_hat, coef, x%msh, x%Xh) call gs_h%op(v, n, GS_OP_ADD) @@ -200,7 +204,7 @@ function bicgstab_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r call add2s2(x%x, p_hat, alpha,n) exit end if - + call this%M%solve(s_hat, s, n) call Ax%compute(t, s_hat, coef, x%msh, x%Xh) call gs_h%op(t, n, GS_OP_ADD) @@ -210,18 +214,18 @@ function bicgstab_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r call x_update(x%x, p_hat, s_hat, alpha, omega, n) call copy(r, s, n) call add2s2(r, t, -omega, n) - + rtr = glsc3(r, coef%mult, r, n) rnorm = sqrt(rtr) * norm_fac if (rnorm .lt. this%abs_tol .or. rnorm .lt. gamma) then exit end if - + if (omega .lt. NEKO_EPS) then call neko_error('Bi-CGstab omega failure') end if rho_2 = rho_1 - + end do end associate ksp_results%res_final = rnorm @@ -229,5 +233,5 @@ function bicgstab_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r end function bicgstab_solve end module bicgstab - + diff --git a/src/krylov/bcknd/cpu/cacg.f90 b/src/krylov/bcknd/cpu/cacg.f90 index c1ba3ae99ee..2f15d501626 100644 --- a/src/krylov/bcknd/cpu/cacg.f90 +++ b/src/krylov/bcknd/cpu/cacg.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2021, The Neko Authors +! Copyright (c) 2021-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -40,13 +40,13 @@ module cacg use coefs, only : coef_t use gather_scatter, only : gs_t, GS_OP_ADD use bc, only : bc_list_t, bc_list_apply, bc_list_apply_scalar - use math, only : glsc3, rzero, copy, x_update + use math, only : glsc3, rzero, copy, x_update, abscmp use utils, only : neko_warning use comm use mxm_wrapper implicit none private - + !> S-step communication avoiding preconditioned conjugate gradient method type, public, extends(ksp_t) :: cacg_t real(kind=rp), allocatable :: r(:) @@ -62,10 +62,11 @@ module cacg contains !> Initialise a s-step CA PCG solver - subroutine cacg_init(this, n, M, s, rel_tol, abs_tol) + subroutine cacg_init(this, n, max_iter, M, s, rel_tol, abs_tol) class(cacg_t), intent(inout) :: this class(pc_t), optional, intent(inout), target :: M integer, intent(in) :: n + integer, intent(in) :: max_iter real(kind=rp), optional, intent(inout) :: rel_tol real(kind=rp), optional, intent(inout) :: abs_tol integer, optional, intent(inout) :: s @@ -73,30 +74,30 @@ subroutine cacg_init(this, n, M, s, rel_tol, abs_tol) if (present(s)) then this%s = s - else + else this%s = 4 end if if (pe_rank .eq. 0) then call neko_warning("Communication Avoiding CG chosen, be aware of potential instabilities") end if - + allocate(this%r(n)) allocate(this%p(n)) allocate(this%PR(n,4*this%s+1)) - if (present(M)) then + if (present(M)) then this%M => M end if if (present(rel_tol) .and. present(abs_tol)) then - call this%ksp_init(rel_tol, abs_tol) + call this%ksp_init(max_iter, rel_tol, abs_tol) else if (present(rel_tol)) then - call this%ksp_init(rel_tol=rel_tol) + call this%ksp_init(max_iter, rel_tol=rel_tol) else if (present(abs_tol)) then - call this%ksp_init(abs_tol=abs_tol) + call this%ksp_init(max_iter, abs_tol=abs_tol) else - call this%ksp_init() + call this%ksp_init(max_iter) end if - + end subroutine cacg_init !> Deallocate a s-step CA PCG solver @@ -116,12 +117,12 @@ subroutine cacg_free(this) if (allocated(this%p)) then deallocate(this%p) end if - + nullify(this%M) end subroutine cacg_free - + !> S-step CA PCG solve function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results) class(cacg_t), intent(inout) :: this @@ -142,34 +143,34 @@ function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resul real(kind=rp) :: r_c(4*this%s+1,this%s+1) real(kind=rp) :: z_c(4*this%s+1,this%s+1) real(kind=rp) :: x_c(4*this%s+1,this%s+1) - + associate(PR => this%PR, r => this%r, p => this%p) s = this%s if (present(niter)) then max_iter = niter else - max_iter = KSP_MAX_ITER + max_iter = this%max_iter end if norm_fac = 1.0_rp / sqrt(coef%volume) - + rtz1 = 1.0_rp call rzero(x%x, n) call copy(r, f, n) call this%M%solve(p, r, n) - + rtr = glsc3(r, coef%mult, r, n) rnorm = sqrt(rtr)*norm_fac ksp_results%res_start = rnorm ksp_results%res_final = rnorm ksp_results%iter = 0 iter = 0 - if(rnorm .eq. 0.0_rp) return + if(abscmp(rnorm, 0.0_rp)) return do while (iter < max_iter) call copy(PR,p, n) call copy(PR(1,2*s+2), r, n) - !Here we have hardcoded a monomial basis atm. + !Here we have hardcoded a monomial basis atm. do i = 2, 2*s + 1 if (mod(i,2) .eq. 0) then call Ax%compute(PR(1,i), PR(1,i-1), coef, x%msh, x%Xh) @@ -192,7 +193,7 @@ function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resul call construct_basis_matrix(Tt, s) call rzero(p_c, (4*s+1) * (s+1)) - p_c(1,1) = 1.0_rp + p_c(1,1) = 1.0_rp call rzero(r_c, (4*s+1) * (s+1)) r_c(2*s+2,1) = 1.0_rp call mxm(Tt, 4*s+1, r_c, 4*s+1, z_c,s+1) @@ -234,12 +235,12 @@ function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resul G(k,j) = temp2(it,1) end do end do - + call mxm(G,4*s+1, Tt, 4*s+1,GTt,4*s+1) - + do j = 1, s iter = iter + 1 - + call mxm(G, 4*s+1, r_c(1,j), 4*s+1,temp, 1) call mxm(GTt, 4*s+1, p_c(1,j), 4*s+1,temp2, 1) alpha1 = 0.0_rp @@ -291,7 +292,7 @@ function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resul do k = 1, NEKO_BLK_SIZE rtr = rtr + r(i+k)**2 * coef%mult(i+k,1,1,1) end do - else + else do j = 1,4*s+1 do k = 1, n-i x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + PR(i+k,j) * x_c(j,s+1) @@ -309,7 +310,7 @@ function cacg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resul call MPI_Allreduce(rtr, tmp, 1, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) rnorm = norm_fac*sqrt(tmp) - if( rnorm <= this%abs_tol) exit + if( rnorm <= this%abs_tol) exit end do ksp_results%res_final = rnorm @@ -321,19 +322,19 @@ end function cacg_solve !> Monomial matrix constuction, not sparse subroutine construct_basis_matrix(Tt, s) - integer, intent(in) :: s - real(kind=rp), intent(inout) :: Tt(4*s+1,4*s+1) - integer :: mlen, i - mlen = (4*s+1)*(4*s+1) - call rzero(Tt,mlen) - do i = 1, 2*s - Tt(i+1,i) = 1.0_rp - end do - do i = 1, (2*s-1) - Tt(2*s+2+i,2*s+1+i) = 1.0_rp - end do + integer, intent(in) :: s + real(kind=rp), intent(inout) :: Tt(4*s+1,4*s+1) + integer :: mlen, i + mlen = (4*s+1)*(4*s+1) + call rzero(Tt,mlen) + do i = 1, 2*s + Tt(i+1,i) = 1.0_rp + end do + do i = 1, (2*s-1) + Tt(2*s+2+i,2*s+1+i) = 1.0_rp + end do end subroutine construct_basis_matrix end module cacg - + diff --git a/src/krylov/bcknd/cpu/cg.f90 b/src/krylov/bcknd/cpu/cg.f90 index 4461a4e76ef..5d74ce50074 100644 --- a/src/krylov/bcknd/cpu/cg.f90 +++ b/src/krylov/bcknd/cpu/cg.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2020-2021, The Neko Authors +! Copyright (c) 2020-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -40,13 +40,13 @@ module cg use coefs, only : coef_t use gather_scatter, only : gs_t, GS_OP_ADD use bc, only : bc_list_t, bc_list_apply - use math, only : glsc3, rzero, copy + use math, only : glsc3, rzero, copy, abscmp use comm implicit none private integer, parameter :: CG_P_SPACE = 7 - + !> Standard preconditioned conjugate gradient method type, public, extends(ksp_t) :: cg_t real(kind=rp), allocatable :: w(:) @@ -63,13 +63,14 @@ module cg contains !> Initialise a standard PCG solver - subroutine cg_init(this, n, M, rel_tol, abs_tol) + subroutine cg_init(this, n, max_iter, M, rel_tol, abs_tol) class(cg_t), intent(inout), target :: this + integer, intent(in) :: max_iter class(pc_t), optional, intent(inout), target :: M integer, intent(in) :: n real(kind=rp), optional, intent(inout) :: rel_tol real(kind=rp), optional, intent(inout) :: abs_tol - + call this%free() allocate(this%w(n)) @@ -77,21 +78,21 @@ subroutine cg_init(this, n, M, rel_tol, abs_tol) allocate(this%p(n,CG_P_SPACE)) allocate(this%z(n)) allocate(this%alpha(CG_P_SPACE)) - - if (present(M)) then + + if (present(M)) then this%M => M end if if (present(rel_tol) .and. present(abs_tol)) then - call this%ksp_init(rel_tol, abs_tol) + call this%ksp_init(max_iter, rel_tol, abs_tol) else if (present(rel_tol)) then - call this%ksp_init(rel_tol=rel_tol) + call this%ksp_init(max_iter, rel_tol=rel_tol) else if (present(abs_tol)) then - call this%ksp_init(abs_tol=abs_tol) + call this%ksp_init(max_iter, abs_tol=abs_tol) else - call this%ksp_init() + call this%ksp_init(max_iter) end if - + end subroutine cg_init !> Deallocate a standard PCG solver @@ -111,11 +112,11 @@ subroutine cg_free(this) if (allocated(this%p)) then deallocate(this%p) end if - + if (allocated(this%z)) then deallocate(this%z) end if - + if (allocated(this%alpha)) then deallocate(this%alpha) end if @@ -123,7 +124,7 @@ subroutine cg_free(this) nullify(this%M) end subroutine cg_free - + !> Standard PCG solve function cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results) class(cg_t), intent(inout) :: this @@ -139,11 +140,11 @@ function cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results integer :: iter, max_iter, i, j, k, p_cur, p_prev real(kind=rp) :: rnorm, rtr, rtz2, rtz1, x_plus(NEKO_BLK_SIZE) real(kind=rp) :: beta, pap, norm_fac - + if (present(niter)) then max_iter = niter else - max_iter = KSP_MAX_ITER + max_iter = this%max_iter end if norm_fac = 1.0_rp / sqrt(coef%volume) @@ -162,24 +163,24 @@ function cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results ksp_results%iter = 0 p_prev = CG_P_SPACE p_cur = 1 - if(rnorm .eq. 0.0_rp) return + if(abscmp(rnorm, 0.0_rp)) return do iter = 1, max_iter call this%M%solve(z, r, n) rtz2 = rtz1 rtz1 = glsc3(r, coef%mult, z, n) - + beta = rtz1 / rtz2 if (iter .eq. 1) beta = 0.0_rp do i = 1, n p(i,p_cur) = z(i) + beta * p(i,p_prev) end do - + call Ax%compute(w, p(1,p_cur), coef, x%msh, x%Xh) call gs_h%op(w, n, GS_OP_ADD) call bc_list_apply(blst, w, n) - + pap = glsc3(w, coef%mult, p(1,p_cur), n) - + alpha(p_cur) = rtz1 / pap call second_cg_part(rtr, r, coef%mult, w, alpha(p_cur), n) rnorm = sqrt(rtr) * norm_fac @@ -199,7 +200,7 @@ function cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results do k = 1, NEKO_BLK_SIZE x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k) end do - else + else do k = 1, n-i x_plus(1) = 0.0_rp do j = 1, p_cur @@ -227,7 +228,7 @@ end function cg_solve subroutine second_cg_part(rtr, r, mult, w, alpha, n) integer, intent(in) :: n real(kind=rp), intent(inout) :: r(n), rtr - real(kind=rp), intent(in) ::mult(n), w(n), alpha + real(kind=rp), intent(in) ::mult(n), w(n), alpha integer :: i, ierr rtr = 0.0_rp @@ -238,8 +239,8 @@ subroutine second_cg_part(rtr, r, mult, w, alpha, n) call MPI_Allreduce(MPI_IN_PLACE, rtr, 1, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) - end subroutine second_cg_part + end subroutine second_cg_part end module cg - + diff --git a/src/krylov/bcknd/cpu/gmres.f90 b/src/krylov/bcknd/cpu/gmres.f90 index 98bf9f24cc0..0832eabfbb7 100644 --- a/src/krylov/bcknd/cpu/gmres.f90 +++ b/src/krylov/bcknd/cpu/gmres.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2020-2021, The Neko Authors +! Copyright (c) 2020-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -40,12 +40,12 @@ module gmres use coefs, only : coef_t use gather_scatter, only : gs_t, GS_OP_ADD use bc, only : bc_list_t, bc_list_apply - use math, only : glsc3, rzero, rone, copy, sub2, cmult2 + use math, only : glsc3, rzero, rone, copy, sub2, cmult2, abscmp use comm implicit none private - !> Standard preconditioned generalized minimal residual method + !> Standard preconditioned generalized minimal residual method type, public, extends(ksp_t) :: gmres_t integer :: lgmres real(kind=rp), allocatable :: w(:) @@ -66,9 +66,10 @@ module gmres contains !> Initialise a standard GMRES solver - subroutine gmres_init(this, n, M, lgmres, rel_tol, abs_tol) + subroutine gmres_init(this, n, max_iter, M, lgmres, rel_tol, abs_tol) class(gmres_t), intent(inout) :: this integer, intent(in) :: n + integer, intent(in) :: max_iter class(pc_t), optional, intent(inout), target :: M integer, optional, intent(inout) :: lgmres real(kind=rp), optional, intent(inout) :: rel_tol @@ -79,38 +80,38 @@ subroutine gmres_init(this, n, M, lgmres, rel_tol, abs_tol) else this%lgmres = 30 end if - + call this%free() - - if (present(M)) then + + if (present(M)) then this%M => M end if allocate(this%w(n)) allocate(this%r(n)) allocate(this%wk1(n)) - + allocate(this%c(this%lgmres)) allocate(this%s(this%lgmres)) allocate(this%gam(this%lgmres + 1)) - + allocate(this%z(n,this%lgmres)) allocate(this%v(n,this%lgmres)) - + allocate(this%h(this%lgmres,this%lgmres)) - - + + if (present(rel_tol) .and. present(abs_tol)) then - call this%ksp_init(rel_tol, abs_tol) + call this%ksp_init(max_iter, rel_tol, abs_tol) else if (present(rel_tol)) then - call this%ksp_init(rel_tol=rel_tol) + call this%ksp_init(max_iter, rel_tol=rel_tol) else if (present(abs_tol)) then - call this%ksp_init(abs_tol=abs_tol) + call this%ksp_init(max_iter, abs_tol=abs_tol) else - call this%ksp_init(abs_tol) + call this%ksp_init(max_iter) end if - + end subroutine gmres_init !> Deallocate a standard GMRES solver @@ -130,7 +131,7 @@ subroutine gmres_free(this) if (allocated(this%r)) then deallocate(this%r) end if - + if (allocated(this%z)) then deallocate(this%z) end if @@ -138,28 +139,28 @@ subroutine gmres_free(this) if (allocated(this%h)) then deallocate(this%h) end if - + if (allocated(this%v)) then deallocate(this%v) end if - + if (allocated(this%s)) then deallocate(this%s) end if - - + + if (allocated(this%gam)) then deallocate(this%gam) end if - + if (allocated(this%wk1)) then deallocate(this%wk1) end if - + nullify(this%M) - + end subroutine gmres_free - + !> Standard GMRES solve function gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results) class(gmres_t), intent(inout) :: this @@ -172,8 +173,8 @@ function gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resu type(gs_t), intent(inout) :: gs_h type(ksp_monitor_t) :: ksp_results integer, optional, intent(in) :: niter - integer :: iter - integer :: i, j, k, l, ierr + integer :: iter, max_iter + integer :: i, j, k, l, ierr real(kind=rp) :: w_plus(NEKO_BLK_SIZE), x_plus(NEKO_BLK_SIZE) real(kind=rp) :: rnorm, alpha, temp, lr, alpha2, norm_fac logical :: conv @@ -181,179 +182,185 @@ function gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resu conv = .false. iter = 0 - associate(w => this%w, c => this%c, r => this%r, z => this%z, h => this%h, & + if (present(niter)) then + max_iter = niter + else + max_iter = this%max_iter + end if + + associate(w => this%w, c => this%c, r => this%r, z => this%z, h => this%h, & v => this%v, s => this%s, gam => this%gam, wk1 =>this%wk1) - norm_fac = 1.0_rp / sqrt(coef%volume) - call rzero(x%x, n) - call rzero(gam, this%lgmres + 1) - call rone(s, this%lgmres) - call rone(c, this%lgmres) - call rzero(h, this%lgmres * this%lgmres) - do while (.not. conv .and. iter .lt. niter) - - if(iter.eq.0) then - call copy(r, f, n) - else - call copy(r, f, n) - call Ax%compute(w, x%x, coef, x%msh, x%Xh) - call gs_h%op(w, n, GS_OP_ADD) - call bc_list_apply(blst, w, n) - call sub2(r, w, n) - end if - - gam(1) = sqrt(glsc3(r, r, coef%mult, n)) - if(iter.eq.0) then - ksp_results%res_start = gam(1) * norm_fac - end if - - if (gam(1) .eq. 0) return - - rnorm = 0.0_rp - temp = 1.0_rp / gam(1) - call cmult2(v(1,1), r, temp, n) - do j = 1, this%lgmres - iter = iter+1 - - call this%M%solve(z(1,j), v(1,j), n) - - call Ax%compute(w, z(1,j), coef, x%msh, x%Xh) - call gs_h%op(w, n, GS_OP_ADD) - call bc_list_apply(blst, w, n) - - do l = 1, j - h(l,j) = 0.0_rp - enddo - - do i = 0, n, NEKO_BLK_SIZE - if (i + NEKO_BLK_SIZE .le. n) then - do l = 1, j - do k = 1, NEKO_BLK_SIZE - h(l,j) = h(l,j) + & + norm_fac = 1.0_rp / sqrt(coef%volume) + call rzero(x%x, n) + call rzero(gam, this%lgmres + 1) + call rone(s, this%lgmres) + call rone(c, this%lgmres) + call rzero(h, this%lgmres * this%lgmres) + do while (.not. conv .and. iter .lt. max_iter) + + if(iter.eq.0) then + call copy(r, f, n) + else + call copy(r, f, n) + call Ax%compute(w, x%x, coef, x%msh, x%Xh) + call gs_h%op(w, n, GS_OP_ADD) + call bc_list_apply(blst, w, n) + call sub2(r, w, n) + end if + + gam(1) = sqrt(glsc3(r, r, coef%mult, n)) + if(iter.eq.0) then + ksp_results%res_start = gam(1) * norm_fac + end if + + if (abscmp(gam(1), 0.0_rp)) return + + rnorm = 0.0_rp + temp = 1.0_rp / gam(1) + call cmult2(v(1,1), r, temp, n) + do j = 1, this%lgmres + iter = iter+1 + + call this%M%solve(z(1,j), v(1,j), n) + + call Ax%compute(w, z(1,j), coef, x%msh, x%Xh) + call gs_h%op(w, n, GS_OP_ADD) + call bc_list_apply(blst, w, n) + + do l = 1, j + h(l,j) = 0.0_rp + enddo + + do i = 0, n, NEKO_BLK_SIZE + if (i + NEKO_BLK_SIZE .le. n) then + do l = 1, j + do k = 1, NEKO_BLK_SIZE + h(l,j) = h(l,j) + & w(i+k) * v(i+k,l) * coef%mult(i+k,1,1,1) - end do - end do - else - do k = 1, n-i - do l = 1, j - h(l,j) = h(l,j) + & + end do + end do + else + do k = 1, n-i + do l = 1, j + h(l,j) = h(l,j) + & w(i+k) * v(i+k,l) * coef%mult(i+k,1,1,1) - end do - end do - end if - end do - - call MPI_Allreduce(h(1,j), wk1, j, & + end do + end do + end if + end do + + call MPI_Allreduce(h(1,j), wk1, j, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) - call copy(h(1,j), wk1, j) - - alpha2 = 0.0_rp - do i = 0,n,NEKO_BLK_SIZE - if (i + NEKO_BLK_SIZE .le. n) then - do k = 1, NEKO_BLK_SIZE - w_plus(k) = 0.0_rp - end do - do l = 1,j - do k = 1, NEKO_BLK_SIZE - w_plus(k) = w_plus(k) - h(l,j) * v(i+k,l) - end do - end do - do k = 1, NEKO_BLK_SIZE - w(i+k) = w(i+k) + w_plus(k) - alpha2 = alpha2 + w(i+k)**2 * coef%mult(i+k,1,1,1) - end do - else - do k = 1, n-i - w_plus(1) = 0.0_rp - do l = 1, j - w_plus(1) = w_plus(1) - h(l,j) * v(i+k,l) - end do - w(i+k) = w(i+k) + w_plus(1) - alpha2 = alpha2 + (w(i+k)**2) * coef%mult(i+k,1,1,1) - end do - end if - end do - - call MPI_Allreduce(alpha2, temp, 1, & + call copy(h(1,j), wk1, j) + + alpha2 = 0.0_rp + do i = 0,n,NEKO_BLK_SIZE + if (i + NEKO_BLK_SIZE .le. n) then + do k = 1, NEKO_BLK_SIZE + w_plus(k) = 0.0_rp + end do + do l = 1,j + do k = 1, NEKO_BLK_SIZE + w_plus(k) = w_plus(k) - h(l,j) * v(i+k,l) + end do + end do + do k = 1, NEKO_BLK_SIZE + w(i+k) = w(i+k) + w_plus(k) + alpha2 = alpha2 + w(i+k)**2 * coef%mult(i+k,1,1,1) + end do + else + do k = 1, n-i + w_plus(1) = 0.0_rp + do l = 1, j + w_plus(1) = w_plus(1) - h(l,j) * v(i+k,l) + end do + w(i+k) = w(i+k) + w_plus(1) + alpha2 = alpha2 + (w(i+k)**2) * coef%mult(i+k,1,1,1) + end do + end if + end do + + call MPI_Allreduce(alpha2, temp, 1, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) - alpha2 = temp - alpha = sqrt(alpha2) - do i=1,j-1 - temp = h(i,j) - h(i ,j) = c(i)*temp + s(i) * h(i+1,j) - h(i+1,j) = -s(i)*temp + c(i) * h(i+1,j) - end do - - rnorm = 0.0_rp - if(alpha .eq. 0.0_rp) then - conv = .true. - exit - end if - - lr = sqrt(h(j,j) * h(j,j) + alpha**2) - temp = 1.0_rp / lr - c(j) = h(j,j) * temp - s(j) = alpha * temp - h(j,j) = lr - gam(j+1) = -s(j) * gam(j) - gam(j) = c(j) * gam(j) - - rnorm = abs(gam(j+1)) * norm_fac - if (rnorm .lt. this%abs_tol) then - conv = .true. - exit - end if - - if (iter + 1 .gt. niter) exit - - if( j .lt. this%lgmres) then - temp = 1.0_rp / alpha - call cmult2(v(1,j+1), w, temp, n) - end if - - end do - - j = min(j, this%lgmres) - do k = j, 1, -1 - temp = gam(k) - do i = j, k+1, -1 - temp = temp - h(k,i) * c(i) - end do - c(k) = temp / h(k,k) - end do - - do i = 0, n, NEKO_BLK_SIZE - if (i + NEKO_BLK_SIZE .le. n) then - do k = 1, NEKO_BLK_SIZE - x_plus(k) = 0.0_rp - end do - do l = 1,j - do k = 1, NEKO_BLK_SIZE - x_plus(k) = x_plus(k) + c(l) * z(i+k,l) - end do - end do - do k = 1, NEKO_BLK_SIZE - x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k) - end do - else - do k = 1, n-i - x_plus(1) = 0.0_rp - do l = 1, j - x_plus(1) = x_plus(1) + c(l) * z(i+k,l) - end do - x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(1) - end do - end if - end do - end do - - end associate - - ksp_results%res_final = rnorm - ksp_results%iter = iter + alpha2 = temp + alpha = sqrt(alpha2) + do i=1,j-1 + temp = h(i,j) + h(i ,j) = c(i)*temp + s(i) * h(i+1,j) + h(i+1,j) = -s(i)*temp + c(i) * h(i+1,j) + end do + + rnorm = 0.0_rp + if(abscmp(alpha, 0.0_rp)) then + conv = .true. + exit + end if + + lr = sqrt(h(j,j) * h(j,j) + alpha**2) + temp = 1.0_rp / lr + c(j) = h(j,j) * temp + s(j) = alpha * temp + h(j,j) = lr + gam(j+1) = -s(j) * gam(j) + gam(j) = c(j) * gam(j) + + rnorm = abs(gam(j+1)) * norm_fac + if (rnorm .lt. this%abs_tol) then + conv = .true. + exit + end if + + if (iter + 1 .gt. max_iter) exit + + if( j .lt. this%lgmres) then + temp = 1.0_rp / alpha + call cmult2(v(1,j+1), w, temp, n) + end if + + end do + + j = min(j, this%lgmres) + do k = j, 1, -1 + temp = gam(k) + do i = j, k+1, -1 + temp = temp - h(k,i) * c(i) + end do + c(k) = temp / h(k,k) + end do + + do i = 0, n, NEKO_BLK_SIZE + if (i + NEKO_BLK_SIZE .le. n) then + do k = 1, NEKO_BLK_SIZE + x_plus(k) = 0.0_rp + end do + do l = 1,j + do k = 1, NEKO_BLK_SIZE + x_plus(k) = x_plus(k) + c(l) * z(i+k,l) + end do + end do + do k = 1, NEKO_BLK_SIZE + x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k) + end do + else + do k = 1, n-i + x_plus(1) = 0.0_rp + do l = 1, j + x_plus(1) = x_plus(1) + c(l) * z(i+k,l) + end do + x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(1) + end do + end if + end do + end do + + end associate + + ksp_results%res_final = rnorm + ksp_results%iter = iter end function gmres_solve end module gmres - + diff --git a/src/krylov/bcknd/cpu/pc_jacobi.f90 b/src/krylov/bcknd/cpu/pc_jacobi.f90 index c2075b8ef0d..8a6a266381c 100644 --- a/src/krylov/bcknd/cpu/pc_jacobi.f90 +++ b/src/krylov/bcknd/cpu/pc_jacobi.f90 @@ -55,7 +55,7 @@ module jacobi end type jacobi_t contains - + subroutine jacobi_init(this, coef, dof, gs_h) class(jacobi_t), intent(inout) :: this type(coef_t), intent(inout), target :: coef @@ -91,83 +91,71 @@ subroutine jacobi_solve(this, z, r, n) call col3(z,r,this%d,n) end subroutine jacobi_solve + !> Update Jacobi preconditioner if the geometry G has changed subroutine jacobi_update(this) class(jacobi_t), intent(inout) :: this - integer :: i, j, k, l, e, lz, ly, lx associate(dof => this%dof, coef => this%coef, gs_h => this%gs_h) - lx = dof%Xh%lx - ly = dof%Xh%ly - lz = dof%Xh%lz - - this%d = 0d0 - - do e = 1,dof%msh%nelv - do l = 1,lx - do k = 1,lz - do j = 1,ly - do i = 1,lx - this%d(i,j,k,e) = this%d(i,j,k,e) + & - coef%G11(l,j,k,e) * dof%Xh%dxt(i,l)**2 - end do - end do - end do - end do - do l = 1,ly - do k = 1,lz - do j = 1,ly - do i = 1,lx - this%d(i,j,k,e) = this%d(i,j,k,e) + & - coef%G22(i,l,k,e) * dof%Xh%dyt(j,l)**2 - end do - end do - end do - end do - do l = 1,lz - do k = 1,lz - do j = 1,ly - do i = 1,lx - this%d(i,j,k,e) = this%d(i,j,k,e) + & - coef%G33(i,j,l,e) * dof%Xh%dzt(k,l)**2 - end do - end do - end do - end do - - if (dof%msh%dfrmd_el(e)) then - do j = 1,ly,ly-1 - do k = 1,lz,lz-1 - this%d(1,j,k,e) = this%d(1,j,k,e) & - + coef%G12(1,j,k,e) * dof%Xh%dxt(1,1)*dof%Xh%dyt(j,j) & - + coef%G13(1,j,k,e) * dof%Xh%dxt(1,1)*dof%Xh%dzt(k,k) - this%d(lx,j,k,e) = this%d(lx,j,k,e) & - + coef%G12(lx,j,k,e) * dof%Xh%dxt(lx,lx)*dof%Xh%dyt(j,j) & - + coef%G13(lx,j,k,e) * dof%Xh%dxt(lx,lx)*dof%Xh%dzt(k,k) - end do - end do - - do i = 1,lx,lx-1 - do k = 1,lz,lz-1 - this%d(i,1,k,e) = this%d(i,1,k,e) & - + coef%G12(i,1,k,e) * dof%Xh%dyt(1,1)*dof%Xh%dxt(i,i) & - + coef%G23(i,1,k,e) * dof%Xh%dyt(1,1)*dof%Xh%dzt(k,k) - this%d(i,ly,k,e) = this%d(i,ly,k,e) & - + coef%G12(i,ly,k,e) * dof%Xh%dyt(ly,ly)*dof%Xh%dxt(i,i) & - + coef%G23(i,ly,k,e) * dof%Xh%dyt(ly,ly)*dof%Xh%dzt(k,k) - end do - end do - do i = 1,lx,lx-1 - do j = 1,ly,ly-1 - this%d(i,j,1,e) = this%d(i,j,1,e) & - + coef%G13(i,j,1,e) * dof%Xh%dzt(1,1)*dof%Xh%dxt(i,i) & - + coef%G23(i,j,1,e) * dof%Xh%dzt(1,1)*dof%Xh%dyt(j,j) - this%d(i,j,lz,e) = this%d(i,j,lz,e) & - + coef%G13(i,j,lz,e) * dof%Xh%dzt(lz,lz)*dof%Xh%dxt(i,i) & - + coef%G23(i,j,lz,e) * dof%Xh%dzt(lz,lz)*dof%Xh%dyt(j,j) - end do - end do - end if - end do + + select case(dof%Xh%lx) + case (14) + call jacobi_update_lx14(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv) + case (13) + call jacobi_update_lx13(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv) + case (12) + call jacobi_update_lx12(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv) + case (11) + call jacobi_update_lx11(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv) + case (10) + call jacobi_update_lx10(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv) + case (9) + call jacobi_update_lx9(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv) + case (8) + call jacobi_update_lx8(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv) + case (7) + call jacobi_update_lx7(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv) + case (6) + call jacobi_update_lx6(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv) + case (5) + call jacobi_update_lx5(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv) + case (4) + call jacobi_update_lx4(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv) + case (3) + call jacobi_update_lx3(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv) + case (2) + call jacobi_update_lx2(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv) + case default + call jacobi_update_lx(this%d, dof%Xh%dxt, dof%Xh%dyt, dof%Xh%dzt, & + coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, & + dof%msh%dfrmd_el, dof%msh%nelv, dof%Xh%lx) + end select + call col2(this%d,coef%h1,coef%dof%size()) if (coef%ifh2) call addcol3(this%d,coef%h2,coef%B,coef%dof%size()) call gs_h%op(this%d, dof%size(), GS_OP_ADD) @@ -175,4 +163,1222 @@ subroutine jacobi_update(this) end associate end subroutine jacobi_update + !> Generic CPU kernel for updating the Jacobi preconditioner + subroutine jacobi_update_lx(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n, lx) + integer, intent(in) :: n, lx + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx + + subroutine jacobi_update_lx14(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n) + integer, parameter :: lx = 14 + integer, intent(in) :: n + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx14 + + subroutine jacobi_update_lx13(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n) + integer, parameter :: lx = 13 + integer, intent(in) :: n + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx13 + + subroutine jacobi_update_lx12(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n) + integer, parameter :: lx = 12 + integer, intent(in) :: n + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx12 + + subroutine jacobi_update_lx11(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n) + integer, parameter :: lx = 11 + integer, intent(in) :: n + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx11 + + subroutine jacobi_update_lx10(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n) + integer, parameter :: lx = 10 + integer, intent(in) :: n + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx10 + + subroutine jacobi_update_lx9(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n) + integer, parameter :: lx = 9 + integer, intent(in) :: n + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx9 + + subroutine jacobi_update_lx8(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n) + integer, parameter :: lx = 8 + integer, intent(in) :: n + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx8 + + subroutine jacobi_update_lx7(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n) + integer, parameter :: lx = 7 + integer, intent(in) :: n + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx7 + + subroutine jacobi_update_lx6(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n) + integer, parameter :: lx = 6 + integer, intent(in) :: n + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx6 + + subroutine jacobi_update_lx5(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n) + integer, parameter :: lx = 5 + integer, intent(in) :: n + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx5 + + subroutine jacobi_update_lx4(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n) + integer, parameter :: lx = 4 + integer, intent(in) :: n + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx4 + + subroutine jacobi_update_lx3(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n) + integer, parameter :: lx = 3 + integer, intent(in) :: n + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx3 + + subroutine jacobi_update_lx2(d, dxt, dyt, dzt, G11, G22, G33, & + G12, G13, G23, dfrmd_el, n) + integer, parameter :: lx = 2 + integer, intent(in) :: n + real(kind=rp), intent(inout) :: d(lx, lx, lx, n) + real(kind=rp), intent(in) :: G11(lx, lx, lx, n) + real(kind=rp), intent(in) :: G22(lx, lx, lx, n) + real(kind=rp), intent(in) :: G33(lx, lx, lx, n) + real(kind=rp), intent(in) :: G12(lx, lx, lx, n) + real(kind=rp), intent(in) :: G13(lx, lx, lx, n) + real(kind=rp), intent(in) :: G23(lx, lx, lx, n) + real(kind=rp), intent(in) :: dxt(lx, lx) + real(kind=rp), intent(in) :: dyt(lx, lx) + real(kind=rp), intent(in) :: dzt(lx, lx) + logical, intent(in) :: dfrmd_el(n) + integer :: i, j, k, l, e + + d = 0d0 + + do e = 1,n + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G11(l,j,k,e) * dxt(i,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G22(i,l,k,e) * dyt(j,l)**2 + end do + end do + end do + end do + do l = 1,lx + do k = 1,lx + do j = 1,lx + do i = 1,lx + d(i,j,k,e) = d(i,j,k,e) + & + G33(i,j,l,e) * dzt(k,l)**2 + end do + end do + end do + end do + + if (dfrmd_el(e)) then + do j = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(1,j,k,e) = d(1,j,k,e) & + + G12(1,j,k,e) * dxt(1,1)*dyt(j,j) & + + G13(1,j,k,e) * dxt(1,1)*dzt(k,k) + d(lx,j,k,e) = d(lx,j,k,e) & + + G12(lx,j,k,e) * dxt(lx,lx)*dyt(j,j) & + + G13(lx,j,k,e) * dxt(lx,lx)*dzt(k,k) + end do + end do + + do i = 1,lx,lx-1 + do k = 1,lx,lx-1 + d(i,1,k,e) = d(i,1,k,e) & + + G12(i,1,k,e) * dyt(1,1)*dxt(i,i) & + + G23(i,1,k,e) * dyt(1,1)*dzt(k,k) + d(i,lx,k,e) = d(i,lx,k,e) & + + G12(i,lx,k,e) * dyt(lx,lx)*dxt(i,i) & + + G23(i,lx,k,e) * dyt(lx,lx)*dzt(k,k) + end do + end do + do i = 1,lx,lx-1 + do j = 1,lx,lx-1 + d(i,j,1,e) = d(i,j,1,e) & + + G13(i,j,1,e) * dzt(1,1)*dxt(i,i) & + + G23(i,j,1,e) * dzt(1,1)*dyt(j,j) + d(i,j,lx,e) = d(i,j,lx,e) & + + G13(i,j,lx,e) * dzt(lx,lx)*dxt(i,i) & + + G23(i,j,lx,e) * dzt(lx,lx)*dyt(j,j) + end do + end do + end if + end do + end subroutine jacobi_update_lx2 + end module jacobi diff --git a/src/krylov/bcknd/cpu/pipecg.f90 b/src/krylov/bcknd/cpu/pipecg.f90 index 32f5c2053ca..f9270947c77 100644 --- a/src/krylov/bcknd/cpu/pipecg.f90 +++ b/src/krylov/bcknd/cpu/pipecg.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2021, The Neko Authors +! Copyright (c) 2021-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -40,7 +40,7 @@ module pipecg use coefs, only : coef_t use gather_scatter, only : gs_t, GS_OP_ADD use bc, only : bc_list_t, bc_list_apply - use math, only : glsc3, rzero, copy + use math, only : glsc3, rzero, copy, abscmp use comm implicit none private @@ -66,19 +66,20 @@ module pipecg !> Solve the linear system. procedure, pass(this) :: solve => pipecg_solve end type pipecg_t - + contains - + !> Initialise a pipelined PCG solver - subroutine pipecg_init(this, n, M, rel_tol, abs_tol) + subroutine pipecg_init(this, n, max_iter, M, rel_tol, abs_tol) class(pipecg_t), intent(inout) :: this + integer, intent(in) :: max_iter class(pc_t), optional, intent(inout), target :: M integer, intent(in) :: n real(kind=rp), optional, intent(inout) :: rel_tol real(kind=rp), optional, intent(inout) :: abs_tol - + call this%free() - + allocate(this%p(n)) allocate(this%q(n)) allocate(this%r(n)) @@ -88,28 +89,28 @@ subroutine pipecg_init(this, n, M, rel_tol, abs_tol) allocate(this%z(n)) allocate(this%mi(n)) allocate(this%ni(n)) - if (present(M)) then + if (present(M)) then this%M => M end if - + if (present(rel_tol) .and. present(abs_tol)) then - call this%ksp_init(rel_tol, abs_tol) + call this%ksp_init(max_iter, rel_tol, abs_tol) else if (present(rel_tol)) then - call this%ksp_init(rel_tol=rel_tol) + call this%ksp_init(max_iter, rel_tol=rel_tol) else if (present(abs_tol)) then - call this%ksp_init(abs_tol=abs_tol) + call this%ksp_init(max_iter, abs_tol=abs_tol) else - call this%ksp_init() + call this%ksp_init(max_iter) end if - + end subroutine pipecg_init - + !> Deallocate a pipelined PCG solver subroutine pipecg_free(this) class(pipecg_t), intent(inout) :: this - + call this%ksp_free() - + if (allocated(this%p)) then deallocate(this%p) end if @@ -137,12 +138,12 @@ subroutine pipecg_free(this) if (allocated(this%ni)) then deallocate(this%ni) end if - + nullify(this%M) end subroutine pipecg_free - + !> Pipelined PCG solve function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results) class(pipecg_t), intent(inout) :: this @@ -162,17 +163,17 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res real(kind=rp) :: tmp1, tmp2, tmp3, x_plus(NEKO_BLK_SIZE) type(MPI_Request) :: request type(MPI_Status) :: status - + if (present(niter)) then max_iter = niter else - max_iter = KSP_MAX_ITER + max_iter = this%max_iter end if norm_fac = 1.0_rp / sqrt(coef%volume) - + associate(p => this%p, q => this%q, r => this%r, s => this%s, & u => this%u, w => this%w, z => this%z, mi => this%mi, ni => this%ni) - + p_prev = PIPECG_P_SPACE u_prev = PIPECG_P_SPACE+1 p_cur = 1 @@ -186,14 +187,14 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res call Ax%compute(w, u(1,u_prev), coef, x%msh, x%Xh) call gs_h%op(w, n, GS_OP_ADD) call bc_list_apply(blst, w, n) - + rtr = glsc3(r, coef%mult, r, n) rnorm = sqrt(rtr)*norm_fac ksp_results%res_start = rnorm ksp_results%res_final = rnorm ksp_results%iter = 0 - if(rnorm .eq. 0.0_rp) return - + if(abscmp(rnorm, 0.0_rp)) return + gamma1 = 0.0_rp tmp1 = 0.0_rp tmp2 = 0.0_rp @@ -206,33 +207,33 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res reduction(1) = tmp1 reduction(2) = tmp2 reduction(3) = tmp3 - + do iter = 1, max_iter call MPI_Iallreduce(MPI_IN_PLACE, reduction, 3, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, request, ierr) - + call this%M%solve(mi, w, n) call Ax%compute(ni, mi, coef, x%msh, x%Xh) call gs_h%op(ni, n, GS_OP_ADD) call bc_list_apply(blst, ni, n) - + call MPI_Wait(request, status, ierr) - gamma2 = gamma1 + gamma2 = gamma1 gamma1 = reduction(1) delta = reduction(2) rtr = reduction(3) - + rnorm = sqrt(rtr)*norm_fac if (rnorm .lt. this%abs_tol) exit if (iter .gt. 1) then beta(p_cur) = gamma1 / gamma2 alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev))) - else + else beta(p_cur) = 0.0_rp alpha(p_cur) = gamma1/delta end if - + tmp1 = 0.0_rp tmp2 = 0.0_rp tmp3 = 0.0_rp @@ -263,11 +264,11 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res end do end if end do - + reduction(1) = tmp1 reduction(2) = tmp2 reduction(3) = tmp3 - + if (p_cur .eq. PIPECG_P_SPACE) then do i = 0, n, NEKO_BLK_SIZE if (i + NEKO_BLK_SIZE .le. n) then @@ -286,7 +287,7 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k) u(i+k,PIPECG_P_SPACE+1) = u(i+k,PIPECG_P_SPACE) end do - else + else do k = 1, n-i x_plus(1) = 0.0_rp p_prev = PIPECG_P_SPACE + 1 @@ -302,7 +303,7 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res end do p_prev = p_cur u_prev = PIPECG_P_SPACE+1 - alpha(1) = alpha(p_cur) + alpha(1) = alpha(p_cur) beta(1) = beta(p_cur) p_cur = 1 else @@ -311,7 +312,7 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res p_cur = p_cur + 1 end if end do - + if ( p_cur .ne. 1) then do i = 0, n, NEKO_BLK_SIZE if (i + NEKO_BLK_SIZE .le. n) then @@ -330,7 +331,7 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res x%x(i+k,1,1,1) = x%x(i+k,1,1,1) + x_plus(k) u(i+k,PIPECG_P_SPACE+1) = u(i+k,PIPECG_P_SPACE) end do - else + else do k = 1, n-i x_plus(1) = 0.0_rp p_prev = PIPECG_P_SPACE + 1 @@ -345,14 +346,14 @@ function pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_res end if end do end if - + ksp_results%res_final = rnorm ksp_results%iter = iter - + end associate - + end function pipecg_solve - + end module pipecg - + diff --git a/src/krylov/bcknd/device/cg_device.f90 b/src/krylov/bcknd/device/cg_device.f90 index 58e1b350abb..baa39bde4d8 100644 --- a/src/krylov/bcknd/device/cg_device.f90 +++ b/src/krylov/bcknd/device/cg_device.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2021-2022, The Neko Authors +! Copyright (c) 2021-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -40,7 +40,8 @@ module cg_device use coefs, only : coef_t use gather_scatter, only : gs_t, GS_OP_ADD use bc, only : bc_list_t, bc_list_apply - use device + use math, only : abscmp + use device use device_math, only : device_rzero, device_copy, device_glsc3, & device_add2s2, device_add2s1 implicit none @@ -65,15 +66,16 @@ module cg_device contains !> Initialise a device based PCG solver - subroutine cg_device_init(this, n, M, rel_tol, abs_tol) + subroutine cg_device_init(this, n, max_iter, M, rel_tol, abs_tol) class(cg_device_t), intent(inout) :: this class(pc_t), optional, intent(inout), target :: M integer, intent(in) :: n + integer, intent(in) :: max_iter real(kind=rp), optional, intent(inout) :: rel_tol real(kind=rp), optional, intent(inout) :: abs_tol - + call this%free() - + allocate(this%w(n)) allocate(this%r(n)) allocate(this%p(n)) @@ -83,20 +85,20 @@ subroutine cg_device_init(this, n, M, rel_tol, abs_tol) call device_map(this%p, this%p_d, n) call device_map(this%r, this%r_d, n) call device_map(this%w, this%w_d, n) - - if (present(M)) then + + if (present(M)) then this%M => M end if if (present(rel_tol) .and. present(abs_tol)) then - call this%ksp_init(rel_tol, abs_tol) + call this%ksp_init(max_iter, rel_tol, abs_tol) else if (present(rel_tol)) then - call this%ksp_init(rel_tol=rel_tol) + call this%ksp_init(max_iter, rel_tol=rel_tol) else if (present(abs_tol)) then - call this%ksp_init(abs_tol=abs_tol) + call this%ksp_init(max_iter, abs_tol=abs_tol) else - call this%ksp_init() + call this%ksp_init(max_iter) end if call device_event_create(this%gs_event, 2) @@ -119,7 +121,7 @@ subroutine cg_device_free(this) if (allocated(this%p)) then deallocate(this%p) end if - + if (allocated(this%z)) then deallocate(this%z) end if @@ -145,9 +147,9 @@ subroutine cg_device_free(this) if (c_associated(this%gs_event)) then call device_event_destroy(this%gs_event) end if - + end subroutine cg_device_free - + !> Standard PCG solve function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results) class(cg_device_t), intent(inout) :: this @@ -166,13 +168,13 @@ function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_ real(kind=rp) :: rnorm, rtr, rtr0, rtz2, rtz1 real(kind=rp) :: beta, pap, alpha, alphm, norm_fac type(c_ptr) :: f_d - + f_d = device_get_ptr(f) if (present(niter)) then max_iter = niter else - max_iter = KSP_MAX_ITER + max_iter = this%max_iter end if norm_fac = one/sqrt(coef%volume) @@ -186,7 +188,7 @@ function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_ ksp_results%res_start = rnorm ksp_results%res_final = rnorm ksp_results%iter = 0 - if(rnorm .eq. zero) return + if(abscmp(rnorm, zero)) return do iter = 1, max_iter call this%M%solve(this%z, this%r, n) rtz2 = rtz1 @@ -195,10 +197,10 @@ function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_ if (iter .eq. 1) beta = zero call device_add2s1(this%p_d, this%z_d, beta, n) - call Ax%compute(this%w, this%p, coef, x%msh, x%Xh) + call Ax%compute(this%w, this%p, coef, x%msh, x%Xh) call gs_h%op(this%w, n, GS_OP_ADD, this%gs_event) call device_event_sync(this%gs_event) - call bc_list_apply(blst, this%w, n) + call bc_list_apply(blst, this%w, n) pap = device_glsc3(this%w_d, coef%mult_d, this%p_d, n) @@ -220,5 +222,5 @@ function cg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_ end function cg_device_solve end module cg_device - + diff --git a/src/krylov/bcknd/device/cuda/fusedcg_aux.cu b/src/krylov/bcknd/device/cuda/fusedcg_aux.cu new file mode 100644 index 00000000000..43558734cb3 --- /dev/null +++ b/src/krylov/bcknd/device/cuda/fusedcg_aux.cu @@ -0,0 +1,130 @@ +/* + Copyright (c) 2021-2024, The Neko Authors + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the authors nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "fusedcg_kernel.h" +#include +#include + +/** + * @todo Make sure that this gets deleted at some point... + */ +real *fusedcg_buf = NULL; +real *fusedcg_buf_d = NULL; +int fusedcg_buf_len = 0; + +extern "C" { + +#include +#include + + + + void cuda_fusedcg_update_p(void *p, void *z, void *po, real *beta, int *n) { + + const dim3 nthrds(1024, 1, 1); + const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1); + const cudaStream_t stream = (cudaStream_t) glb_cmd_queue; + + fusedcg_update_p_kernel + <<>>((real *) p, (real *) z, + (real *) po, *beta, *n); + CUDA_CHECK(cudaGetLastError()); + + } + + void cuda_fusedcg_update_x(void *x, void *p, void *alpha, int *p_cur, int *n) { + + const dim3 nthrds(1024, 1, 1); + const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1); + const cudaStream_t stream = (cudaStream_t) glb_cmd_queue; + + fusedcg_update_x_kernel + <<>>((real *) x, (const real **) p, + (const real *) alpha, *p_cur, *n); + CUDA_CHECK(cudaGetLastError()); + } + + + real cuda_fusedcg_part2(void *a, void *b, void *c, + void *alpha_d , real *alpha, int *p_cur, int * n) { + + const dim3 nthrds(1024, 1, 1); + const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1); + const int nb = ((*n) + 1024 - 1)/ 1024; + const cudaStream_t stream = (cudaStream_t) glb_cmd_queue; + + if (fusedcg_buf != NULL && fusedcg_buf_len < nb) { + CUDA_CHECK(cudaFreeHost(fusedcg_buf)); + CUDA_CHECK(cudaFree(fusedcg_buf_d)); + fusedcg_buf = NULL; + } + + if (fusedcg_buf == NULL){ + CUDA_CHECK(cudaMallocHost(&fusedcg_buf, 2*sizeof(real))); + CUDA_CHECK(cudaMalloc(&fusedcg_buf_d, nb*sizeof(real))); + fusedcg_buf_len = nb; + } + + /* Store alpha(p_cur) in pinned memory */ + fusedcg_buf[1] = (*alpha); + + /* Update alpha_d(p_cur) = alpha(p_cur) */ + real *alpha_d_p_cur = ((real *) alpha_d) + ((*p_cur - 1)); + CUDA_CHECK(cudaMemcpyAsync(alpha_d_p_cur, &fusedcg_buf[1], sizeof(real), + cudaMemcpyHostToDevice, stream)); + + + fusedcg_part2_kernel + <<>>((real *) a, (real *) b, + (real *) c, *alpha, + fusedcg_buf_d, * n); + CUDA_CHECK(cudaGetLastError()); + + reduce_kernel<<<1, 1024, 0, stream>>>(fusedcg_buf_d, nb); + CUDA_CHECK(cudaGetLastError()); + +#ifdef HAVE_DEVICE_MPI + cudaStreamSynchronize(stream); + device_mpi_allreduce(fusedcg_buf_d, fusedcg_buf, 1, + sizeof(real), DEVICE_MPI_SUM); +#else + CUDA_CHECK(cudaMemcpyAsync(fusedcg_buf, fusedcg_buf_d, sizeof(real), + cudaMemcpyDeviceToHost, stream)); + cudaStreamSynchronize(stream); +#endif + + return fusedcg_buf[0]; + } +} + diff --git a/src/krylov/bcknd/device/cuda/fusedcg_kernel.h b/src/krylov/bcknd/device/cuda/fusedcg_kernel.h new file mode 100644 index 00000000000..aaef81fe524 --- /dev/null +++ b/src/krylov/bcknd/device/cuda/fusedcg_kernel.h @@ -0,0 +1,124 @@ +#ifndef __KRYLOV_FUSEDCG_KERNEL_H__ +#define __KRYLOV_FUSEDCG_KERNEL_H__ +/* + Copyright (c) 2021-2024, The Neko Authors + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the authors nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#include + +/** + * Kernel for update of p + */ +template< typename T > +__global__ void fusedcg_update_p_kernel(T * __restrict__ p, + const T * __restrict__ z, + const T * __restrict__ po, + const T beta, + const int n) { + + const int idx = blockIdx.x * blockDim.x + threadIdx.x; + const int str = blockDim.x * gridDim.x; + + for (int i = idx; i < n; i+= str) { + p[i] = beta*po[i] + z[i]; + } + +} + +/** + * Kernel for update of x + */ +template< typename T > +__global__ void fusedcg_update_x_kernel(T * __restrict__ x, + const T ** p, + const T * __restrict__ alpha, + const int p_cur, + const int n) { + + const int idx = blockIdx.x * blockDim.x + threadIdx.x; + const int str = blockDim.x * gridDim.x; + + for (int i = idx; i < n; i+= str) { + T tmp = 0.0; + for (int j = 0; j < p_cur; j ++) { + tmp += p[j][i] * alpha[j]; + } + x[i] += tmp; + } + +} + +/** + * Device kernel for fusedcg_part2 + */ +template< typename T> +__global__ void fusedcg_part2_kernel(T * __restrict__ a, + const T * __restrict__ b, + const T * __restrict__ c, + const T alpha, + T * buf_h, + const int n) { + + const int idx = blockIdx.x * blockDim.x + threadIdx.x; + const int str = blockDim.x * gridDim.x; + + const unsigned int lane = threadIdx.x % warpSize; + const unsigned int wid = threadIdx.x / warpSize; + + __shared__ T buf[32]; + T tmp = 0.0; + + for (int i = idx; i < n; i+= str) { + T rt = a[i] - alpha * c[i]; + tmp = tmp + rt * b[i] * rt; + a[i] = rt; + } + + tmp = reduce_warp(tmp); + if (lane == 0) { + buf[wid] = tmp; + } + __syncthreads(); + + tmp = (threadIdx.x < blockDim.x / warpSize) ? buf[lane] : 0; + if (wid == 0) { + tmp = reduce_warp(tmp); + } + + if (threadIdx.x == 0) { + buf_h[blockIdx.x] = tmp; + } +} + + +#endif // __KRYLOV_FUSEDCG_KERNEL_H__ diff --git a/src/krylov/bcknd/device/cuda/gmres_kernel.h b/src/krylov/bcknd/device/cuda/gmres_kernel.h index 381dcbd603f..cc2f0de2148 100644 --- a/src/krylov/bcknd/device/cuda/gmres_kernel.h +++ b/src/krylov/bcknd/device/cuda/gmres_kernel.h @@ -1,3 +1,5 @@ +#ifndef __KRYLOV_GMRES_KERNEL_H__ +#define __KRYLOV_GMRES_KERNEL_H__ /* Copyright (c) 2022, The Neko Authors All rights reserved. @@ -79,3 +81,5 @@ __global__ void gmres_part2_kernel(T * __restrict__ w, } + +#endif // __KRYLOV_GMRES_KERNEL_H__ diff --git a/src/krylov/bcknd/device/cuda/pipecg_kernel.h b/src/krylov/bcknd/device/cuda/pipecg_kernel.h index b7280150d86..9f51dbadeb2 100644 --- a/src/krylov/bcknd/device/cuda/pipecg_kernel.h +++ b/src/krylov/bcknd/device/cuda/pipecg_kernel.h @@ -1,3 +1,5 @@ +#ifndef __KRYLOV_PIPECG_KERNEL_H__ +#define __KRYLOV_PIPECG_KERNEL_H__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -137,3 +139,5 @@ __global__ void pipecg_vecops_kernel(T * __restrict__ p, } } + +#endif // __KRYLOV_PIPECG_KERNEL_H__ diff --git a/src/krylov/bcknd/device/fusedcg_device.F90 b/src/krylov/bcknd/device/fusedcg_device.F90 new file mode 100644 index 00000000000..db91a99df7a --- /dev/null +++ b/src/krylov/bcknd/device/fusedcg_device.F90 @@ -0,0 +1,392 @@ +! Copyright (c) 2021-2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Defines a fused Conjugate Gradient method for accelerators +module fusedcg_device + use krylov, only : ksp_t, ksp_monitor_t, KSP_MAX_ITER + use precon, only : pc_t + use ax_product, only : ax_t + use num_types, only: rp, c_rp + use field, only : field_t + use coefs, only : coef_t + use gather_scatter, only : gs_t, GS_OP_ADD + use bc, only : bc_list_t, bc_list_apply + use math, only : glsc3, rzero, copy, abscmp + use device_math, only : device_rzero, device_copy, device_glsc3 + use device + use comm + implicit none + private + + integer, parameter :: DEVICE_FUSEDCG_P_SPACE = 10 + + !> Fused preconditioned conjugate gradient method + type, public, extends(ksp_t) :: fusedcg_device_t + real(kind=rp), allocatable :: w(:) + real(kind=rp), allocatable :: r(:) + real(kind=rp), allocatable :: z(:) + real(kind=rp), allocatable :: p(:,:) + real(kind=rp), allocatable :: alpha(:) + type(c_ptr) :: w_d = C_NULL_PTR + type(c_ptr) :: r_d = C_NULL_PTR + type(c_ptr) :: z_d = C_NULL_PTR + type(c_ptr) :: alpha_d = C_NULL_PTR + type(c_ptr) :: p_d_d = C_NULL_PTR + type(c_ptr), allocatable :: p_d(:) + type(c_ptr) :: gs_event = C_NULL_PTR + contains + procedure, pass(this) :: init => fusedcg_device_init + procedure, pass(this) :: free => fusedcg_device_free + procedure, pass(this) :: solve => fusedcg_device_solve + end type fusedcg_device_t + +#ifdef HAVE_CUDA + interface + subroutine cuda_fusedcg_update_p(p_d, z_d, po_d, beta, n) & + bind(c, name='cuda_fusedcg_update_p') + use, intrinsic :: iso_c_binding + import c_rp + implicit none + type(c_ptr), value :: p_d, z_d, po_d + real(c_rp) :: beta + integer(c_int) :: n + end subroutine cuda_fusedcg_update_p + end interface + + interface + subroutine cuda_fusedcg_update_x(x_d, p_d, alpha, p_cur, n) & + bind(c, name='cuda_fusedcg_update_x') + use, intrinsic :: iso_c_binding + implicit none + type(c_ptr), value :: x_d, p_d, alpha + integer(c_int) :: p_cur, n + end subroutine cuda_fusedcg_update_x + end interface + + interface + real(c_rp) function cuda_fusedcg_part2(a_d, b_d, c_d, alpha_d, alpha, & + p_cur, n) bind(c, name='cuda_fusedcg_part2') + use, intrinsic :: iso_c_binding + import c_rp + implicit none + type(c_ptr), value :: a_d, b_d, c_d, alpha_d + real(c_rp) :: alpha + integer(c_int) :: n, p_cur + end function cuda_fusedcg_part2 + end interface +#elif HAVE_HIP + interface + subroutine hip_fusedcg_update_p(p_d, z_d, po_d, beta, n) & + bind(c, name='hip_fusedcg_update_p') + use, intrinsic :: iso_c_binding + import c_rp + implicit none + type(c_ptr), value :: p_d, z_d, po_d + real(c_rp) :: beta + integer(c_int) :: n + end subroutine hip_fusedcg_update_p + end interface + + interface + subroutine hip_fusedcg_update_x(x_d, p_d, alpha, p_cur, n) & + bind(c, name='hip_fusedcg_update_x') + use, intrinsic :: iso_c_binding + implicit none + type(c_ptr), value :: x_d, p_d, alpha + integer(c_int) :: p_cur, n + end subroutine hip_fusedcg_update_x + end interface + + interface + real(c_rp) function hip_fusedcg_part2(a_d, b_d, c_d, alpha_d, alpha, & + p_cur, n) bind(c, name='hip_fusedcg_part2') + use, intrinsic :: iso_c_binding + import c_rp + implicit none + type(c_ptr), value :: a_d, b_d, c_d, alpha_d + real(c_rp) :: alpha + integer(c_int) :: n, p_cur + end function hip_fusedcg_part2 + end interface +#endif + +contains + + subroutine device_fusedcg_update_p(p_d, z_d, po_d, beta, n) + type(c_ptr), value :: p_d, z_d, po_d + real(c_rp) :: beta + integer(c_int) :: n +#ifdef HAVE_HIP + call hip_fusedcg_update_p(p_d, z_d, po_d, beta, n) +#elif HAVE_CUDA + call cuda_fusedcg_update_p(p_d, z_d, po_d, beta, n) +#else + call neko_error('No device backend configured') +#endif + end subroutine device_fusedcg_update_p + + subroutine device_fusedcg_update_x(x_d, p_d, alpha, p_cur, n) + type(c_ptr), value :: x_d, p_d, alpha + integer(c_int) :: p_cur, n +#ifdef HAVE_HIP + call hip_fusedcg_update_x(x_d, p_d, alpha, p_cur, n) +#elif HAVE_CUDA + call cuda_fusedcg_update_x(x_d, p_d, alpha, p_cur, n) +#else + call neko_error('No device backend configured') +#endif + end subroutine device_fusedcg_update_x + + function device_fusedcg_part2(a_d, b_d, c_d, alpha_d, alpha, & + p_cur, n) result(res) + type(c_ptr), value :: a_d, b_d, c_d, alpha_d + real(c_rp) :: alpha + integer :: n, p_cur + real(kind=rp) :: res + integer :: ierr +#ifdef HAVE_HIP + res = hip_fusedcg_part2(a_d, b_d, c_d, alpha_d, alpha, p_cur, n) +#elif HAVE_CUDA + res = cuda_fusedcg_part2(a_d, b_d, c_d, alpha_d, alpha, p_cur, n) +#else + call neko_error('No device backend configured') +#endif + +#ifndef HAVE_DEVICE_MPI + if (pe_size .gt. 1) then + call MPI_Allreduce(MPI_IN_PLACE, res, 1, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + end if +#endif + + end function device_fusedcg_part2 + + !> Initialise a fused PCG solver + subroutine fusedcg_device_init(this, n, max_iter, M, rel_tol, abs_tol) + class(fusedcg_device_t), target, intent(inout) :: this + class(pc_t), optional, intent(inout), target :: M + integer, intent(in) :: n + integer, intent(in) :: max_iter + real(kind=rp), optional, intent(inout) :: rel_tol + real(kind=rp), optional, intent(inout) :: abs_tol + type(c_ptr) :: ptr + integer(c_size_t) :: p_size + integer :: i + + call this%free() + + allocate(this%w(n)) + allocate(this%r(n)) + allocate(this%z(n)) + allocate(this%p(n, DEVICE_FUSEDCG_P_SPACE)) + allocate(this%p_d(DEVICE_FUSEDCG_P_SPACE)) + allocate(this%alpha(DEVICE_FUSEDCG_P_SPACE)) + + if (present(M)) then + this%M => M + end if + + call device_map(this%w, this%w_d, n) + call device_map(this%r, this%r_d, n) + call device_map(this%z, this%z_d, n) + call device_map(this%alpha, this%alpha_d, DEVICE_FUSEDCG_P_SPACE) + do i = 1, DEVICE_FUSEDCG_P_SPACE+1 + this%p_d(i) = C_NULL_PTR + call device_map(this%p(:,i), this%p_d(i), n) + end do + + p_size = c_sizeof(C_NULL_PTR) * (DEVICE_FUSEDCG_P_SPACE) + call device_alloc(this%p_d_d, p_size) + ptr = c_loc(this%p_d) + call device_memcpy(ptr, this%p_d_d, p_size, & + HOST_TO_DEVICE, sync=.false.) + if (present(rel_tol) .and. present(abs_tol)) then + call this%ksp_init(max_iter, rel_tol, abs_tol) + else if (present(rel_tol)) then + call this%ksp_init(max_iter, rel_tol=rel_tol) + else if (present(abs_tol)) then + call this%ksp_init(max_iter, abs_tol=abs_tol) + else + call this%ksp_init(max_iter) + end if + + call device_event_create(this%gs_event, 2) + + end subroutine fusedcg_device_init + + !> Deallocate a pipelined PCG solver + subroutine fusedcg_device_free(this) + class(fusedcg_device_t), intent(inout) :: this + integer :: i + + call this%ksp_free() + + if (allocated(this%w)) then + deallocate(this%w) + end if + + if (allocated(this%r)) then + deallocate(this%r) + end if + + + if (allocated(this%z)) then + deallocate(this%z) + end if + + + if (allocated(this%alpha)) then + deallocate(this%alpha) + end if + + if (allocated(this%p)) then + deallocate(this%p) + end if + + if (c_associated(this%w_d)) then + call device_free(this%w_d) + end if + + if (c_associated(this%r_d)) then + call device_free(this%r_d) + end if + + if (c_associated(this%z_d)) then + call device_free(this%z_d) + end if + + if (c_associated(this%alpha_d)) then + call device_free(this%alpha_d) + end if + + if (allocated(this%p_d)) then + do i = 1, DEVICE_FUSEDCG_P_SPACE + if (c_associated(this%p_d(i))) then + call device_free(this%p_d(i)) + end if + end do + end if + + nullify(this%M) + + if (c_associated(this%gs_event)) then + call device_event_destroy(this%gs_event) + end if + + end subroutine fusedcg_device_free + + !> Pipelined PCG solve + function fusedcg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results) + class(fusedcg_device_t), intent(inout) :: this + class(ax_t), intent(inout) :: Ax + type(field_t), intent(inout) :: x + integer, intent(in) :: n + real(kind=rp), dimension(n), intent(inout) :: f + type(coef_t), intent(inout) :: coef + type(bc_list_t), intent(inout) :: blst + type(gs_t), intent(inout) :: gs_h + type(ksp_monitor_t) :: ksp_results + integer, optional, intent(in) :: niter + integer :: iter, max_iter, ierr, i, p_cur, p_prev + real(kind=rp) :: rnorm, rtr, norm_fac, rtz1, rtz2 + real(kind=rp) :: pap, beta + type(c_ptr) :: f_d + f_d = device_get_ptr(f) + + if (present(niter)) then + max_iter = niter + else + max_iter = KSP_MAX_ITER + end if + norm_fac = 1.0_rp / sqrt(coef%volume) + + associate(w => this%w, r => this%r, p => this%p, z => this%z, & + alpha => this%alpha, alpha_d => this%alpha_d, & + w_d => this%w_d, r_d => this%r_d, z_d => this%z_d, & + p_d => this%p_d, p_d_d => this%p_d_d) + + rtz1 = 1.0_rp + p_prev = DEVICE_FUSEDCG_P_SPACE + p_cur = 1 + call device_rzero(x%x_d, n) + call device_rzero(p_d(1), n) + call device_copy(r_d, f_d, n) + + rtr = device_glsc3(r_d, coef%mult_d, r_d, n) + rnorm = sqrt(rtr)*norm_fac + ksp_results%res_start = rnorm + ksp_results%res_final = rnorm + ksp_results%iter = 0 + if(abscmp(rnorm, 0.0_rp)) return + + do iter = 1, max_iter + call this%M%solve(z, r, n) + rtz2 = rtz1 + rtz1 = device_glsc3(r_d, coef%mult_d, z_d, n) + beta = rtz1 / rtz2 + if (iter .eq. 1) beta = 0.0_rp + call device_fusedcg_update_p(p_d(p_cur), z_d, p_d(p_prev), beta, n) + + call Ax%compute(w, p(1, p_cur), coef, x%msh, x%Xh) + call gs_h%op(w, n, GS_OP_ADD, this%gs_event) + call device_event_sync(this%gs_event) + call bc_list_apply(blst, w, n) + + pap = device_glsc3(w_d, coef%mult_d, this%p_d(p_cur), n) + + alpha(p_cur) = rtz1 / pap + rtr = device_fusedcg_part2(r_d, coef%mult_d, w_d, & + alpha_d, alpha(p_cur), p_cur, n) + rnorm = sqrt(rtr)*norm_fac + + if ((p_cur .eq. DEVICE_FUSEDCG_P_SPACE) .or. & + (rnorm .lt. this%abs_tol) .or. iter .eq. max_iter) then + call device_fusedcg_update_x(x%x_d, p_d_d, alpha_d, p_cur, n) + p_prev = p_cur + p_cur = 1 + if (rnorm .lt. this%abs_tol) exit + else + p_prev = p_cur + p_cur = p_cur + 1 + end if + end do + + ksp_results%res_final = rnorm + ksp_results%iter = iter + + end associate + + end function fusedcg_device_solve + +end module fusedcg_device + + diff --git a/src/krylov/bcknd/device/gmres_device.F90 b/src/krylov/bcknd/device/gmres_device.F90 index 2a19fda4fe9..35ef3e6b268 100644 --- a/src/krylov/bcknd/device/gmres_device.F90 +++ b/src/krylov/bcknd/device/gmres_device.F90 @@ -1,4 +1,4 @@ -! Copyright (c) 2022, The Neko Authors +! Copyright (c) 2022-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -41,7 +41,7 @@ module gmres_device use gather_scatter, only : gs_t, GS_OP_ADD use bc, only : bc_list_t, bc_list_apply use device_identity, only : device_ident_t - use math, only : rone, rzero + use math, only : rone, rzero, abscmp use device_math, only : device_rzero, device_copy, device_glsc3, & device_add2s2, device_add2s1, device_rone, & device_cmult2, device_add2s2_many, device_glsc3_many,& @@ -52,7 +52,7 @@ module gmres_device implicit none private - !> Standard preconditioned generalized minimal residual method + !> Standard preconditioned generalized minimal residual method type, public, extends(ksp_t) :: gmres_device_t integer :: m_restart real(kind=rp), allocatable :: w(:) @@ -91,7 +91,7 @@ real(c_rp) function hip_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) & end function hip_gmres_part2 end interface #elif HAVE_CUDA - + interface real(c_rp) function cuda_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) & bind(c, name='cuda_gmres_part2') @@ -106,21 +106,21 @@ end function cuda_gmres_part2 contains - function device_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) result(alpha) - type(c_ptr), value :: h_d, w_d, v_d_d, mult_d - integer(c_int) :: j, n - real(c_rp) :: alpha - integer :: ierr + function device_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) result(alpha) + type(c_ptr), value :: h_d, w_d, v_d_d, mult_d + integer(c_int) :: j, n + real(c_rp) :: alpha + integer :: ierr #ifdef HAVE_HIP - alpha = hip_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) + alpha = hip_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) #elif HAVE_CUDA - alpha = cuda_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) + alpha = cuda_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) #else - call neko_error('No device backend configured') + call neko_error('No device backend configured') #endif #ifndef HAVE_DEVICE_MPI - if (pe_size .gt. 1) then + if (pe_size .gt. 1) then call MPI_Allreduce(MPI_IN_PLACE, alpha, 1, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) end if @@ -129,9 +129,10 @@ function device_gmres_part2(w_d,v_d_d,h_d,mult_d,j,n) result(alpha) end function device_gmres_part2 !> Initialise a standard GMRES solver - subroutine gmres_device_init(this, n, M, m_restart, rel_tol, abs_tol) + subroutine gmres_device_init(this, n, max_iter, M, m_restart, rel_tol, abs_tol) class(gmres_device_t), target, intent(inout) :: this integer, intent(in) :: n + integer, intent(in) :: max_iter class(pc_t), optional, intent(inout), target :: M integer, optional, intent(inout) :: m_restart real(kind=rp), optional, intent(inout) :: rel_tol @@ -146,13 +147,13 @@ subroutine gmres_device_init(this, n, M, m_restart, rel_tol, abs_tol) else this%m_restart = 30 end if - + call this%free() - - if (present(M)) then + + if (present(M)) then this%M => M - else + else this%M => M_ident end if @@ -160,14 +161,14 @@ subroutine gmres_device_init(this, n, M, m_restart, rel_tol, abs_tol) allocate(this%r(n)) call device_map(this%w, this%w_d, n) call device_map(this%r, this%r_d, n) - + allocate(this%c(this%m_restart)) allocate(this%s(this%m_restart)) allocate(this%gam(this%m_restart + 1)) call device_map(this%c, this%c_d, this%m_restart) call device_map(this%s, this%s_d, this%m_restart) call device_map(this%gam, this%gam_d, this%m_restart+1) - + allocate(this%z(n,this%m_restart)) allocate(this%v(n,this%m_restart)) allocate(this%h(this%m_restart,this%m_restart)) @@ -184,31 +185,34 @@ subroutine gmres_device_init(this, n, M, m_restart, rel_tol, abs_tol) this%h_d(i) = c_null_ptr call device_map(this%h(:,i), this%h_d(i), this%m_restart) end do - + z_size = c_sizeof(C_NULL_PTR) * (this%m_restart) call device_alloc(this%z_d_d, z_size) call device_alloc(this%v_d_d, z_size) call device_alloc(this%h_d_d, z_size) ptr = c_loc(this%z_d) - call device_memcpy(ptr,this%z_d_d, z_size, HOST_TO_DEVICE) + call device_memcpy(ptr,this%z_d_d, z_size, & + HOST_TO_DEVICE, sync=.false.) ptr = c_loc(this%v_d) - call device_memcpy(ptr,this%v_d_d, z_size, HOST_TO_DEVICE) + call device_memcpy(ptr,this%v_d_d, z_size, & + HOST_TO_DEVICE, sync=.false.) ptr = c_loc(this%h_d) - call device_memcpy(ptr,this%h_d_d, z_size, HOST_TO_DEVICE) - - + call device_memcpy(ptr,this%h_d_d, z_size, & + HOST_TO_DEVICE, sync=.false.) + + if (present(rel_tol) .and. present(abs_tol)) then - call this%ksp_init(rel_tol, abs_tol) + call this%ksp_init(max_iter, rel_tol, abs_tol) else if (present(rel_tol)) then - call this%ksp_init(rel_tol=rel_tol) + call this%ksp_init(max_iter, rel_tol=rel_tol) else if (present(abs_tol)) then - call this%ksp_init(abs_tol=abs_tol) + call this%ksp_init(max_iter, abs_tol=abs_tol) else - call this%ksp_init(abs_tol) + call this%ksp_init(max_iter) end if call device_event_create(this%gs_event, 2) - + end subroutine gmres_device_init !> Deallocate a standard GMRES solver @@ -229,7 +233,7 @@ subroutine gmres_device_free(this) if (allocated(this%r)) then deallocate(this%r) end if - + if (allocated(this%z)) then deallocate(this%z) end if @@ -237,17 +241,17 @@ subroutine gmres_device_free(this) if (allocated(this%h)) then deallocate(this%h) end if - + if (allocated(this%v)) then deallocate(this%v) end if - + if (allocated(this%s)) then deallocate(this%s) end if if (allocated(this%gam)) then deallocate(this%gam) - end if + end if if (allocated(this%v_d)) then do i = 1, this%m_restart @@ -273,7 +277,7 @@ subroutine gmres_device_free(this) end if - + if (c_associated(this%gam_d)) then call device_free(this%gam_d) end if @@ -295,9 +299,9 @@ subroutine gmres_device_free(this) if (c_associated(this%gs_event)) then call device_event_destroy(this%gs_event) end if - + end subroutine gmres_device_free - + !> Standard GMRES solve function gmres_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results) class(gmres_device_t), intent(inout) :: this @@ -310,7 +314,7 @@ function gmres_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(k type(gs_t), intent(inout) :: gs_h type(ksp_monitor_t) :: ksp_results integer, optional, intent(in) :: niter - integer :: iter + integer :: iter, max_iter integer :: i, j, k real(kind=rp) :: rnorm, alpha, temp, lr, alpha2, norm_fac logical :: conv @@ -321,138 +325,146 @@ function gmres_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(k conv = .false. iter = 0 - associate(w => this%w, c => this%c, r => this%r, z => this%z, h => this%h, & + if (present(niter)) then + max_iter = niter + else + max_iter = this%max_iter + end if + + associate(w => this%w, c => this%c, r => this%r, z => this%z, h => this%h, & v => this%v, s => this%s, gam => this%gam, v_d => this%v_d, & w_d => this%w_d, r_d => this%r_d, h_d => this%h_d, v_d_d => this%v_d_d, & x_d => x%x_d, z_d_d => this%z_d_d, c_d => this%c_d) - norm_fac = 1.0_rp / sqrt(coef%volume) - call rzero(gam, this%m_restart + 1) - call rone(s, this%m_restart) - call rone(c, this%m_restart) - call rzero(h, this%m_restart * this%m_restart) - call device_rzero(x%x_d, n) - call device_rzero(this%gam_d, this%m_restart + 1) - call device_rone(this%s_d, this%m_restart) - call device_rone(this%c_d, this%m_restart) - - call rzero(this%h, this%m_restart**2) + norm_fac = 1.0_rp / sqrt(coef%volume) + call rzero(gam, this%m_restart + 1) + call rone(s, this%m_restart) + call rone(c, this%m_restart) + call rzero(h, this%m_restart * this%m_restart) + call device_rzero(x%x_d, n) + call device_rzero(this%gam_d, this%m_restart + 1) + call device_rone(this%s_d, this%m_restart) + call device_rone(this%c_d, this%m_restart) + + call rzero(this%h, this%m_restart**2) ! do j = 1, this%m_restart ! call device_rzero(h_d(j), this%m_restart) ! end do - do while (.not. conv .and. iter .lt. niter) - - if(iter.eq.0) then - call device_copy(r_d, f_d, n) - else - call device_copy(r_d, f_d, n) - call Ax%compute(w, x%x, coef, x%msh, x%Xh) - call gs_h%op(w, n, GS_OP_ADD) - call device_event_sync(this%gs_event) - call bc_list_apply(blst, w, n) - call device_sub2(r_d, w_d, n) - end if - - gam(1) = sqrt(device_glsc3(r_d, r_d, coef%mult_d, n)) - if(iter.eq.0) then - ksp_results%res_start = gam(1) * norm_fac - end if - - if (gam(1) .eq. 0) return - - rnorm = 0.0_rp - temp = 1.0_rp / gam(1) - call device_cmult2(v_d(1), r_d, temp, n) - do j = 1, this%m_restart - iter = iter+1 - - call this%M%solve(z(1,j), v(1,j), n) - - call Ax%compute(w, z(1,j), coef, x%msh, x%Xh) - call gs_h%op(w, n, GS_OP_ADD) - call device_event_sync(this%gs_event) - call bc_list_apply(blst, w, n) - - if (NEKO_BCKND_OPENCL .eq. 1) then - do i = 1, j - h(i,j) = device_glsc3(w_d, v_d(i), coef%mult_d, n) - - call device_add2s2(w_d, v_d(i), -h(i,j), n) - - alpha2 = device_glsc3(w_d, w_d, coef%mult_d, n) - end do - else - call device_glsc3_many(h(1,j), w_d, v_d_d, coef%mult_d, j, n) - - call device_memcpy(h(:,j), h_d(j), j, HOST_TO_DEVICE) - - alpha2 = device_gmres_part2(w_d, v_d_d, h_d(j), coef%mult_d, j, n) - - end if - - alpha = sqrt(alpha2) - do i=1,j-1 - temp = h(i,j) - h(i ,j) = c(i)*temp + s(i) * h(i+1,j) - h(i+1,j) = -s(i)*temp + c(i) * h(i+1,j) - end do - - rnorm = 0.0_rp - if(alpha .eq. 0.0_rp) then - conv = .true. - exit - end if - - lr = sqrt(h(j,j) * h(j,j) + alpha**2) - temp = 1.0_rp / lr - c(j) = h(j,j) * temp - s(j) = alpha * temp - h(j,j) = lr - call device_memcpy(h(:,j), h_d(j), j, HOST_TO_DEVICE) - gam(j+1) = -s(j) * gam(j) - gam(j) = c(j) * gam(j) - - rnorm = abs(gam(j+1)) * norm_fac - if (rnorm .lt. this%abs_tol) then - conv = .true. - exit - end if - - if (iter + 1 .gt. niter) exit - - if( j .lt. this%m_restart) then - temp = 1.0_rp / alpha - call device_cmult2(v_d(j+1), w_d, temp, n) - end if - - end do - - j = min(j, this%m_restart) - do k = j, 1, -1 - temp = gam(k) - do i = j, k+1, -1 - temp = temp - h(k,i) * c(i) - end do - c(k) = temp / h(k,k) - end do - - if (NEKO_BCKND_OPENCL .eq. 1) then - do i = 1, j - call device_add2s2(x_d, this%z_d(i), c(i), n) - end do - else - call device_memcpy(c, c_d, j, HOST_TO_DEVICE) - call device_add2s2_many(x_d, z_d_d, c_d, j, n) - end if - end do - - end associate - - ksp_results%res_final = rnorm - ksp_results%iter = iter + do while (.not. conv .and. iter .lt. max_iter) + + if(iter.eq.0) then + call device_copy(r_d, f_d, n) + else + call device_copy(r_d, f_d, n) + call Ax%compute(w, x%x, coef, x%msh, x%Xh) + call gs_h%op(w, n, GS_OP_ADD, this%gs_event) + call device_event_sync(this%gs_event) + call bc_list_apply(blst, w, n) + call device_sub2(r_d, w_d, n) + end if + + gam(1) = sqrt(device_glsc3(r_d, r_d, coef%mult_d, n)) + if(iter.eq.0) then + ksp_results%res_start = gam(1) * norm_fac + end if + + if (abscmp(gam(1), 0.0_rp)) return + + rnorm = 0.0_rp + temp = 1.0_rp / gam(1) + call device_cmult2(v_d(1), r_d, temp, n) + do j = 1, this%m_restart + iter = iter+1 + + call this%M%solve(z(1,j), v(1,j), n) + + call Ax%compute(w, z(1,j), coef, x%msh, x%Xh) + call gs_h%op(w, n, GS_OP_ADD, this%gs_event) + call device_event_sync(this%gs_event) + call bc_list_apply(blst, w, n) + + if (NEKO_BCKND_OPENCL .eq. 1) then + do i = 1, j + h(i,j) = device_glsc3(w_d, v_d(i), coef%mult_d, n) + + call device_add2s2(w_d, v_d(i), -h(i,j), n) + + alpha2 = device_glsc3(w_d, w_d, coef%mult_d, n) + end do + else + call device_glsc3_many(h(1,j), w_d, v_d_d, coef%mult_d, j, n) + + call device_memcpy(h(:,j), h_d(j), j, & + HOST_TO_DEVICE, sync=.false.) + + alpha2 = device_gmres_part2(w_d, v_d_d, h_d(j), coef%mult_d, j, n) + + end if + + alpha = sqrt(alpha2) + do i=1,j-1 + temp = h(i,j) + h(i ,j) = c(i)*temp + s(i) * h(i+1,j) + h(i+1,j) = -s(i)*temp + c(i) * h(i+1,j) + end do + + rnorm = 0.0_rp + if(abscmp(alpha, 0.0_rp)) then + conv = .true. + exit + end if + + lr = sqrt(h(j,j) * h(j,j) + alpha**2) + temp = 1.0_rp / lr + c(j) = h(j,j) * temp + s(j) = alpha * temp + h(j,j) = lr + call device_memcpy(h(:,j), h_d(j), j, & + HOST_TO_DEVICE, sync=.false.) + gam(j+1) = -s(j) * gam(j) + gam(j) = c(j) * gam(j) + + rnorm = abs(gam(j+1)) * norm_fac + if (rnorm .lt. this%abs_tol) then + conv = .true. + exit + end if + + if (iter + 1 .gt. max_iter) exit + + if( j .lt. this%m_restart) then + temp = 1.0_rp / alpha + call device_cmult2(v_d(j+1), w_d, temp, n) + end if + + end do + + j = min(j, this%m_restart) + do k = j, 1, -1 + temp = gam(k) + do i = j, k+1, -1 + temp = temp - h(k,i) * c(i) + end do + c(k) = temp / h(k,k) + end do + + if (NEKO_BCKND_OPENCL .eq. 1) then + do i = 1, j + call device_add2s2(x_d, this%z_d(i), c(i), n) + end do + else + call device_memcpy(c, c_d, j, HOST_TO_DEVICE, sync=.false.) + call device_add2s2_many(x_d, z_d_d, c_d, j, n) + end if + end do + + end associate + + ksp_results%res_final = rnorm + ksp_results%iter = iter end function gmres_device_solve end module gmres_device - + diff --git a/src/krylov/bcknd/device/hip/fusedcg_aux.hip b/src/krylov/bcknd/device/hip/fusedcg_aux.hip new file mode 100644 index 00000000000..5174f10832a --- /dev/null +++ b/src/krylov/bcknd/device/hip/fusedcg_aux.hip @@ -0,0 +1,132 @@ +/* + Copyright (c) 2021-2024, The Neko Authors + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the authors nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include "fusedcg_kernel.h" +#include +#include + +/** + * @todo Make sure that this gets deleted at some point... + */ +real *fusedcg_buf = NULL; +real *fusedcg_buf_d = NULL; +int fusedcg_buf_len = 0; + +extern "C" { + +#include +#include + + + + void hip_fusedcg_update_p(void *p, void *z, void *po, real *beta, int *n) { + + const dim3 nthrds(1024, 1, 1); + const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1); + const hipStream_t stream = (hipStream_t) glb_cmd_queue; + + hipLaunchKernelGGL(HIP_KERNEL_NAME(fusedcg_update_p_kernel), + nblcks, nthrds, 0, stream, + (real *) p, (real *) z, (real *) po, *beta, *n); + HIP_CHECK(hipGetLastError()); + + } + + void hip_fusedcg_update_x(void *x, void *p, void *alpha, int *p_cur, int *n) { + + const dim3 nthrds(1024, 1, 1); + const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1); + const hipStream_t stream = (hipStream_t) glb_cmd_queue; + + hipLaunchKernelGGL(HIP_KERNEL_NAME(fusedcg_update_x_kernel), + nblcks, nthrds, 0, stream, + (real *) x, (const real **) p, + (const real *) alpha, *p_cur, *n); + HIP_CHECK(hipGetLastError()); + } + + + real hip_fusedcg_part2(void *a, void *b, void *c, + void *alpha_d , real *alpha, int *p_cur, int * n) { + + const dim3 nthrds(1024, 1, 1); + const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1); + const int nb = ((*n) + 1024 - 1)/ 1024; + const hipStream_t stream = (hipStream_t) glb_cmd_queue; + + if (fusedcg_buf != NULL && fusedcg_buf_len < nb) { + HIP_CHECK(hipHostFree(fusedcg_buf)); + HIP_CHECK(hipFree(fusedcg_buf_d)); + fusedcg_buf = NULL; + } + + if (fusedcg_buf == NULL){ + HIP_CHECK(hipHostMalloc(&fusedcg_buf, 2*sizeof(real))); + HIP_CHECK(hipMalloc(&fusedcg_buf_d, nb*sizeof(real))); + fusedcg_buf_len = nb; + } + + /* Store alpha(p_cur) in pinned memory */ + fusedcg_buf[1] = (*alpha); + + /* Update alpha_d(p_cur) = alpha(p_cur) */ + real *alpha_d_p_cur = ((real *) alpha_d) + ((*p_cur - 1)); + HIP_CHECK(hipMemcpyAsync(alpha_d_p_cur, &fusedcg_buf[1], sizeof(real), + hipMemcpyHostToDevice, stream)); + + hipLaunchKernelGGL(HIP_KERNEL_NAME(fusedcg_part2_kernel), + nblcks, nthrds, 0, stream, + (real *) a, (real *) b, (real *) c, + *alpha, fusedcg_buf_d, * n); + HIP_CHECK(hipGetLastError()); + + hipLaunchKernelGGL(HIP_KERNEL_NAME(reduce_kernel), + 1, 1024, 0, stream, fusedcg_buf_d, nb); + HIP_CHECK(hipGetLastError()); + +#ifdef HAVE_DEVICE_MPI + hipStreamSynchronize(stream); + device_mpi_allreduce(fusedcg_buf_d, fusedcg_buf, 1, + sizeof(real), DEVICE_MPI_SUM); +#else + HIP_CHECK(hipMemcpyAsync(fusedcg_buf, fusedcg_buf_d, sizeof(real), + hipMemcpyDeviceToHost, stream)); + hipStreamSynchronize(stream); +#endif + + return fusedcg_buf[0]; + } +} + diff --git a/src/krylov/bcknd/device/hip/fusedcg_kernel.h b/src/krylov/bcknd/device/hip/fusedcg_kernel.h new file mode 100644 index 00000000000..1a62caf1af4 --- /dev/null +++ b/src/krylov/bcknd/device/hip/fusedcg_kernel.h @@ -0,0 +1,124 @@ +#ifndef __KRYLOV_FUSEDCG_KERNEL_H__ +#define __KRYLOV_FUSEDCG_KERNEL_H__ +/* + Copyright (c) 2021-2024, The Neko Authors + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the authors nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +#include + +/** + * Kernel for update of p + */ +template< typename T > +__global__ void fusedcg_update_p_kernel(T * __restrict__ p, + const T * __restrict__ z, + const T * __restrict__ po, + const T beta, + const int n) { + + const int idx = blockIdx.x * blockDim.x + threadIdx.x; + const int str = blockDim.x * gridDim.x; + + for (int i = idx; i < n; i+= str) { + p[i] = beta*po[i] + z[i]; + } + +} + +/** + * Kernel for update of x + */ +template< typename T > +__global__ void fusedcg_update_x_kernel(T * __restrict__ x, + const T ** p, + const T * __restrict__ alpha, + const int p_cur, + const int n) { + + const int idx = blockIdx.x * blockDim.x + threadIdx.x; + const int str = blockDim.x * gridDim.x; + + for (int i = idx; i < n; i+= str) { + T tmp = 0.0; + for (int j = 0; j < p_cur; j ++) { + tmp += p[j][i] * alpha[j]; + } + x[i] += tmp; + } + +} + +/** + * Device kernel for fusedcg_part2 + */ +template< typename T> +__global__ void fusedcg_part2_kernel(T * __restrict__ a, + const T * __restrict__ b, + const T * __restrict__ c, + const T alpha, + T * buf_h, + const int n) { + + const int idx = blockIdx.x * blockDim.x + threadIdx.x; + const int str = blockDim.x * gridDim.x; + + const unsigned int lane = threadIdx.x % warpSize; + const unsigned int wid = threadIdx.x / warpSize; + + __shared__ T buf[64]; + T tmp = 0.0; + + for (int i = idx; i < n; i+= str) { + T rt = a[i] - alpha * c[i]; + tmp = tmp + rt * b[i] * rt; + a[i] = rt; + } + + tmp = reduce_warp(tmp); + if (lane == 0) { + buf[wid] = tmp; + } + __syncthreads(); + + tmp = (threadIdx.x < blockDim.x / warpSize) ? buf[lane] : 0; + if (wid == 0) { + tmp = reduce_warp(tmp); + } + + if (threadIdx.x == 0) { + buf_h[blockIdx.x] = tmp; + } +} + + +#endif // __KRYLOV_FUSEDCG_KERNEL_H__ diff --git a/src/krylov/bcknd/device/hip/gmres_kernel.h b/src/krylov/bcknd/device/hip/gmres_kernel.h index e47e724e870..0f417d9b737 100644 --- a/src/krylov/bcknd/device/hip/gmres_kernel.h +++ b/src/krylov/bcknd/device/hip/gmres_kernel.h @@ -1,3 +1,5 @@ +#ifndef __KRYLOV_GMRES_KERNEL_H__ +#define __KRYLOV_GMRES_KERNEL_H__ /* Copyright (c) 2022, The Neko Authors All rights reserved. @@ -80,3 +82,5 @@ __global__ void gmres_part2_kernel(T * __restrict__ w, } + +#endif // __KRYLOV_GMRES_KERNEL_H__ diff --git a/src/krylov/bcknd/device/hip/pipecg_kernel.h b/src/krylov/bcknd/device/hip/pipecg_kernel.h index c197526034a..790a15649a9 100644 --- a/src/krylov/bcknd/device/hip/pipecg_kernel.h +++ b/src/krylov/bcknd/device/hip/pipecg_kernel.h @@ -1,3 +1,5 @@ +#ifndef __KRYLOV_PIPECG_KERNEL_H__ +#define __KRYLOV_PIPECG_KERNEL_H__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -138,3 +140,5 @@ __global__ void pipecg_vecops_kernel(T * __restrict__ p, } } + +#endif // __KRYLOV_PIPECG_KERNEL_H__ diff --git a/src/krylov/bcknd/device/opencl/jacobi_kernel.cl b/src/krylov/bcknd/device/opencl/jacobi_kernel.cl index 5e9027fb5af..fd225b50a85 100644 --- a/src/krylov/bcknd/device/opencl/jacobi_kernel.cl +++ b/src/krylov/bcknd/device/opencl/jacobi_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __KRYLOV_JACOBI_KERNEL_CL__ +#define __KRYLOV_JACOBI_KERNEL_CL__ /* Copyright (c) 2022, The Neko Authors All rights reserved. @@ -115,3 +117,5 @@ DEFINE_JACOBI_KERNEL(14) DEFINE_JACOBI_KERNEL(15) DEFINE_JACOBI_KERNEL(16) + +#endif // __KRYLOV_JACOBI_KERNEL_CL__ diff --git a/src/krylov/bcknd/device/pc_identity_device.f90 b/src/krylov/bcknd/device/pc_identity_device.f90 index 69811e54557..20d56b91c30 100644 --- a/src/krylov/bcknd/device/pc_identity_device.f90 +++ b/src/krylov/bcknd/device/pc_identity_device.f90 @@ -35,14 +35,14 @@ module device_identity use device use device_math use precon, only : pc_t - use num_types, only : rp + use num_types, only : rp use, intrinsic :: iso_c_binding, only : c_ptr implicit none private - + !> Defines a canonical Krylov preconditioner for accelerators type, public, extends(pc_t) :: device_ident_t - contains + contains procedure, pass(this) :: solve => device_ident_solve procedure, pass(this) :: update => device_ident_update end type device_ident_t @@ -56,17 +56,17 @@ subroutine device_ident_solve(this, z, r, n) real(kind=rp), dimension(n), intent(inout) :: z real(kind=rp), dimension(n), intent(inout) :: r type(c_ptr) :: z_d, r_d - + z_d = device_get_ptr(z) r_d = device_get_ptr(r) - + call device_copy(z_d, r_d, n) - + end subroutine device_ident_solve - + !> Mandatory update routine (NOP) subroutine device_ident_update(this) class(device_ident_t), intent(inout) :: this end subroutine device_ident_update - + end module device_identity diff --git a/src/krylov/bcknd/device/pc_jacobi_device.F90 b/src/krylov/bcknd/device/pc_jacobi_device.F90 index 6f26a2435a5..6cd20641a89 100644 --- a/src/krylov/bcknd/device/pc_jacobi_device.F90 +++ b/src/krylov/bcknd/device/pc_jacobi_device.F90 @@ -66,7 +66,7 @@ subroutine hip_jacobi_update(d_d, dxt_d, dyt_d, dzt_d, & integer(c_int) :: nelv, lx end subroutine hip_jacobi_update end interface - + interface subroutine cuda_jacobi_update(d_d, dxt_d, dyt_d, dzt_d, & G11_d, G22_d, G33_d, G12_d, G13_d, G23_d, nelv, lx) & @@ -90,7 +90,7 @@ end subroutine opencl_jacobi_update end interface contains - + subroutine device_jacobi_init(this, coef, dof, gs_h) class(device_jacobi_t), intent(inout) :: this type(coef_t), intent(inout), target :: coef @@ -118,7 +118,7 @@ subroutine device_jacobi_free(this) call device_free(this%d_d) this%d_d = C_NULL_PTR end if - + if (allocated(this%d)) then deallocate(this%d) end if @@ -136,12 +136,12 @@ subroutine device_jacobi_solve(this, z, r, n) real(kind=rp), dimension(n), intent(inout) :: z real(kind=rp), dimension(n), intent(inout) :: r type(c_ptr) :: z_d, r_d - + z_d = device_get_ptr(z) r_d = device_get_ptr(r) - + call device_col3(z_d, r_d, this%d_d, n) - + end subroutine device_jacobi_solve subroutine device_jacobi_update(this) @@ -176,7 +176,7 @@ subroutine device_jacobi_update(this) if (coef%ifh2) then call device_addcol3(this%d_d, coef%h2_d, coef%B_d, coef%dof%size()) end if - + call gs_h%op(this%d, dof%size(), GS_OP_ADD) call device_invcol1(this%d_d, dof%size()) diff --git a/src/krylov/bcknd/device/pipecg_device.F90 b/src/krylov/bcknd/device/pipecg_device.F90 index 4ab0f234d10..829e95fea55 100644 --- a/src/krylov/bcknd/device/pipecg_device.F90 +++ b/src/krylov/bcknd/device/pipecg_device.F90 @@ -1,4 +1,4 @@ -! Copyright (c) 2021-2023, The Neko Authors +! Copyright (c) 2021-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -40,8 +40,9 @@ module pipecg_device use coefs, only : coef_t use gather_scatter, only : gs_t, GS_OP_ADD use bc, only : bc_list_t, bc_list_apply - use math, only : glsc3, rzero, copy - use device_math, only : device_rzero, device_copy, device_glsc3 + use math, only : glsc3, rzero, copy, abscmp + use device_math, only : device_rzero, device_copy, & + device_glsc3, device_vlsc3 use device use comm implicit none @@ -80,7 +81,7 @@ module pipecg_device procedure, pass(this) :: free => pipecg_device_free procedure, pass(this) :: solve => pipecg_device_solve end type pipecg_device_t - + #ifdef HAVE_CUDA interface subroutine cuda_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, & @@ -95,7 +96,7 @@ subroutine cuda_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, & real(c_rp) :: alpha, beta, reduction(3) end subroutine cuda_pipecg_vecops end interface - + interface subroutine cuda_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, & p_cur, p_space, n) & @@ -132,9 +133,9 @@ subroutine hip_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, & end subroutine hip_cg_update_xp end interface #endif - + contains - + subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, & w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) type(c_ptr), value :: p_d, q_d, r_d, s_d, u_d1, u_d2 @@ -143,15 +144,15 @@ subroutine device_pipecg_vecops(p_d, q_d, r_d, s_d, u_d1, u_d2, & real(c_rp) :: alpha, beta, reduction(3) #ifdef HAVE_HIP call hip_pipecg_vecops(p_d, q_d, r_d,& - s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) + s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) #elif HAVE_CUDA call cuda_pipecg_vecops(p_d, q_d, r_d,& - s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) + s_d, u_d1, u_d2, w_d, z_d, ni_d, mi_d, alpha, beta, mult_d, reduction,n) #else call neko_error('No device backend configured') #endif end subroutine device_pipecg_vecops - + subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n) use, intrinsic :: iso_c_binding type(c_ptr), value :: x_d, p_d, u_d_d, alpha, beta @@ -164,18 +165,19 @@ subroutine device_cg_update_xp(x_d, p_d, u_d_d, alpha, beta, p_cur, p_space, n) call neko_error('No device backend configured') #endif end subroutine device_cg_update_xp - + !> Initialise a pipelined PCG solver - subroutine pipecg_device_init(this, n, M, rel_tol, abs_tol) + subroutine pipecg_device_init(this, n, max_iter, M, rel_tol, abs_tol) class(pipecg_device_t), target, intent(inout) :: this class(pc_t), optional, intent(inout), target :: M integer, intent(in) :: n + integer, intent(in) :: max_iter real(kind=rp), optional, intent(inout) :: rel_tol real(kind=rp), optional, intent(inout) :: abs_tol type(c_ptr) :: ptr integer(c_size_t) :: u_size integer :: i - + call this%free() allocate(this%p(n)) @@ -190,8 +192,8 @@ subroutine pipecg_device_init(this, n, M, rel_tol, abs_tol) allocate(this%ni(n)) allocate(this%alpha(DEVICE_PIPECG_P_SPACE)) allocate(this%beta(DEVICE_PIPECG_P_SPACE)) - - if (present(M)) then + + if (present(M)) then this%M => M end if @@ -213,20 +215,21 @@ subroutine pipecg_device_init(this, n, M, rel_tol, abs_tol) u_size = 8*(DEVICE_PIPECG_P_SPACE+1) call device_alloc(this%u_d_d, u_size) ptr = c_loc(this%u_d) - call device_memcpy(ptr,this%u_d_d, u_size, HOST_TO_DEVICE) + call device_memcpy(ptr,this%u_d_d, u_size, & + HOST_TO_DEVICE, sync=.false.) if (present(rel_tol) .and. present(abs_tol)) then - call this%ksp_init(rel_tol, abs_tol) + call this%ksp_init(max_iter, rel_tol, abs_tol) else if (present(rel_tol)) then - call this%ksp_init(rel_tol=rel_tol) + call this%ksp_init(max_iter, rel_tol=rel_tol) else if (present(abs_tol)) then - call this%ksp_init(abs_tol=abs_tol) + call this%ksp_init(max_iter, abs_tol=abs_tol) else - call this%ksp_init() + call this%ksp_init(max_iter) end if call device_event_create(this%gs_event, 2) - + end subroutine pipecg_device_init !> Deallocate a pipelined PCG solver @@ -269,7 +272,7 @@ subroutine pipecg_device_free(this) if (allocated(this%beta)) then deallocate(this%beta) end if - + if (c_associated(this%p_d)) then call device_free(this%p_d) @@ -319,7 +322,7 @@ subroutine pipecg_device_free(this) end if end subroutine pipecg_device_free - + !> Pipelined PCG solve function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results) class(pipecg_device_t), intent(inout) :: this @@ -344,7 +347,7 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result( if (present(niter)) then max_iter = niter else - max_iter = KSP_MAX_ITER + max_iter = this%max_iter end if norm_fac = 1.0_rp / sqrt(coef%volume) @@ -355,7 +358,7 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result( p_d => this%p_d, q_d => this%q_d, r_d => this%r_d, & s_d => this%s_d, u_d => this%u_d, u_d_d => this%u_d_d, & w_d => this%w_d, z_d => this%z_d, mi_d => this%mi_d, ni_d => this%ni_d) - + p_prev = DEVICE_PIPECG_P_SPACE !this%p_space u_prev = DEVICE_PIPECG_P_SPACE + 1 !this%p_space+1 p_cur = 1 @@ -372,21 +375,21 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result( call gs_h%op(w, n, GS_OP_ADD, this%gs_event) call device_event_sync(this%gs_event) call bc_list_apply(blst, w, n) - + rtr = device_glsc3(r_d, coef%mult_d, r_d, n) rnorm = sqrt(rtr)*norm_fac ksp_results%res_start = rnorm ksp_results%res_final = rnorm ksp_results%iter = 0 - if(rnorm .eq. 0.0_rp) return + if(abscmp(rnorm, 0.0_rp)) return gamma1 = 0.0_rp tmp1 = 0.0_rp tmp2 = 0.0_rp tmp3 = 0.0_rp - tmp1 = device_glsc3(r_d,coef%mult_d,u_d(u_prev),n) - tmp2 = device_glsc3(w_d,coef%mult_d,u_d(u_prev),n) - tmp3 = device_glsc3(r_d,coef%mult_d,r_d,n) + tmp1 = device_vlsc3(r_d, coef%mult_d, u_d(u_prev), n) + tmp2 = device_vlsc3(w_d, coef%mult_d, u_d(u_prev), n) + tmp3 = device_vlsc3(r_d, coef%mult_d, r_d, n) reduction(1) = tmp1 reduction(2) = tmp2 reduction(3) = tmp3 @@ -394,7 +397,7 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result( do iter = 1, max_iter call MPI_Iallreduce(MPI_IN_PLACE, reduction, 3, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, request, ierr) - + call this%M%solve(mi, w, n) call Ax%compute(ni, mi, coef, x%msh, x%Xh) call gs_h%op(ni, n, GS_OP_ADD, this%gs_event) @@ -402,7 +405,7 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result( call bc_list_apply(blst, ni, n) call MPI_Wait(request, status, ierr) - gamma2 = gamma1 + gamma2 = gamma1 gamma1 = reduction(1) delta = reduction(2) rtr = reduction(3) @@ -414,24 +417,26 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result( if (iter .gt. 1) then beta(p_cur) = gamma1 / gamma2 alpha(p_cur) = gamma1 / (delta - (beta(p_cur) * gamma1/alpha(p_prev))) - else + else beta(p_cur) = 0.0_rp alpha(p_cur) = gamma1/delta end if - + call device_pipecg_vecops(p_d, q_d, r_d,& s_d, u_d(u_prev), u_d(p_cur),& w_d, z_d, ni_d,& mi_d, alpha(p_cur), beta(p_cur),& - coef%mult_d, reduction,n) + coef%mult_d, reduction, n) if (p_cur .eq. DEVICE_PIPECG_P_SPACE) then - call device_memcpy(alpha, alpha_d, p_cur, HOST_TO_DEVICE) - call device_memcpy(beta, beta_d, p_cur, HOST_TO_DEVICE) + call device_memcpy(alpha, alpha_d, p_cur, & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(beta, beta_d, p_cur, & + HOST_TO_DEVICE, sync=.false.) call device_cg_update_xp(x%x_d, p_d, u_d_d, alpha_d, beta_d, p_cur, & DEVICE_PIPECG_P_SPACE, n) p_prev = p_cur u_prev = DEVICE_PIPECG_P_SPACE + 1 - alpha(1) = alpha(p_cur) + alpha(1) = alpha(p_cur) beta(1) = beta(p_cur) p_cur = 1 else @@ -440,21 +445,21 @@ function pipecg_device_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result( p_cur = p_cur + 1 end if end do - + if ( p_cur .ne. 1) then - call device_memcpy(alpha, alpha_d, p_cur, HOST_TO_DEVICE) - call device_memcpy(beta, beta_d, p_cur, HOST_TO_DEVICE) + call device_memcpy(alpha, alpha_d, p_cur, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(beta, beta_d, p_cur, HOST_TO_DEVICE, sync=.false.) call device_cg_update_xp(x%x_d, p_d, u_d_d, alpha_d, beta_d, p_cur, & DEVICE_PIPECG_P_SPACE, n) end if ksp_results%res_final = rnorm ksp_results%iter = iter - + end associate - + end function pipecg_device_solve - + end module pipecg_device - + diff --git a/src/krylov/bcknd/sx/cg_sx.f90 b/src/krylov/bcknd/sx/cg_sx.f90 index f7caa5a08e1..90d5fc8088e 100644 --- a/src/krylov/bcknd/sx/cg_sx.f90 +++ b/src/krylov/bcknd/sx/cg_sx.f90 @@ -40,11 +40,11 @@ module cg_sx use coefs, only : coef_t use gather_scatter, only : gs_t, GS_OP_ADD use bc, only : bc_list_t, bc_list_apply - use math, only : glsc3, rzero, copy, add2s1 + use math, only : glsc3, add2s1, abscmp implicit none private - !> Standard preconditioned conjugate gradient method + !> Standard preconditioned conjugate gradient method (SX version) type, public, extends(ksp_t) :: sx_cg_t real(kind=rp), allocatable :: w(:) real(kind=rp), allocatable :: r(:) @@ -59,34 +59,35 @@ module cg_sx contains !> Initialise a standard PCG solver - subroutine sx_cg_init(this, n, M, rel_tol, abs_tol) + subroutine sx_cg_init(this, n, max_iter, M, rel_tol, abs_tol) class(sx_cg_t), intent(inout) :: this class(pc_t), optional, intent(inout), target :: M integer, intent(in) :: n + integer, intent(in) :: max_iter real(kind=rp), optional, intent(inout) :: rel_tol real(kind=rp), optional, intent(inout) :: abs_tol - + call this%free() - + allocate(this%w(n)) allocate(this%r(n)) allocate(this%p(n)) allocate(this%z(n)) - - if (present(M)) then + + if (present(M)) then this%M => M end if if (present(rel_tol) .and. present(abs_tol)) then - call this%ksp_init(rel_tol, abs_tol) + call this%ksp_init(max_iter, rel_tol, abs_tol) else if (present(rel_tol)) then - call this%ksp_init(rel_tol=rel_tol) + call this%ksp_init(max_iter, rel_tol=rel_tol) else if (present(abs_tol)) then - call this%ksp_init(abs_tol=abs_tol) + call this%ksp_init(max_iter, abs_tol=abs_tol) else - call this%ksp_init() + call this%ksp_init(max_iter) end if - + end subroutine sx_cg_init !> Deallocate a standard PCG solver @@ -106,7 +107,7 @@ subroutine sx_cg_free(this) if (allocated(this%p)) then deallocate(this%p) end if - + if (allocated(this%z)) then deallocate(this%z) end if @@ -114,7 +115,7 @@ subroutine sx_cg_free(this) nullify(this%M) end subroutine sx_cg_free - + !> Standard PCG solve function sx_cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results) class(sx_cg_t), intent(inout) :: this @@ -132,11 +133,11 @@ function sx_cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resu integer :: i, iter, max_iter real(kind=rp) :: rnorm, rtr, rtr0, rtz2, rtz1 real(kind=rp) :: beta, pap, alpha, alphm, norm_fac - + if (present(niter)) then max_iter = niter else - max_iter = KSP_MAX_ITER + max_iter = this%max_iter end if norm_fac = one / sqrt(coef%volume) @@ -152,7 +153,7 @@ function sx_cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resu ksp_results%res_start = rnorm ksp_results%res_final = rnorm ksp_results%iter = 0 - if(rnorm .eq. zero) return + if(abscmp(rnorm, zero)) return do iter = 1, max_iter call this%M%solve(this%z, this%r, n) @@ -162,7 +163,7 @@ function sx_cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resu beta = rtz1 / rtz2 if (iter .eq. 1) beta = zero call add2s1(this%p, this%z, beta, n) - + call Ax%compute(this%w, this%p, coef, x%msh, x%Xh) call gs_h%op(this%w, n, GS_OP_ADD) call bc_list_apply(blst, this%w, n) @@ -188,5 +189,5 @@ function sx_cg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resu end function sx_cg_solve end module cg_sx - + diff --git a/src/krylov/bcknd/sx/gmres_sx.f90 b/src/krylov/bcknd/sx/gmres_sx.f90 index 7ff7f925750..0f4e3a0b9cc 100644 --- a/src/krylov/bcknd/sx/gmres_sx.f90 +++ b/src/krylov/bcknd/sx/gmres_sx.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2021, The Neko Authors +! Copyright (c) 2021-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -40,12 +40,12 @@ module gmres_sx use coefs, only : coef_t use gather_scatter, only : gs_t, GS_OP_ADD use bc, only : bc_list_t, bc_list_apply - use math, only : glsc3, rzero, rone, copy, cmult2, col2, col3, add2s2 + use math, only : glsc3, rzero, rone, copy, cmult2, col2, col3, add2s2, abscmp use comm implicit none private - !> Standard preconditioned conjugate gradient method + !> Standard preconditioned generalized minimal residual method (SX version) type, public, extends(ksp_t) :: sx_gmres_t integer :: lgmres real(kind=rp), allocatable :: w(:) @@ -69,9 +69,10 @@ module gmres_sx contains !> Initialise a standard GMRES solver - subroutine sx_gmres_init(this, n, M, lgmres, rel_tol, abs_tol) + subroutine sx_gmres_init(this, n, max_iter, M, lgmres, rel_tol, abs_tol) class(sx_gmres_t), intent(inout) :: this integer, intent(in) :: n + integer, intent(in) :: max_iter class(pc_t), optional, intent(inout), target :: M integer, optional, intent(inout) :: lgmres real(kind=rp), optional, intent(inout) :: rel_tol @@ -82,11 +83,11 @@ subroutine sx_gmres_init(this, n, M, lgmres, rel_tol, abs_tol) else this%lgmres = 30 end if - + call this%free() - - if (present(M)) then + + if (present(M)) then this%M => M end if @@ -95,27 +96,27 @@ subroutine sx_gmres_init(this, n, M, lgmres, rel_tol, abs_tol) allocate(this%ml(n)) allocate(this%mu(n)) allocate(this%wk1(n)) - + allocate(this%c(this%lgmres)) allocate(this%s(this%lgmres)) allocate(this%gam(this%lgmres + 1)) - + allocate(this%z(n,this%lgmres)) allocate(this%v(n,this%lgmres)) - + allocate(this%h(this%lgmres,this%lgmres)) - - + + if (present(rel_tol) .and. present(abs_tol)) then - call this%ksp_init(rel_tol, abs_tol) + call this%ksp_init(max_iter, rel_tol, abs_tol) else if (present(rel_tol)) then - call this%ksp_init(rel_tol=rel_tol) + call this%ksp_init(max_iter, rel_tol=rel_tol) else if (present(abs_tol)) then - call this%ksp_init(abs_tol=abs_tol) + call this%ksp_init(max_iter, abs_tol=abs_tol) else - call this%ksp_init(abs_tol) + call this%ksp_init(max_iter) end if - + end subroutine sx_gmres_init !> Deallocate a standard GMRES solver @@ -135,7 +136,7 @@ subroutine sx_gmres_free(this) if (allocated(this%r)) then deallocate(this%r) end if - + if (allocated(this%z)) then deallocate(this%z) end if @@ -143,35 +144,35 @@ subroutine sx_gmres_free(this) if (allocated(this%h)) then deallocate(this%h) end if - + if (allocated(this%ml)) then deallocate(this%ml) end if - + if (allocated(this%v)) then deallocate(this%v) end if - + if (allocated(this%s)) then deallocate(this%s) end if - + if (allocated(this%mu)) then deallocate(this%mu) end if - + if (allocated(this%gam)) then deallocate(this%gam) end if - + if (allocated(this%wk1)) then deallocate(this%wk1) end if - + nullify(this%M) - + end subroutine sx_gmres_free - + !> Standard PCG solve function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results) class(sx_gmres_t), intent(inout) :: this @@ -184,10 +185,10 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r type(gs_t), intent(inout) :: gs_h type(ksp_monitor_t) :: ksp_results integer, optional, intent(in) :: niter - integer :: iter, glb_n - integer :: i, j, k, ierr + integer :: iter, max_iter, glb_n + integer :: i, j, k, ierr real(kind=rp), parameter :: one = 1.0 - real(kind=rp) :: rnorm + real(kind=rp) :: rnorm real(kind=rp) :: alpha, temp, l real(kind=rp) :: ratio, div0, norm_fac logical :: conv @@ -197,6 +198,12 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r iter = 0 glb_n = n / x%msh%nelv * x%msh%glb_nelv + if (present(niter)) then + max_iter = niter + else + max_iter = this%max_iter + end if + call rone(this%ml, n) call rone(this%mu, n) norm_fac = one / sqrt(coef%volume) @@ -206,19 +213,19 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r call rone(this%c, this%lgmres) call rzero(this%h, this%lgmres * this%lgmres) outer = 0 - do while (.not. conv .and. iter .lt. niter) + do while (.not. conv .and. iter .lt. max_iter) outer = outer + 1 - if(iter.eq.0) then - call col3(this%r,this%ml,f,n) + if(iter.eq.0) then + call col3(this%r,this%ml,f,n) else !update residual - call copy (this%r,f,n) + call copy (this%r,f,n) call Ax%compute(this%w, x%x, coef, x%msh, x%Xh) call gs_h%op(this%w, n, GS_OP_ADD) call bc_list_apply(blst, this%w, n) - call add2s2(this%r,this%w,-one,n) - call col2(this%r,this%ml,n) + call add2s2(this%r,this%w,-one,n) + call col2(this%r,this%ml,n) endif this%gam(1) = sqrt(glsc3(this%r, this%r, coef%mult, n)) if(iter.eq.0) then @@ -226,11 +233,11 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r ksp_results%res_start = div0 endif - if ( this%gam(1) .eq. 0) return + if (abscmp(this%gam(1), 0.0_rp)) return rnorm = 0.0_rp temp = one / this%gam(1) - call cmult2(this%v(1,1), this%r, temp, n) + call cmult2(this%v(1,1), this%r, temp, n) do j = 1, this%lgmres iter = iter+1 call col3(this%w, this%mu, this%v(1,j), n) @@ -241,7 +248,7 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r call Ax%compute(this%w, this%z(1,j), coef, x%msh, x%Xh) call gs_h%op(this%w, n, GS_OP_ADD) call bc_list_apply(blst, this%w, n) - call col2(this%w, this%ml, n) + call col2(this%w, this%ml, n) do i = 1, j this%h(i,j) = 0.0_rp @@ -254,7 +261,7 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r !Could probably be done inplace... call MPI_Allreduce(this%h(1,j), this%wk1, j, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) - call copy(this%h(1,j), this%wk1, j) + call copy(this%h(1,j), this%wk1, j) do i = 1, j do k = 1, n @@ -264,16 +271,16 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r !apply Givens rotations to new column do i=1,j-1 - temp = this%h(i,j) - this%h(i ,j) = this%c(i)*temp + this%s(i)*this%h(i+1,j) + temp = this%h(i,j) + this%h(i ,j) = this%c(i)*temp + this%s(i)*this%h(i+1,j) this%h(i+1,j) = -this%s(i)*temp + this%c(i)*this%h(i+1,j) end do - alpha = sqrt(glsc3(this%w, this%w, coef%mult, n)) + alpha = sqrt(glsc3(this%w, this%w, coef%mult, n)) rnorm = 0.0_rp - if(alpha .eq. 0.0_rp) then - conv = .true. - exit + if(abscmp(alpha, 0.0_rp)) then + conv = .true. + exit end if l = sqrt(this%h(j,j) * this%h(j,j) + alpha**2) temp = one / l @@ -285,16 +292,16 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r rnorm = abs(this%gam(j+1)) * norm_fac ratio = rnorm / div0 - if (rnorm .lt. this%abs_tol) then + if (rnorm .lt. this%abs_tol) then conv = .true. exit end if - - if (iter + 1 .gt. niter) exit - + + if (iter + 1 .gt. max_iter) exit + if( j .lt. this%lgmres) then - temp = one / alpha - call cmult2(this%v(1,j+1), this%w, temp, n) + temp = one / alpha + call cmult2(this%v(1,j+1), this%w, temp, n) endif end do j = min(j, this%lgmres) @@ -311,7 +318,7 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r do k = 1, n x%x(k,1,1,1) = x%x(k,1,1,1) + this%c(i) * this%z(k,i) end do - end do + end do end do ksp_results%res_final = rnorm @@ -319,5 +326,5 @@ function sx_gmres_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_r end function sx_gmres_solve end module gmres_sx - + diff --git a/src/krylov/bcknd/sx/pc_jacobi_sx.f90 b/src/krylov/bcknd/sx/pc_jacobi_sx.f90 index f7df4b75ad6..8ae01ff8fbf 100644 --- a/src/krylov/bcknd/sx/pc_jacobi_sx.f90 +++ b/src/krylov/bcknd/sx/pc_jacobi_sx.f90 @@ -55,7 +55,7 @@ module sx_jacobi end type sx_jacobi_t contains - + subroutine sx_jacobi_init(this, coef, dof, gs_h) class(sx_jacobi_t), intent(inout) :: this type(coef_t), intent(inout), target :: coef @@ -229,7 +229,7 @@ subroutine sx_update_lx(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n, lx) end do end subroutine sx_update_lx - + subroutine sx_update_lx14(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 14 integer, parameter :: ly = 14 @@ -308,7 +308,7 @@ subroutine sx_update_lx14(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) end do end subroutine sx_update_lx14 - + subroutine sx_update_lx13(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 13 integer, parameter :: ly = 13 @@ -387,7 +387,7 @@ subroutine sx_update_lx13(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) end do end subroutine sx_update_lx13 - + subroutine sx_update_lx12(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 12 integer, parameter :: ly = 12 @@ -624,7 +624,7 @@ subroutine sx_update_lx10(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) end do end subroutine sx_update_lx10 - + subroutine sx_update_lx9(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 9 integer, parameter :: ly = 9 @@ -703,7 +703,7 @@ subroutine sx_update_lx9(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) end do end subroutine sx_update_lx9 - + subroutine sx_update_lx8(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 8 integer, parameter :: ly = 8 @@ -782,7 +782,7 @@ subroutine sx_update_lx8(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) end do end subroutine sx_update_lx8 - + subroutine sx_update_lx7(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 7 integer, parameter :: ly = 7 @@ -861,7 +861,7 @@ subroutine sx_update_lx7(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) end subroutine sx_update_lx7 - + subroutine sx_update_lx6(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 6 integer, parameter :: ly = 6 @@ -1019,7 +1019,7 @@ subroutine sx_update_lx5(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) end subroutine sx_update_lx5 - + subroutine sx_update_lx4(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 4 integer, parameter :: ly = 4 @@ -1098,7 +1098,7 @@ subroutine sx_update_lx4(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) end subroutine sx_update_lx4 - + subroutine sx_update_lx3(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 3 integer, parameter :: ly = 3 @@ -1177,7 +1177,7 @@ subroutine sx_update_lx3(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) end do end subroutine sx_update_lx3 - + subroutine sx_update_lx2(d, dxt, dyt, dzt, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 2 integer, parameter :: ly = 2 diff --git a/src/krylov/bcknd/sx/pipecg_sx.f90 b/src/krylov/bcknd/sx/pipecg_sx.f90 index b6d10ffbb31..28b848b0212 100644 --- a/src/krylov/bcknd/sx/pipecg_sx.f90 +++ b/src/krylov/bcknd/sx/pipecg_sx.f90 @@ -40,11 +40,11 @@ module pipecg_sx use coefs, only : coef_t use gather_scatter, only : gs_t, GS_OP_ADD use bc, only : bc_list_t, bc_list_apply - use math, only : glsc3, rzero, copy + use math, only : glsc3, abscmp use comm implicit none private - + !> Pipelined preconditioned conjugate gradient method for SX-Aurora type, public, extends(ksp_t) :: sx_pipecg_t real(kind=rp), allocatable :: p(:) @@ -65,15 +65,16 @@ module pipecg_sx contains !> Initialise a pipelined PCG solver - subroutine sx_pipecg_init(this, n, M, rel_tol, abs_tol) + subroutine sx_pipecg_init(this, n, max_iter, M, rel_tol, abs_tol) class(sx_pipecg_t), intent(inout) :: this class(pc_t), optional, intent(inout), target :: M integer, intent(in) :: n + integer, intent(in) :: max_iter real(kind=rp), optional, intent(inout) :: rel_tol real(kind=rp), optional, intent(inout) :: abs_tol - + call this%free() - + allocate(this%p(n)) allocate(this%q(n)) allocate(this%r(n)) @@ -83,20 +84,20 @@ subroutine sx_pipecg_init(this, n, M, rel_tol, abs_tol) allocate(this%z(n)) allocate(this%mi(n)) allocate(this%ni(n)) - if (present(M)) then + if (present(M)) then this%M => M end if if (present(rel_tol) .and. present(abs_tol)) then - call this%ksp_init(rel_tol, abs_tol) + call this%ksp_init(max_iter, rel_tol, abs_tol) else if (present(rel_tol)) then - call this%ksp_init(rel_tol=rel_tol) + call this%ksp_init(max_iter, rel_tol=rel_tol) else if (present(abs_tol)) then - call this%ksp_init(abs_tol=abs_tol) + call this%ksp_init(max_iter, abs_tol=abs_tol) else - call this%ksp_init() + call this%ksp_init(max_iter) end if - + end subroutine sx_pipecg_init !> Deallocate a pipelined PCG solver @@ -137,7 +138,7 @@ subroutine sx_pipecg_free(this) end subroutine sx_pipecg_free - + !> Pipelined PCG solve function sx_pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results) class(sx_pipecg_t), intent(inout) :: this @@ -151,16 +152,16 @@ function sx_pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_ type(ksp_monitor_t) :: ksp_results integer, optional, intent(in) :: niter integer :: iter, max_iter, i, ierr - real(kind=rp) :: rnorm, rtr, reduction(3), norm_fac + real(kind=rp) :: rnorm, rtr, reduction(3), norm_fac real(kind=rp) :: alpha, beta, gamma1, gamma2, delta real(kind=rp) :: tmp1, tmp2, tmp3 type(MPI_Request) :: request type(MPI_Status) :: status - + if (present(niter)) then max_iter = niter else - max_iter = KSP_MAX_ITER + max_iter = this%max_iter end if norm_fac = 1.0_rp / sqrt(coef%volume) @@ -177,16 +178,16 @@ function sx_pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_ call Ax%compute(this%w, this%u, coef, x%msh, x%Xh) call gs_h%op(this%w, n, GS_OP_ADD) call bc_list_apply(blst, this%w, n) - + rtr = glsc3(this%r, coef%mult, this%r, n) rnorm = sqrt(rtr)*norm_fac ksp_results%res_start = rnorm ksp_results%res_final = rnorm ksp_results%iter = 0 - if(rnorm .eq. 0.0_rp) return + if(abscmp(rnorm, 0.0_rp)) return gamma1 = 0.0_rp - + do iter = 1, max_iter tmp1 = 0.0_rp @@ -200,34 +201,34 @@ function sx_pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_ reduction(1) = tmp1 reduction(2) = tmp2 reduction(3) = tmp3 - + call MPI_Iallreduce(MPI_IN_PLACE, reduction, 3, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, request, ierr) - + call this%M%solve(this%mi, this%w, n) call Ax%compute(this%ni, this%mi, coef, x%msh, x%Xh) call gs_h%op(this%ni, n, GS_OP_ADD) call bc_list_apply(blst, this%ni, n) call MPI_Wait(request, status, ierr) - gamma2 = gamma1 + gamma2 = gamma1 gamma1 = reduction(1) delta = reduction(2) rtr = reduction(3) - + rnorm = sqrt(rtr)*norm_fac if (rnorm .lt. this%abs_tol) then exit end if - + if (iter .gt. 1) then beta = gamma1 / gamma2 alpha = gamma1 / (delta - (beta * gamma1/alpha)) - else + else beta = 0.0_rp alpha = gamma1/delta end if - + do i = 1, n this%z(i) = beta * this%z(i) + this%ni(i) this%q(i) = beta * this%q(i) + this%mi(i) @@ -241,14 +242,14 @@ function sx_pipecg_solve(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_ this%u(i) = this%u(i) - alpha * this%q(i) this%w(i) = this%w(i) - alpha * this%z(i) end do - + end do - + ksp_results%res_final = rnorm ksp_results%iter = iter - + end function sx_pipecg_solve - + end module pipecg_sx - + diff --git a/src/krylov/krylov.f90 b/src/krylov/krylov.f90 index 88cfe377436..00a689b87e7 100644 --- a/src/krylov/krylov.f90 +++ b/src/krylov/krylov.f90 @@ -40,26 +40,25 @@ module krylov use mesh, only : mesh_t use field, only : field_t use utils, only : neko_error, neko_warning - use bc, only : bc_list_t, bc_list_apply_vector, bc_list_apply_scalar, & - bc_list_apply + use bc, only : bc_list_t use identity, only : ident_t use device_identity, only : device_ident_t use neko_config implicit none private - integer, public, parameter :: KSP_MAX_ITER = 1e4 !< Maximum number of iters. + integer, public, parameter :: KSP_MAX_ITER = 1e4 !< Maximum number of iters. real(kind=rp), public, parameter :: KSP_ABS_TOL = 1d-9 !< Absolut tolerance real(kind=rp), public, parameter :: KSP_REL_TOL = 1d-9 !< Relative tolerance !> Type for storing initial and final residuals in a Krylov solver. type, public :: ksp_monitor_t - !> Iteration number. - integer :: iter - !> Initial residual. - real(kind=rp) :: res_start - !> FInal residual - real(kind=rp) :: res_final + !> Iteration number. + integer :: iter + !> Initial residual. + real(kind=rp) :: res_start + !> FInal residual + real(kind=rp) :: res_final end type ksp_monitor_t !> Base abstract type for a canonical Krylov method, solving \f$ Ax = f \f$. @@ -67,20 +66,26 @@ module krylov class(pc_t), pointer :: M => null() !< Preconditioner real(kind=rp) :: rel_tol !< Relative tolerance real(kind=rp) :: abs_tol !< Absolute tolerance + integer :: max_iter !< Maximum number of iterations class(pc_t), allocatable :: M_ident !< Internal preconditioner (Identity) contains + !> Base type constructor. procedure, pass(this) :: ksp_init => krylov_init + !> Base type destructor. procedure, pass(this) :: ksp_free => krylov_free + !> Set preconditioner. procedure, pass(this) :: set_pc => krylov_set_pc + !> Solve the system. procedure(ksp_method), pass(this), deferred :: solve + !> Destructor. procedure(ksp_t_free), pass(this), deferred :: free end type ksp_t - + !> Abstract interface for a Krylov method's solve routine !! !! @param x field to solve for - !! @param f right hand side + !! @param f right hand side !! @param n integer, size of vectors !! @param coef Coefficients !! @param blst list of boundary conditions @@ -88,7 +93,7 @@ module krylov !! @param niter iteration trip count abstract interface function ksp_method(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_results) - import :: bc_list_t + import :: bc_list_t import :: field_t import :: ksp_t import :: coef_t @@ -104,8 +109,8 @@ function ksp_method(this, Ax, x, f, n, coef, blst, gs_h, niter) result(ksp_resul real(kind=rp), dimension(n), intent(inout) :: f type(coef_t), intent(inout) :: coef type(bc_list_t), intent(inout) :: blst - type(gs_t), intent(inout) :: gs_h - integer, optional, intent(in) :: niter + type(gs_t), intent(inout) :: gs_h + integer, optional, intent(in) :: niter type(ksp_monitor_t) :: ksp_results end function ksp_method end interface @@ -117,19 +122,21 @@ subroutine ksp_t_free(this) class(ksp_t), intent(inout) :: this end subroutine ksp_t_free end interface - + contains - !> Create a krylov solver + !> Constructor for the base type. + !! @param max_iter Maximum number of iterations. !! @param rel_tol Relative tolarance for converence. !! @param rel_tol Absolute tolarance for converence. !! @param M The preconditioner. - subroutine krylov_init(this, rel_tol, abs_tol, M) + subroutine krylov_init(this, max_iter, rel_tol, abs_tol, M) class(ksp_t), target, intent(inout) :: this + integer, intent(in) :: max_iter real(kind=rp), optional, intent(in) :: rel_tol real(kind=rp), optional, intent(in) :: abs_tol class(pc_t), optional, target, intent(in) :: M - + call krylov_free(this) if (present(rel_tol)) then @@ -144,6 +151,8 @@ subroutine krylov_init(this, rel_tol, abs_tol, M) this%abs_tol = KSP_ABS_TOL end if + this%max_iter = max_iter + if (present(M)) then this%M => M else @@ -158,7 +167,7 @@ subroutine krylov_init(this, rel_tol, abs_tol, M) end if end subroutine krylov_init - + !> Deallocate a Krylov solver subroutine krylov_free(this) class(ksp_t), intent(inout) :: this @@ -181,9 +190,9 @@ subroutine krylov_set_pc(this, M) call neko_error('Preconditioner already defined') end select end if - + this%M => M - + end subroutine krylov_set_pc - + end module krylov diff --git a/src/krylov/krylov_fctry.f90 b/src/krylov/krylov_fctry.f90 index f0d49ebf016..f122c670b65 100644 --- a/src/krylov/krylov_fctry.f90 +++ b/src/krylov/krylov_fctry.f90 @@ -38,6 +38,7 @@ module krylov_fctry use pipecg, only : pipecg_t use pipecg_sx, only : sx_pipecg_t use pipecg_device, only : pipecg_device_t + use fusedcg_device, only : fusedcg_device_t use bicgstab, only : bicgstab_t use gmres, only : gmres_t use gmres_sx, only : sx_gmres_t @@ -53,14 +54,15 @@ module krylov_fctry contains - !> Initialize an interative Krylov solver - subroutine krylov_solver_factory(ksp, n, solver, abstol, M) + !> Initialize an iterative Krylov solver. + subroutine krylov_solver_factory(ksp, n, solver, max_iter, abstol, M) class(ksp_t), allocatable, target, intent(inout) :: ksp integer, intent(in), value :: n character(len=*), intent(in) :: solver + integer, intent(in) :: max_iter real(kind=rp), optional :: abstol class(pc_t), optional, intent(inout), target :: M - + if (allocated(ksp)) then call krylov_solver_destroy(ksp) deallocate(ksp) @@ -84,6 +86,15 @@ subroutine krylov_solver_factory(ksp, n, solver, abstol, M) else allocate(pipecg_t::ksp) end if + else if (trim(solver) .eq. 'fusedcg') then + if (NEKO_BCKND_DEVICE .eq. 1) then + if (NEKO_BCKND_OPENCL .eq. 1) then + call neko_error('FusedCG not supported for OpenCL') + end if + allocate(fusedcg_device_t::ksp) + else + call neko_error('FusedCG only supported for CUDA/HIP') + end if else if (trim(solver) .eq. 'cacg') then allocate(cacg_t::ksp) else if (trim(solver) .eq. 'gmres') then @@ -103,102 +114,110 @@ subroutine krylov_solver_factory(ksp, n, solver, abstol, M) if (present(abstol) .and. present(M)) then select type(kp => ksp) type is(cg_t) - call kp%init(n, M = M, abs_tol = abstol) + call kp%init(n, max_iter, M = M, abs_tol = abstol) type is(sx_cg_t) - call kp%init(n, M = M, abs_tol = abstol) + call kp%init(n, max_iter, M = M, abs_tol = abstol) type is(cg_device_t) - call kp%init(n, M = M, abs_tol = abstol) + call kp%init(n, max_iter, M = M, abs_tol = abstol) type is(pipecg_t) - call kp%init(n, M = M, abs_tol = abstol) + call kp%init(n, max_iter, M = M, abs_tol = abstol) type is(sx_pipecg_t) - call kp%init(n, M = M, abs_tol = abstol) + call kp%init(n, max_iter, M = M, abs_tol = abstol) type is(pipecg_device_t) - call kp%init(n, M = M, abs_tol = abstol) + call kp%init(n, max_iter, M = M, abs_tol = abstol) + type is(fusedcg_device_t) + call kp%init(n, max_iter, M = M, abs_tol = abstol) type is(cacg_t) - call kp%init(n, M = M, abs_tol = abstol) + call kp%init(n, max_iter, M = M, abs_tol = abstol) type is(gmres_t) - call kp%init(n, M = M, abs_tol = abstol) + call kp%init(n, max_iter, M = M, abs_tol = abstol) type is(sx_gmres_t) - call kp%init(n, M = M, abs_tol = abstol) + call kp%init(n, max_iter, M = M, abs_tol = abstol) type is(gmres_device_t) - call kp%init(n, M = M, abs_tol = abstol) + call kp%init(n, max_iter, M = M, abs_tol = abstol) type is(bicgstab_t) - call kp%init(n, M = M, abs_tol = abstol) + call kp%init(n, max_iter, M = M, abs_tol = abstol) end select else if (present(abstol)) then select type(kp => ksp) type is(cg_t) - call kp%init(n, abs_tol = abstol) + call kp%init(n, max_iter, abs_tol = abstol) type is(sx_cg_t) - call kp%init(n, abs_tol = abstol) + call kp%init(n, max_iter, abs_tol = abstol) type is(cg_device_t) - call kp%init(n, abs_tol = abstol) + call kp%init(n, max_iter, abs_tol = abstol) type is(pipecg_t) - call kp%init(n, abs_tol = abstol) + call kp%init(n, max_iter, abs_tol = abstol) type is(sx_pipecg_t) - call kp%init(n, abs_tol = abstol) + call kp%init(n, max_iter, abs_tol = abstol) type is (pipecg_device_t) - call kp%init(n, abs_tol = abstol) + call kp%init(n, max_iter, abs_tol = abstol) + type is (fusedcg_device_t) + call kp%init(n, max_iter, abs_tol = abstol) type is(cacg_t) - call kp%init(n, abs_tol = abstol) + call kp%init(n, max_iter, abs_tol = abstol) type is(gmres_t) - call kp%init(n, abs_tol = abstol) + call kp%init(n, max_iter, abs_tol = abstol) type is(sx_gmres_t) - call kp%init(n, abs_tol = abstol) + call kp%init(n, max_iter, abs_tol = abstol) type is(gmres_device_t) - call kp%init(n, abs_tol = abstol) + call kp%init(n, max_iter, abs_tol = abstol) type is(bicgstab_t) - call kp%init(n, abs_tol = abstol) + call kp%init(n, max_iter, abs_tol = abstol) end select else if (present(M)) then select type(kp => ksp) type is(cg_t) - call kp%init(n, M = M) + call kp%init(n, max_iter, M = M) type is(sx_cg_t) - call kp%init(n, M = M) + call kp%init(n, max_iter, M = M) type is(cg_device_t) - call kp%init(n, M = M) + call kp%init(n, max_iter, M = M) type is(pipecg_t) - call kp%init(n, M = M) + call kp%init(n, max_iter, M = M) type is(sx_pipecg_t) - call kp%init(n, M = M) + call kp%init(n, max_iter, M = M) type is (pipecg_device_t) - call kp%init(n, M = M) + call kp%init(n, max_iter, M = M) + type is (fusedcg_device_t) + call kp%init(n, max_iter, M = M) type is(cacg_t) - call kp%init(n, M = M) + call kp%init(n, max_iter, M = M) type is(gmres_t) - call kp%init(n, M = M) + call kp%init(n, max_iter, M = M) type is(sx_gmres_t) - call kp%init(n, M = M) + call kp%init(n, max_iter, M = M) type is(gmres_device_t) - call kp%init(n, M = M) + call kp%init(n, max_iter, M = M) type is(bicgstab_t) - call kp%init(n, M = M) + call kp%init(n, max_iter, M = M) end select else select type(kp => ksp) type is(cg_t) - call kp%init(n) + call kp%init(n, max_iter) type is(sx_cg_t) - call kp%init(n) + call kp%init(n, max_iter) type is(cg_device_t) - call kp%init(n) + call kp%init(n, max_iter) type is(pipecg_t) - call kp%init(n) + call kp%init(n, max_iter) type is(sx_pipecg_t) - call kp%init(n) + call kp%init(n, max_iter) type is (pipecg_device_t) - call kp%init(n) + call kp%init(n, max_iter) + type is (fusedcg_device_t) + call kp%init(n, max_iter) type is(cacg_t) - call kp%init(n) + call kp%init(n, max_iter) type is(gmres_t) - call kp%init(n) + call kp%init(n, max_iter) type is(sx_gmres_t) - call kp%init(n) + call kp%init(n, max_iter) type is(gmres_device_t) - call kp%init(n) + call kp%init(n, max_iter) type is(bicgstab_t) - call kp%init(n) + call kp%init(n, max_iter) end select end if @@ -215,13 +234,15 @@ subroutine krylov_solver_destroy(ksp) type is(sx_cg_t) call kp%free() type is(cg_device_t) - call kp%free() + call kp%free() type is(pipecg_t) call kp%free() type is(sx_pipecg_t) call kp%free() type is (pipecg_device_t) call kp%free() + type is (fusedcg_device_t) + call kp%free() type is(cacg_t) call kp%free() type is(gmres_t) @@ -234,8 +255,8 @@ subroutine krylov_solver_destroy(ksp) call kp%free() end select end if - + end subroutine krylov_solver_destroy - + end module krylov_fctry - + diff --git a/src/krylov/pc_hsmg.f90 b/src/krylov/pc_hsmg.f90 index 0e911d9b024..ce667e8d06b 100644 --- a/src/krylov/pc_hsmg.f90 +++ b/src/krylov/pc_hsmg.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! !> Krylov preconditioner @@ -82,7 +82,7 @@ module hsmg use field, only : field_t use coefs, only : coef_t use mesh, only : mesh_t - use krylov, only : ksp_t, ksp_monitor_t + use krylov, only : ksp_t, ksp_monitor_t, KSP_MAX_ITER use krylov_fctry, only : krylov_solver_factory, krylov_solver_destroy !$ use omp_lib implicit none @@ -98,15 +98,15 @@ module hsmg type(schwarz_t), pointer :: schwarz type(field_t), pointer :: e end type multigrid_t - + type, public, extends(pc_t) :: hsmg_t type(mesh_t), pointer :: msh integer :: nlvls !< Number of levels in the multigrid type(multigrid_t), allocatable :: grids(:) !< array for multigrids type(gs_t) :: gs_crs, gs_mg !< gather scatter for lower levels type(space_t) :: Xh_crs, Xh_mg !< spaces for lower levels - type(dofmap_t) :: dm_crs, dm_mg - type(coef_t) :: c_crs, c_mg + type(dofmap_t) :: dm_crs, dm_mg + type(coef_t) :: c_crs, c_mg type(dirichlet_t) :: bc_crs, bc_mg, bc_reg type(bc_list_t) :: bclst_crs, bclst_mg, bclst_reg type(schwarz_t) :: schwarz, schwarz_mg, schwarz_crs !< Schwarz decompostions @@ -124,15 +124,15 @@ module hsmg type(c_ptr) :: r_d = C_NULL_PTR type(c_ptr) :: hsmg_event type(c_ptr) :: gs_event - contains + contains procedure, pass(this) :: init => hsmg_init procedure, pass(this) :: free => hsmg_free procedure, pass(this) :: solve => hsmg_solve procedure, pass(this) :: update => hsmg_set_h end type hsmg_t - + contains - + !> @note I do not think we actually use the same grids as they do in the original! subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype) class(hsmg_t), intent(inout), target :: this @@ -140,18 +140,18 @@ subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype) type(space_t), intent(inout), target :: Xh type(coef_t), intent(inout), target :: coef type(dofmap_t), intent(inout), target :: dof - type(gs_t), intent(inout), target :: gs_h + type(gs_t), intent(inout), target :: gs_h type(bc_list_t), intent(inout), target :: bclst character(len=*), optional :: crs_pctype integer :: n, i integer :: lx_crs, lx_mid - + call this%free() - this%nlvls = 3 + this%nlvls = 3 lx_crs = 2 if (Xh%lx .lt. 5) then lx_mid = max(Xh%lx-1,3) - + if(Xh%lx .le. 2) call neko_error('Polynomial order < 2 not supported for hsmg precon') else @@ -162,22 +162,22 @@ subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype) allocate(this%w(dof%size())) allocate(this%r(dof%size())) - + ! Compute all elements as if they are deformed call msh%all_deformed() n = dof%size() call this%e%init(dof, 'work array') call this%wf%init(dof, 'work 2') - + call this%Xh_crs%init(GLL, lx_crs, lx_crs, lx_crs) - this%dm_crs = dofmap_t(msh, this%Xh_crs) + this%dm_crs = dofmap_t(msh, this%Xh_crs) call this%gs_crs%init(this%dm_crs) call this%e_crs%init(this%dm_crs, 'work crs') call this%c_crs%init(this%gs_crs) - + call this%Xh_mg%init(GLL, lx_mid, lx_mid, lx_mid) - this%dm_mg = dofmap_t(msh, this%Xh_mg) + this%dm_mg = dofmap_t(msh, this%Xh_mg) call this%gs_mg%init(this%dm_mg) call this%e_mg%init(this%dm_mg, 'work midl') call this%c_mg%init(this%gs_mg) @@ -201,15 +201,15 @@ subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype) ! Create a backend specific krylov solver if (present(crs_pctype)) then call krylov_solver_factory(this%crs_solver, & - this%dm_crs%size(), trim(crs_pctype), M = this%pc_crs) + this%dm_crs%size(), trim(crs_pctype), KSP_MAX_ITER, M = this%pc_crs) else call krylov_solver_factory(this%crs_solver, & - this%dm_crs%size(), 'cg', M = this%pc_crs) + this%dm_crs%size(), 'cg', KSP_MAX_ITER, M = this%pc_crs) end if - call this%bc_crs%init(this%dm_crs) - call this%bc_mg%init(this%dm_mg) - call this%bc_reg%init(dof) + call this%bc_crs%init(this%c_crs) + call this%bc_mg%init(this%c_mg) + call this%bc_reg%init(coef) if (bclst%n .gt. 0) then do i = 1, bclst%n call this%bc_reg%mark_facets(bclst%bc(i)%bcp%marked_facet) @@ -227,7 +227,7 @@ subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype) call bc_list_init(this%bclst_crs) call bc_list_add(this%bclst_crs, this%bc_crs) - + call this%bc_mg%finalize() call this%bc_mg%set_g(0.0_rp) call bc_list_init(this%bclst_mg) @@ -241,14 +241,14 @@ subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype) call this%interp_mid_crs%init(this%Xh_mg,this%Xh_crs) call hsmg_fill_grid(dof, gs_h, Xh, coef, this%bclst_reg, this%schwarz, & - this%e, this%grids, 3) + this%e, this%grids, 3) call hsmg_fill_grid(this%dm_mg, this%gs_mg, this%Xh_mg, this%c_mg, & this%bclst_mg, this%schwarz_mg, this%e_mg, & - this%grids, 2) + this%grids, 2) call hsmg_fill_grid(this%dm_crs, this%gs_crs, this%Xh_crs, & this%c_crs, this%bclst_crs, this%schwarz_crs, & - this%e_crs, this%grids, 1) - + this%e_crs, this%grids, 1) + call hsmg_set_h(this) if (NEKO_BCKND_DEVICE .eq. 1) then call device_map(this%w, this%w_d, n) @@ -265,11 +265,11 @@ subroutine hsmg_init(this, msh, Xh, coef, dof, gs_h, bclst, crs_pctype) call device_event_create(this%hsmg_event, 2) call device_event_create(this%gs_event, 2) end subroutine hsmg_init - + subroutine hsmg_set_h(this) - class(hsmg_t), intent(inout) :: this + class(hsmg_t), intent(inout) :: this ! integer :: i - !Yeah I dont really know what to do here. For incompressible flow not much happens + !Yeah I dont really know what to do here. For incompressible flow not much happens this%grids(1)%coef%ifh2 = .false. call copy(this%grids(1)%coef%h1, this%grids(3)%coef%h1, & this%grids(1)%dof%size()) @@ -280,7 +280,7 @@ subroutine hsmg_set_h(this) end subroutine hsmg_set_h - subroutine hsmg_fill_grid(dof, gs_h, Xh, coef, bclst, schwarz, e, grids, l) + subroutine hsmg_fill_grid(dof, gs_h, Xh, coef, bclst, schwarz, e, grids, l) type(dofmap_t), target, intent(in):: dof type(gs_t), target, intent(in) :: gs_h type(space_t), target, intent(in) :: Xh @@ -308,15 +308,15 @@ subroutine hsmg_free(this) if (allocated(this%ax)) then deallocate(this%ax) end if - + if (allocated(this%grids)) then deallocate(this%grids) end if - + if (allocated(this%w)) then deallocate(this%w) end if - + if (allocated(this%r)) then deallocate(this%r) end if @@ -339,17 +339,17 @@ subroutine hsmg_free(this) call krylov_solver_destroy(this%crs_solver) deallocate(this%crs_solver) end if - - if (allocated(this%pc_crs)) then + + if (allocated(this%pc_crs)) then select type(pc => this%pc_crs) type is (jacobi_t) call pc%free() type is (sx_jacobi_t) call pc%free() - end select + end select deallocate(this%pc_crs) end if - + end subroutine hsmg_free !> The h1mg preconditioner from Nek5000. @@ -362,11 +362,11 @@ subroutine hsmg_solve(this, z, r, n) type(ksp_monitor_t) :: crs_info integer :: i, thrdid, nthrds - call profiler_start_region('HSMG solve') + call profiler_start_region('HSMG solve', 8) if (NEKO_BCKND_DEVICE .eq. 1) then z_d = device_get_ptr(z) r_d = device_get_ptr(r) - !We should not work with the input + !We should not work with the input call device_copy(this%r_d, r_d, n) call bc_list_apply_scalar(this%bclst_reg, r, n) @@ -402,18 +402,21 @@ subroutine hsmg_solve(this, z, r, n) nthrds = 1 !$ thrdid = omp_get_thread_num() !$ nthrds = omp_get_num_threads() - + if (thrdid .eq. 0) then - call this%grids(3)%schwarz%compute(z, this%r) - call this%grids(2)%schwarz%compute(this%grids(2)%e%x,this%w) + call profiler_start_region('HSMG schwarz', 9) + call this%grids(3)%schwarz%compute(z, this%r) + call this%grids(2)%schwarz%compute(this%grids(2)%e%x,this%w) + call profiler_end_region end if - if (nthrds .eq. 1 .or. thrdid .eq. 1) then + if (nthrds .eq. 1 .or. thrdid .eq. 1) then + call profiler_start_region('HSMG coarse grid', 10) call this%grids(1)%gs_h%op(this%wf%x, & this%grids(1)%dof%size(), GS_OP_ADD, this%gs_event) call device_event_sync(this%gs_event) call bc_list_apply_scalar(this%grids(1)%bclst, this%wf%x, & this%grids(1)%dof%size()) - call profiler_start_region('HSMG coarse-solve') + call profiler_start_region('HSMG coarse-solve', 11) crs_info = this%crs_solver%solve(this%Ax, this%grids(1)%e, this%wf%x, & this%grids(1)%dof%size(), & this%grids(1)%coef, & @@ -421,8 +424,8 @@ subroutine hsmg_solve(this, z, r, n) this%grids(1)%gs_h, this%niter) call profiler_end_region call bc_list_apply_scalar(this%grids(1)%bclst, this%grids(1)%e%x,& - this%grids(1)%dof%size()) - + this%grids(1)%dof%size()) + call profiler_end_region end if !$omp end parallel @@ -438,11 +441,11 @@ subroutine hsmg_solve(this, z, r, n) call device_event_sync(this%gs_event) call device_col2(z_d, this%grids(3)%coef%mult_d, this%grids(3)%dof%size()) else - !We should not work with the input + !We should not work with the input call copy(this%r, r, n) !OVERLAPPING Schwarz exchange and solve - call this%grids(3)%schwarz%compute(z, this%r) + call this%grids(3)%schwarz%compute(z, this%r) ! DOWNWARD Leg of V-cycle, we are pretty hardcoded here but w/e call col2(this%r, this%grids(3)%coef%mult, & this%grids(3)%dof%size()) @@ -451,7 +454,7 @@ subroutine hsmg_solve(this, z, r, n) this%msh%nelv, this%grids(2)%Xh) call this%grids(2)%gs_h%op(this%w, this%grids(2)%dof%size(), GS_OP_ADD) !OVERLAPPING Schwarz exchange and solve - call this%grids(2)%schwarz%compute(this%grids(2)%e%x,this%w) + call this%grids(2)%schwarz%compute(this%grids(2)%e%x,this%w) call col2(this%w, this%grids(2)%coef%mult, this%grids(2)%dof%size()) !restrict residual to crs call this%interp_mid_crs%map(this%r,this%w,this%msh%nelv,this%grids(1)%Xh) @@ -460,7 +463,7 @@ subroutine hsmg_solve(this, z, r, n) call this%grids(1)%gs_h%op(this%r, this%grids(1)%dof%size(), GS_OP_ADD) call bc_list_apply_scalar(this%grids(1)%bclst, this%r, & this%grids(1)%dof%size()) - call profiler_start_region('HSMG coarse-solve') + call profiler_start_region('HSMG coarse-solve', 11) crs_info = this%crs_solver%solve(this%Ax, this%grids(1)%e, this%r, & this%grids(1)%dof%size(), & this%grids(1)%coef, & diff --git a/src/krylov/pc_identity.f90 b/src/krylov/pc_identity.f90 index 2f82954628b..6d47665f35e 100644 --- a/src/krylov/pc_identity.f90 +++ b/src/krylov/pc_identity.f90 @@ -37,10 +37,10 @@ module identity use num_types, only : rp implicit none private - + !> Defines a canonical Krylov preconditioner type, public, extends(pc_t) :: ident_t - contains + contains procedure, pass(this) :: solve => ident_solve procedure, pass(this) :: update => ident_update end type ident_t @@ -53,12 +53,12 @@ subroutine ident_solve(this, z, r, n) class(ident_t), intent(inout) :: this real(kind=rp), dimension(n), intent(inout) :: z real(kind=rp), dimension(n), intent(inout) :: r - call copy(z, r, n) + call copy(z, r, n) end subroutine ident_solve !> Mandatory update routine subroutine ident_update(this) class(ident_t), intent(inout) :: this end subroutine ident_update - + end module identity diff --git a/src/krylov/precon.f90 b/src/krylov/precon.f90 index 17c17e8f0cd..003c22a3376 100644 --- a/src/krylov/precon.f90 +++ b/src/krylov/precon.f90 @@ -35,7 +35,7 @@ module precon use num_types, only : rp implicit none private - + !> Defines a canonical Krylov preconditioner type, public, abstract :: pc_t contains diff --git a/src/krylov/precon_fctry.f90 b/src/krylov/precon_fctry.f90 index 265296e507a..58eaa59338d 100644 --- a/src/krylov/precon_fctry.f90 +++ b/src/krylov/precon_fctry.f90 @@ -43,7 +43,7 @@ module precon_fctry implicit none contains - + !> Create a preconditioner subroutine precon_factory(pc, pctype) class(pc_t), target, allocatable, intent(inout) :: pc @@ -73,7 +73,7 @@ subroutine precon_factory(pc, pctype) else call neko_error('Unknown preconditioner '//trim(pctype)) end if - + end subroutine precon_factory !> Destroy a preconditioner @@ -90,9 +90,9 @@ subroutine precon_destroy(pc) call pcp%free() type is (hsmg_t) call pcp%free() - end select + end select end if - + end subroutine precon_destroy - + end module precon_fctry diff --git a/src/les/bcknd/cpu/vreman_cpu.f90 b/src/les/bcknd/cpu/vreman_cpu.f90 new file mode 100644 index 00000000000..645f670bedf --- /dev/null +++ b/src/les/bcknd/cpu/vreman_cpu.f90 @@ -0,0 +1,139 @@ +! Copyright (c) 2023, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Implements the CPU kernel for the `vreman_t` type. +module vreman_cpu + use num_types, only : rp + use field_list, only : field_list_t + use math, only : cadd, NEKO_EPS + use scratch_registry, only : neko_scratch_registry + use field_registry, only : neko_field_registry + use field, only : field_t + use operators, only : dudxyz + use coefs, only : coef_t + implicit none + private + + public :: vreman_compute_cpu + +contains + + !> Compute eddy viscosity on the CPU. + !! @param t The time value. + !! @param tstep The current time-step. + !! @param coef SEM coefficients. + !! @param nut The SGS viscosity array. + !! @param delta The LES lengthscale. + !! @param c The Vreman model constant + subroutine vreman_compute_cpu(t, tstep, coef, nut, delta, c) + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + type(coef_t), intent(in) :: coef + type(field_t), intent(inout) :: nut + type(field_t), intent(in) :: delta + real(kind=rp), intent(in) :: c + ! This is the alpha tensor in the paper + type(field_t), pointer :: a11, a12, a13, a21, a22, a23, a31, a32, a33 + type(field_t), pointer :: u, v, w + + real(kind=rp) :: beta11 + real(kind=rp) :: beta12 + real(kind=rp) :: beta13 + real(kind=rp) :: beta22 + real(kind=rp) :: beta23 + real(kind=rp) :: beta33 + real(kind=rp) :: b_beta + real(kind=rp) :: aijaij + integer :: temp_indices(9) + integer :: e, i + + u => neko_field_registry%get_field_by_name("u") + v => neko_field_registry%get_field_by_name("v") + w => neko_field_registry%get_field_by_name("u") + + call neko_scratch_registry%request_field(a11, temp_indices(1)) + call neko_scratch_registry%request_field(a12, temp_indices(2)) + call neko_scratch_registry%request_field(a13, temp_indices(3)) + call neko_scratch_registry%request_field(a21, temp_indices(4)) + call neko_scratch_registry%request_field(a22, temp_indices(5)) + call neko_scratch_registry%request_field(a23, temp_indices(6)) + call neko_scratch_registry%request_field(a31, temp_indices(7)) + call neko_scratch_registry%request_field(a32, temp_indices(8)) + call neko_scratch_registry%request_field(a33, temp_indices(9)) + + + ! Compute the derivatives of the velocity (the alpha tensor) + call dudxyz (a11%x, u%x, coef%drdx, coef%dsdx, coef%dtdx, coef) + call dudxyz (a12%x, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef) + call dudxyz (a13%x, u%x, coef%drdz, coef%dsdz, coef%dtdz, coef) + + call dudxyz (a21%x, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef) + call dudxyz (a22%x, v%x, coef%drdy, coef%dsdy, coef%dtdy, coef) + call dudxyz (a23%x, v%x, coef%drdz, coef%dsdz, coef%dtdz, coef) + + call dudxyz (a31%x, w%x, coef%drdx, coef%dsdx, coef%dtdx, coef) + call dudxyz (a32%x, w%x, coef%drdy, coef%dsdy, coef%dtdy, coef) + call dudxyz (a33%x, w%x, coef%drdz, coef%dsdz, coef%dtdz, coef) + + do e=1, coef%msh%nelv + do i=1, coef%Xh%lxyz + ! beta_ij = alpha_mi alpha_mj + beta11 = a11%x(i,1,1,e)**2 + a21%x(i,1,1,e)**2 + a31%x(i,1,1,e)**2 + beta22 = a12%x(i,1,1,e)**2 + a22%x(i,1,1,e)**2 + a32%x(i,1,1,e)**2 + beta33 = a13%x(i,1,1,e)**2 + a23%x(i,1,1,e)**2 + a33%x(i,1,1,e)**2 + beta12 = a11%x(i,1,1,e)*a12%x(i,1,1,e) + & + a21%x(i,1,1,e)*a22%x(i,1,1,e) + & + a31%x(i,1,1,e)*a32%x(i,1,1,e) + beta13 = a11%x(i,1,1,e)*a13%x(i,1,1,e) + & + a21%x(i,1,1,e)*a23%x(i,1,1,e) + & + a31%x(i,1,1,e)*a33%x(i,1,1,e) + beta23 = a12%x(i,1,1,e)*a13%x(i,1,1,e) + & + a22%x(i,1,1,e)*a23%x(i,1,1,e) + & + a32%x(i,1,1,e)*a33%x(i,1,1,e) + + b_beta = beta11*beta22 - beta12*beta12 + beta11*beta33 - beta13*beta13 & + + beta22*beta33 - beta23*beta23 + + b_beta = max(0.0_rp, b_beta) + + ! alpha_ij alpha_ij + aijaij = beta11 + beta22 + beta33 + + nut%x(i,1,1,e) = c*delta%x(i,1,1,e) * sqrt(b_beta/(aijaij + NEKO_EPS)) + end do + end do + + call neko_scratch_registry%relinquish_field(temp_indices) + end subroutine vreman_compute_cpu + +end module vreman_cpu + diff --git a/src/les/les_model.f90 b/src/les/les_model.f90 new file mode 100644 index 00000000000..4ec763120f3 --- /dev/null +++ b/src/les/les_model.f90 @@ -0,0 +1,194 @@ +! Copyright (c) 2023, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +!> Implements `les_model_t`. +module les_model + use num_types, only : rp + use field, only : field_t, field_ptr_t + use json_module, only : json_file + use field_registry, only : neko_field_registry + use dofmap, only : dofmap_t + use coefs, only : coef_t + use gs_ops, only : GS_OP_ADD + use neko_config, only : NEKO_BCKND_DEVICE + use device, only : device_memcpy, HOST_TO_DEVICE + implicit none + private + + !> Base abstract type for LES models based on the Boussinesq approximation. + type, abstract, public :: les_model_t + !> Subgrid kinematic viscosity. + type(field_t), pointer :: nut => null() + !> LES lengthscale. + type(field_t), pointer :: delta => null() + !> SEM coefficients. + type(coef_t), pointer :: coef => null() + contains + !> Constructor for the les_model_t (base) class. + procedure, pass(this) :: init_base => les_model_init_base + !> Destructor for the les_model_t (base) class. + procedure, pass(this) :: free_base => les_model_free_base + !> Compute the LES length-scale + procedure, pass(this) :: compute_delta => les_model_compute_delta + !> The common constructor. + procedure(les_model_init), pass(this), deferred :: init + !> Destructor. + procedure(les_model_free), pass(this), deferred :: free + !> Compute eddy viscosity. + procedure(les_model_compute), pass(this), deferred :: compute + end type les_model_t + + abstract interface + !> Compute eddy viscosity. + !! @param t The time value. + !! @param tstep The current time-step. + subroutine les_model_compute(this, t, tstep) + import les_model_t, rp + class(les_model_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + end subroutine les_model_compute + end interface + + abstract interface + !> Common constructor. + !! @param dofmap SEM map of degrees of freedom. + !! @param coef SEM coefficients. + !! @param json A dictionary with parameters. + subroutine les_model_init(this, dofmap, coef, json) + import les_model_t, json_file, dofmap_t, coef_t + class(les_model_t), intent(inout) :: this + type(coef_t), intent(in) :: coef + type(dofmap_t), intent(in) :: dofmap + type(json_file), intent(inout) :: json + end subroutine les_model_init + end interface + + abstract interface + !> Destructor. + subroutine les_model_free(this) + import les_model_t + class(les_model_t), intent(inout) :: this + end subroutine les_model_free + end interface + +contains + !> Constructor for the les_model_t (base) class. + !! @param dofmap SEM map of degrees of freedom. + !! @param coef SEM coefficients. + !! @param nu_name The name of the turbulent viscosity field. + subroutine les_model_init_base(this, dofmap, coef, nut_name) + class(les_model_t), intent(inout) :: this + type(dofmap_t), intent(in) :: dofmap + type(coef_t), target, intent(in) :: coef + character(len=*), intent(in) :: nut_name + + if (.not. neko_field_registry%field_exists(trim(nut_name))) then + call neko_field_registry%add_field(dofmap, trim(nut_name)) + end if + if (.not. neko_field_registry%field_exists("les_delta")) then + call neko_field_registry%add_field(dofmap, "les_delta") + end if + this%nut => neko_field_registry%get_field(trim(nut_name)) + this%delta => neko_field_registry%get_field("les_delta") + this%coef => coef + + call this%compute_delta() + end subroutine les_model_init_base + + !> Destructor for the les_model_t (base) class. + subroutine les_model_free_base(this) + class(les_model_t), intent(inout) :: this + + nullify(this%nut) + nullify(this%coef) + end subroutine les_model_free_base + + !> Compute the LES lengthscale. + !! For each GLL point, we take the distance between its neighbours in all 3 + !! directions divided by 2 with the exception of face nodes, where only one + !! neighbour exists. To form the lengthscale the distances along 3 directions + !! are multiplied, and a cubic root is extracted from the result. This + !! roughly corresponds to a cubic root of the cell volume in FVM computations. + subroutine les_model_compute_delta(this) + class(les_model_t), intent(inout) :: this + integer :: e, i, j, k + integer :: im, ip, jm, jp, km, kp + real(kind=rp) :: di, dj, dk, ndim_inv + + + do e=1, this%coef%msh%nelv + do k=1, this%coef%Xh%lz + km = max(1, k-1) + kp = min(this%coef%Xh%lz, k+1) + + do j=1, this%coef%Xh%ly + jm = max(1, j-1) + jp = min(this%coef%Xh%ly, j+1) + + do i=1, this%coef%Xh%lx + im = max(1, i-1) + ip = min(this%coef%Xh%lx, i+1) + + di = (this%coef%dof%x(ip,j,k,e) - this%coef%dof%x(im,j,k,e))**2 & + + (this%coef%dof%y(ip,j,k,e) - this%coef%dof%y(im,j,k,e))**2 & + + (this%coef%dof%z(ip,j,k,e) - this%coef%dof%z(im,j,k,e))**2 + + dj = (this%coef%dof%x(i,jp,k,e) - this%coef%dof%x(i,jm,k,e))**2 & + + (this%coef%dof%y(i,jp,k,e) - this%coef%dof%y(i,jm,k,e))**2 & + + (this%coef%dof%z(i,jp,k,e) - this%coef%dof%z(i,jm,k,e))**2 + + dk = (this%coef%dof%x(i,j,kp,e) - this%coef%dof%x(i,j,km,e))**2 & + + (this%coef%dof%y(i,j,kp,e) - this%coef%dof%y(i,j,km,e))**2 & + + (this%coef%dof%z(i,j,kp,e) - this%coef%dof%z(i,j,km,e))**2 + + di = sqrt(di) / (ip - im) + dj = sqrt(dj) / (jp - jm) + dk = sqrt(dk) / (kp - km) + this%delta%x(i,j,k,e) = (di * dj * dk)**(1.0_rp / 3.0_rp) + + enddo + enddo + enddo + enddo + + call this%coef%gs_h%op(this%delta, GS_OP_ADD) + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(this%delta%x, this%delta%x_d, this%delta%dof%size(),& + HOST_TO_DEVICE, sync=.false.) + end if + + end subroutine les_model_compute_delta + +end module les_model \ No newline at end of file diff --git a/src/les/les_model_fctry.f90 b/src/les/les_model_fctry.f90 new file mode 100644 index 00000000000..b562198967a --- /dev/null +++ b/src/les/les_model_fctry.f90 @@ -0,0 +1,71 @@ + +! Copyright (c) 2021-2022, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module les_model_fctry + use les_model, only : les_model_t + use vreman, only : vreman_t + use dofmap, only : dofmap_t + use coefs, only : coef_t + use json_module, only : json_file + implicit none + private + + public :: les_model_factory + +contains + !> LES model factory. Both constructs and initializes the object. + !! @param les_model The object to be allocated. + !! @param name The name of the LES model. + !! @param dofmap SEM map of degrees of freedom. + !! @param coef SEM coefficients. + !! @param json A dictionary with parameters. + subroutine les_model_factory(les_model, name, dofmap, coef, json) + class(les_model_t), allocatable, target, intent(inout) :: les_model + character(len=*), intent(in) :: name + type(dofmap_t), intent(in) :: dofmap + type(coef_t), intent(in) :: coef + type(json_file), intent(inout) :: json + + if (allocated(les_model)) then + deallocate(les_model) + end if + + if (trim(name) .eq. 'vreman') then + allocate(vreman_t::les_model) + end if + + call les_model%init(dofmap, coef, json) + + end subroutine les_model_factory + +end module les_model_fctry \ No newline at end of file diff --git a/src/les/vreman.f90 b/src/les/vreman.f90 new file mode 100644 index 00000000000..71d2903e1e9 --- /dev/null +++ b/src/les/vreman.f90 @@ -0,0 +1,128 @@ +! Copyright (c) 2023, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +!> Implements `vreman_t`. +module vreman + use num_types, only : rp + use field, only : field_t + use les_model, only : les_model_t + use dofmap , only : dofmap_t + use json_utils, only : json_get, json_get_or_default + use json_module, only : json_file + use utils, only : neko_error + use neko_config, only : NEKO_BCKND_DEVICE + use vreman_cpu, only : vreman_compute_cpu + use coefs, only : coef_t + implicit none + private + + !> Implements the Vreman LES model. + !! @note Reference DOI: 10.1063/1.1785131 + type, public, extends(les_model_t) :: vreman_t + !> Model constant, defaults to 0.07. + real(kind=rp) :: c + contains + !> Constructor from JSON. + procedure, pass(this) :: init => vreman_init + !> Constructor from components. + procedure, pass(this) :: init_from_components => vreman_init_from_components + !> Destructor. + procedure, pass(this) :: free => vreman_free + !> Compute eddy viscosity. + procedure, pass(this) :: compute => vreman_compute + end type vreman_t + +contains + !> Constructor. + !! @param dofmap SEM map of degrees of freedom. + !! @param coef SEM coefficients. + !! @param json A dictionary with parameters. + subroutine vreman_init(this, dofmap, coef, json) + class(vreman_t), intent(inout) :: this + type(dofmap_t), intent(in) :: dofmap + type(coef_t), intent(in) :: coef + type(json_file), intent(inout) :: json + character(len=:), allocatable :: nut_name + real(kind=rp) :: c + + call json_get(json, "nut_field", nut_name) + ! Based on the Smagorinsky Cs = 0.17. + call json_get_or_default(json, "c", c, 0.07_rp) + + call vreman_init_from_components(this, dofmap, coef, c, nut_name) + end subroutine vreman_init + + !> Constructor from components. + !! @param dofmap SEM map of degrees of freedom. + !! @param coef SEM coefficients. + !! @param c The model constant. + !! @param nut_name The name of the SGS viscosity field. + subroutine vreman_init_from_components(this, dofmap, coef, c, nut_name) + class(vreman_t), intent(inout) :: this + type(dofmap_t), intent(in) :: dofmap + type(coef_t), intent(in) :: coef + real(kind=rp) :: c + character(len=*), intent(in) :: nut_name + + call this%free() + + call this%init_base(dofmap, coef, nut_name) + this%c = c + + end subroutine vreman_init_from_components + + !> Destructor for the les_model_t (base) class. + subroutine vreman_free(this) + class(vreman_t), intent(inout) :: this + + call this%free_base() + end subroutine vreman_free + + !> Compute eddy viscosity. + !! @param t The time value. + !! @param tstep The current time-step. + subroutine vreman_compute(this, t, tstep) + class(vreman_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + + if (NEKO_BCKND_DEVICE .eq. 1) then + call neko_error("Vreman model not implemented on accelarators.") + else + call vreman_compute_cpu(t, tstep, this%coef, this%nut, this%delta,& + this%c) + end if + + end subroutine vreman_compute + +end module vreman \ No newline at end of file diff --git a/src/math/ax.f90 b/src/math/ax.f90 index b5a553e277a..6e71a6b336a 100644 --- a/src/math/ax.f90 +++ b/src/math/ax.f90 @@ -35,7 +35,6 @@ module ax_product use num_types, only : rp use coefs, only : coef_t use space, only : space_t - use field, only : field_t use mesh, only : mesh_t implicit none private @@ -54,7 +53,7 @@ module ax_product !! @param msh Mesh. !! @param Xh Function space \f$ X_h \f$. abstract interface - subroutine ax_compute(w, u, coef, msh, Xh) + subroutine ax_compute(w, u, coef, msh, Xh) import space_t import mesh_t import coef_t @@ -62,11 +61,11 @@ subroutine ax_compute(w, u, coef, msh, Xh) import rp implicit none type(space_t), intent(inout) :: Xh - type(mesh_t), intent(inout) :: msh + type(mesh_t), intent(inout) :: msh type(coef_t), intent(inout) :: coef real(kind=rp), intent(inout) :: w(Xh%lx, Xh%ly, Xh%lz, msh%nelv) real(kind=rp), intent(inout) :: u(Xh%lx, Xh%ly, Xh%lz, msh%nelv) end subroutine ax_compute end interface - + end module ax_product diff --git a/src/math/ax_helm_fctry.f90 b/src/math/ax_helm_fctry.f90 index 6e14c5c991c..d4960c16425 100644 --- a/src/math/ax_helm_fctry.f90 +++ b/src/math/ax_helm_fctry.f90 @@ -49,7 +49,7 @@ module ax_helm_fctry !! @param Ax The matrix-vector product type to be allocated. subroutine ax_helm_factory(Ax) class(ax_t), allocatable, intent(inout) :: Ax - + if (allocated(Ax)) then deallocate(Ax) end if diff --git a/src/math/bcknd/cpu/ax_helm.f90 b/src/math/bcknd/cpu/ax_helm.f90 index ddcf9c2ab5c..4cf504d9fa0 100644 --- a/src/math/bcknd/cpu/ax_helm.f90 +++ b/src/math/bcknd/cpu/ax_helm.f90 @@ -35,11 +35,10 @@ module ax_helm use num_types, only : rp use coefs, only : coef_t use space, only : space_t - use field, only : field_t use mesh, only : mesh_t use math, only : addcol4 implicit none - private + private !> CPU matrix-vector product for a Helmholtz problem. type, public, extends(ax_t) :: ax_helm_t @@ -48,7 +47,7 @@ module ax_helm procedure, nopass :: compute => ax_helm_compute end type ax_helm_t -contains +contains !> Compute the product. !! @param w Vector of size @a (lx,ly,lz,nelv). @@ -64,8 +63,8 @@ subroutine ax_helm_compute(w, u, coef, msh, Xh) type(coef_t), intent(inout) :: coef real(kind=rp), intent(inout) :: w(Xh%lx, Xh%ly, Xh%lz, msh%nelv) real(kind=rp), intent(inout) :: u(Xh%lx, Xh%ly, Xh%lz, msh%nelv) - - + + select case(Xh%lx) case (14) call ax_helm_lx14(w, u, Xh%dx, Xh%dy, Xh%dz, Xh%dxt, Xh%dyt, Xh%dzt, & @@ -110,10 +109,10 @@ subroutine ax_helm_compute(w, u, coef, msh, Xh) call ax_helm_lx(w, u, Xh%dx, Xh%dy, Xh%dz, Xh%dxt, Xh%dyt, Xh%dzt, coef%h1, & coef%G11, coef%G22, coef%G33, coef%G12, coef%G13, coef%G23, msh%nelv, Xh%lx) end select - + if (coef%ifh2) call addcol4 (w,coef%h2,coef%B,u,coef%dof%size()) - - + + end subroutine ax_helm_compute !> Generic CPU kernel for the Helmholtz matrix-vector product. @@ -177,7 +176,7 @@ subroutine ax_helm_lx(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -226,7 +225,7 @@ subroutine ax_helm_lx(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end do end subroutine ax_helm_lx - + subroutine ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & h1, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 14 @@ -270,7 +269,7 @@ subroutine ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dx(i,11) * u(11,j,1,e) & + Dx(i,12) * u(12,j,1,e) & + Dx(i,13) * u(13,j,1,e) & - + Dx(i,14) * u(14,j,1,e) + + Dx(i,14) * u(14,j,1,e) end do end do @@ -314,7 +313,7 @@ subroutine ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -344,7 +343,7 @@ subroutine ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dxt(i,11) * ur(11,j,1) & + Dxt(i,12) * ur(12,j,1) & + Dxt(i,13) * ur(13,j,1) & - + Dxt(i,14) * ur(14,j,1) + + Dxt(i,14) * ur(14,j,1) end do end do @@ -365,7 +364,7 @@ subroutine ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dyt(j,11) * us(i,11,k) & + Dyt(j,12) * us(i,12,k) & + Dyt(j,13) * us(i,13,k) & - + Dyt(j,14) * us(i,14,k) + + Dyt(j,14) * us(i,14,k) end do end do end do @@ -386,13 +385,13 @@ subroutine ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dzt(k,11) * ut(i,1,11) & + Dzt(k,12) * ut(i,1,12) & + Dzt(k,13) * ut(i,1,13) & - + Dzt(k,14) * ut(i,1,14) + + Dzt(k,14) * ut(i,1,14) end do end do end do end subroutine ax_helm_lx14 - + subroutine ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & h1, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 13 @@ -435,7 +434,7 @@ subroutine ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dx(i,10) * u(10,j,1,e) & + Dx(i,11) * u(11,j,1,e) & + Dx(i,12) * u(12,j,1,e) & - + Dx(i,13) * u(13,j,1,e) + + Dx(i,13) * u(13,j,1,e) end do end do @@ -478,7 +477,7 @@ subroutine ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -507,7 +506,7 @@ subroutine ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dxt(i,10) * ur(10,j,1) & + Dxt(i,11) * ur(11,j,1) & + Dxt(i,12) * ur(12,j,1) & - + Dxt(i,13) * ur(13,j,1) + + Dxt(i,13) * ur(13,j,1) end do end do @@ -527,7 +526,7 @@ subroutine ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dyt(j,10) * us(i,10,k) & + Dyt(j,11) * us(i,11,k) & + Dyt(j,12) * us(i,12,k) & - + Dyt(j,13) * us(i,13,k) + + Dyt(j,13) * us(i,13,k) end do end do end do @@ -547,13 +546,13 @@ subroutine ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dzt(k,10) * ut(i,1,10) & + Dzt(k,11) * ut(i,1,11) & + Dzt(k,12) * ut(i,1,12) & - + Dzt(k,13) * ut(i,1,13) + + Dzt(k,13) * ut(i,1,13) end do end do end do end subroutine ax_helm_lx13 - + subroutine ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & h1, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 12 @@ -595,7 +594,7 @@ subroutine ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dx(i,9) * u(9,j,1,e) & + Dx(i,10) * u(10,j,1,e) & + Dx(i,11) * u(11,j,1,e) & - + Dx(i,12) * u(12,j,1,e) + + Dx(i,12) * u(12,j,1,e) end do end do @@ -635,7 +634,7 @@ subroutine ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -682,7 +681,7 @@ subroutine ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dyt(j,9) * us(i,9,k) & + Dyt(j,10) * us(i,10,k) & + Dyt(j,11) * us(i,11,k) & - + Dyt(j,12) * us(i,12,k) + + Dyt(j,12) * us(i,12,k) end do end do end do @@ -701,7 +700,7 @@ subroutine ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dzt(k,9) * ut(i,1,9) & + Dzt(k,10) * ut(i,1,10) & + Dzt(k,11) * ut(i,1,11) & - + Dzt(k,12) * ut(i,1,12) + + Dzt(k,12) * ut(i,1,12) end do end do @@ -748,7 +747,7 @@ subroutine ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dx(i,8) * u(8,j,1,e) & + Dx(i,9) * u(9,j,1,e) & + Dx(i,10) * u(10,j,1,e) & - + Dx(i,11) * u(11,j,1,e) + + Dx(i,11) * u(11,j,1,e) end do end do @@ -765,7 +764,7 @@ subroutine ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dy(j,8) * u(i,8,k,e) & + Dy(j,9) * u(i,9,k,e) & + Dy(j,10) * u(i,10,k,e) & - + Dy(j,11) * u(i,11,k,e) + + Dy(j,11) * u(i,11,k,e) end do end do end do @@ -782,11 +781,11 @@ subroutine ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dz(k,8) * u(i,1,8,e) & + Dz(k,9) * u(i,1,9,e) & + Dz(k,10) * u(i,1,10,e) & - + Dz(k,11) * u(i,1,11,e) + + Dz(k,11) * u(i,1,11,e) end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -813,7 +812,7 @@ subroutine ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dxt(i,8) * ur(8,j,1) & + Dxt(i,9) * ur(9,j,1) & + Dxt(i,10) * ur(10,j,1) & - + Dxt(i,11) * ur(11,j,1) + + Dxt(i,11) * ur(11,j,1) end do end do @@ -831,7 +830,7 @@ subroutine ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dyt(j,8) * us(i,8,k) & + Dyt(j,9) * us(i,9,k) & + Dyt(j,10) * us(i,10,k) & - + Dyt(j,11) * us(i,11,k) + + Dyt(j,11) * us(i,11,k) end do end do end do @@ -849,7 +848,7 @@ subroutine ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dzt(k,8) * ut(i,1,8) & + Dzt(k,9) * ut(i,1,9) & + Dzt(k,10) * ut(i,1,10) & - + Dzt(k,11) * ut(i,1,11) + + Dzt(k,11) * ut(i,1,11) end do end do @@ -895,7 +894,7 @@ subroutine ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dx(i,7) * u(7,j,1,e) & + Dx(i,8) * u(8,j,1,e) & + Dx(i,9) * u(9,j,1,e) & - + Dx(i,10) * u(10,j,1,e) + + Dx(i,10) * u(10,j,1,e) end do end do @@ -911,7 +910,7 @@ subroutine ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dy(j,7) * u(i,7,k,e) & + Dy(j,8) * u(i,8,k,e) & + Dy(j,9) * u(i,9,k,e) & - + Dy(j,10) * u(i,10,k,e) + + Dy(j,10) * u(i,10,k,e) end do end do end do @@ -927,11 +926,11 @@ subroutine ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dz(k,7) * u(i,1,7,e) & + Dz(k,8) * u(i,1,8,e) & + Dz(k,9) * u(i,1,9,e) & - + Dz(k,10) * u(i,1,10,e) + + Dz(k,10) * u(i,1,10,e) end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -957,7 +956,7 @@ subroutine ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dxt(i,7) * ur(7,j,1) & + Dxt(i,8) * ur(8,j,1) & + Dxt(i,9) * ur(9,j,1) & - + Dxt(i,10) * ur(10,j,1) + + Dxt(i,10) * ur(10,j,1) end do end do @@ -974,7 +973,7 @@ subroutine ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dyt(j,7) * us(i,7,k) & + Dyt(j,8) * us(i,8,k) & + Dyt(j,9) * us(i,9,k) & - + Dyt(j,10) * us(i,10,k) + + Dyt(j,10) * us(i,10,k) end do end do end do @@ -991,7 +990,7 @@ subroutine ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dzt(k,7) * ut(i,1,7) & + Dzt(k,8) * ut(i,1,8) & + Dzt(k,9) * ut(i,1,9) & - + Dzt(k,10) * ut(i,1,10) + + Dzt(k,10) * ut(i,1,10) end do end do @@ -1036,7 +1035,7 @@ subroutine ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dx(i,6) * u(6,j,1,e) & + Dx(i,7) * u(7,j,1,e) & + Dx(i,8) * u(8,j,1,e) & - + Dx(i,9) * u(9,j,1,e) + + Dx(i,9) * u(9,j,1,e) end do end do @@ -1051,7 +1050,7 @@ subroutine ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dy(j,6) * u(i,6,k,e) & + Dy(j,7) * u(i,7,k,e) & + Dy(j,8) * u(i,8,k,e) & - + Dy(j,9) * u(i,9,k,e) + + Dy(j,9) * u(i,9,k,e) end do end do end do @@ -1066,11 +1065,11 @@ subroutine ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dz(k,6) * u(i,1,6,e) & + Dz(k,7) * u(i,1,7,e) & + Dz(k,8) * u(i,1,8,e) & - + Dz(k,9) * u(i,1,9,e) + + Dz(k,9) * u(i,1,9,e) end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -1095,7 +1094,7 @@ subroutine ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dxt(i,6) * ur(6,j,1) & + Dxt(i,7) * ur(7,j,1) & + Dxt(i,8) * ur(8,j,1) & - + Dxt(i,9) * ur(9,j,1) + + Dxt(i,9) * ur(9,j,1) end do end do @@ -1111,7 +1110,7 @@ subroutine ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dyt(j,6) * us(i,6,k) & + Dyt(j,7) * us(i,7,k) & + Dyt(j,8) * us(i,8,k) & - + Dyt(j,9) * us(i,9,k) + + Dyt(j,9) * us(i,9,k) end do end do end do @@ -1127,7 +1126,7 @@ subroutine ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dzt(k,6) * ut(i,1,6) & + Dzt(k,7) * ut(i,1,7) & + Dzt(k,8) * ut(i,1,8) & - + Dzt(k,9) * ut(i,1,9) + + Dzt(k,9) * ut(i,1,9) end do end do @@ -1171,7 +1170,7 @@ subroutine ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dx(i,5) * u(5,j,1,e) & + Dx(i,6) * u(6,j,1,e) & + Dx(i,7) * u(7,j,1,e) & - + Dx(i,8) * u(8,j,1,e) + + Dx(i,8) * u(8,j,1,e) end do end do @@ -1185,7 +1184,7 @@ subroutine ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dy(j,5) * u(i,5,k,e) & + Dy(j,6) * u(i,6,k,e) & + Dy(j,7) * u(i,7,k,e) & - + Dy(j,8) * u(i,8,k,e) + + Dy(j,8) * u(i,8,k,e) end do end do end do @@ -1199,11 +1198,11 @@ subroutine ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dz(k,5) * u(i,1,5,e) & + Dz(k,6) * u(i,1,6,e) & + Dz(k,7) * u(i,1,7,e) & - + Dz(k,8) * u(i,1,8,e) + + Dz(k,8) * u(i,1,8,e) end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -1227,7 +1226,7 @@ subroutine ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dxt(i,5) * ur(5,j,1) & + Dxt(i,6) * ur(6,j,1) & + Dxt(i,7) * ur(7,j,1) & - + Dxt(i,8) * ur(8,j,1) + + Dxt(i,8) * ur(8,j,1) end do end do @@ -1242,7 +1241,7 @@ subroutine ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dyt(j,5) * us(i,5,k) & + Dyt(j,6) * us(i,6,k) & + Dyt(j,7) * us(i,7,k) & - + Dyt(j,8) * us(i,8,k) + + Dyt(j,8) * us(i,8,k) end do end do end do @@ -1257,7 +1256,7 @@ subroutine ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dzt(k,5) * ut(i,1,5) & + Dzt(k,6) * ut(i,1,6) & + Dzt(k,7) * ut(i,1,7) & - + Dzt(k,8) * ut(i,1,8) + + Dzt(k,8) * ut(i,1,8) end do end do @@ -1300,7 +1299,7 @@ subroutine ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dx(i,4) * u(4,j,1,e) & + Dx(i,5) * u(5,j,1,e) & + Dx(i,6) * u(6,j,1,e) & - + Dx(i,7) * u(7,j,1,e) + + Dx(i,7) * u(7,j,1,e) end do end do @@ -1313,7 +1312,7 @@ subroutine ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dy(j,4) * u(i,4,k,e) & + Dy(j,5) * u(i,5,k,e) & + Dy(j,6) * u(i,6,k,e) & - + Dy(j,7) * u(i,7,k,e) + + Dy(j,7) * u(i,7,k,e) end do end do end do @@ -1326,11 +1325,11 @@ subroutine ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dz(k,4) * u(i,1,4,e) & + Dz(k,5) * u(i,1,5,e) & + Dz(k,6) * u(i,1,6,e) & - + Dz(k,7) * u(i,1,7,e) + + Dz(k,7) * u(i,1,7,e) end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -1353,7 +1352,7 @@ subroutine ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dxt(i,4) * ur(4,j,1) & + Dxt(i,5) * ur(5,j,1) & + Dxt(i,6) * ur(6,j,1) & - + Dxt(i,7) * ur(7,j,1) + + Dxt(i,7) * ur(7,j,1) end do end do @@ -1367,7 +1366,7 @@ subroutine ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dyt(j,4) * us(i,4,k) & + Dyt(j,5) * us(i,5,k) & + Dyt(j,6) * us(i,6,k) & - + Dyt(j,7) * us(i,7,k) + + Dyt(j,7) * us(i,7,k) end do end do end do @@ -1381,7 +1380,7 @@ subroutine ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dzt(k,4) * ut(i,1,4) & + Dzt(k,5) * ut(i,1,5) & + Dzt(k,6) * ut(i,1,6) & - + Dzt(k,7) * ut(i,1,7) + + Dzt(k,7) * ut(i,1,7) end do end do @@ -1423,7 +1422,7 @@ subroutine ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dx(i,3) * u(3,j,1,e) & + Dx(i,4) * u(4,j,1,e) & + Dx(i,5) * u(5,j,1,e) & - + Dx(i,6) * u(6,j,1,e) + + Dx(i,6) * u(6,j,1,e) end do end do @@ -1435,7 +1434,7 @@ subroutine ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dy(j,3) * u(i,3,k,e) & + Dy(j,4) * u(i,4,k,e) & + Dy(j,5) * u(i,5,k,e) & - + Dy(j,6) * u(i,6,k,e) + + Dy(j,6) * u(i,6,k,e) end do end do end do @@ -1447,11 +1446,11 @@ subroutine ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dz(k,3) * u(i,1,3,e) & + Dz(k,4) * u(i,1,4,e) & + Dz(k,5) * u(i,1,5,e) & - + Dz(k,6) * u(i,1,6,e) + + Dz(k,6) * u(i,1,6,e) end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -1473,7 +1472,7 @@ subroutine ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dxt(i,3) * ur(3,j,1) & + Dxt(i,4) * ur(4,j,1) & + Dxt(i,5) * ur(5,j,1) & - + Dxt(i,6) * ur(6,j,1) + + Dxt(i,6) * ur(6,j,1) end do end do @@ -1486,7 +1485,7 @@ subroutine ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dyt(j,3) * us(i,3,k) & + Dyt(j,4) * us(i,4,k) & + Dyt(j,5) * us(i,5,k) & - + Dyt(j,6) * us(i,6,k) + + Dyt(j,6) * us(i,6,k) end do end do end do @@ -1499,7 +1498,7 @@ subroutine ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dzt(k,3) * ut(i,1,3) & + Dzt(k,4) * ut(i,1,4) & + Dzt(k,5) * ut(i,1,5) & - + Dzt(k,6) * ut(i,1,6) + + Dzt(k,6) * ut(i,1,6) end do end do @@ -1540,7 +1539,7 @@ subroutine ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dx(i,2) * u(2,j,1,e) & + Dx(i,3) * u(3,j,1,e) & + Dx(i,4) * u(4,j,1,e) & - + Dx(i,5) * u(5,j,1,e) + + Dx(i,5) * u(5,j,1,e) end do end do @@ -1551,7 +1550,7 @@ subroutine ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dy(j,2) * u(i,2,k,e) & + Dy(j,3) * u(i,3,k,e) & + Dy(j,4) * u(i,4,k,e) & - + Dy(j,5) * u(i,5,k,e) + + Dy(j,5) * u(i,5,k,e) end do end do end do @@ -1562,11 +1561,11 @@ subroutine ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dz(k,2) * u(i,1,2,e) & + Dz(k,3) * u(i,1,3,e) & + Dz(k,4) * u(i,1,4,e) & - + Dz(k,5) * u(i,1,5,e) + + Dz(k,5) * u(i,1,5,e) end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -1587,7 +1586,7 @@ subroutine ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dxt(i,2) * ur(2,j,1) & + Dxt(i,3) * ur(3,j,1) & + Dxt(i,4) * ur(4,j,1) & - + Dxt(i,5) * ur(5,j,1) + + Dxt(i,5) * ur(5,j,1) end do end do @@ -1599,7 +1598,7 @@ subroutine ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dyt(j,2) * us(i,2,k) & + Dyt(j,3) * us(i,3,k) & + Dyt(j,4) * us(i,4,k) & - + Dyt(j,5) * us(i,5,k) + + Dyt(j,5) * us(i,5,k) end do end do end do @@ -1611,7 +1610,7 @@ subroutine ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dzt(k,2) * ut(i,1,2) & + Dzt(k,3) * ut(i,1,3) & + Dzt(k,4) * ut(i,1,4) & - + Dzt(k,5) * ut(i,1,5) + + Dzt(k,5) * ut(i,1,5) end do end do @@ -1651,7 +1650,7 @@ subroutine ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & wur(i,j,1) = Dx(i,1) * u(1,j,1,e) & + Dx(i,2) * u(2,j,1,e) & + Dx(i,3) * u(3,j,1,e) & - + Dx(i,4) * u(4,j,1,e) + + Dx(i,4) * u(4,j,1,e) end do end do @@ -1661,7 +1660,7 @@ subroutine ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & wus(i,j,k) = Dy(j,1) * u(i,1,k,e) & + Dy(j,2) * u(i,2,k,e) & + Dy(j,3) * u(i,3,k,e) & - + Dy(j,4) * u(i,4,k,e) + + Dy(j,4) * u(i,4,k,e) end do end do end do @@ -1671,11 +1670,11 @@ subroutine ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & wut(i,1,k) = Dz(k,1) * u(i,1,1,e) & + Dz(k,2) * u(i,1,2,e) & + Dz(k,3) * u(i,1,3,e) & - + Dz(k,4) * u(i,1,4,e) + + Dz(k,4) * u(i,1,4,e) end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -1695,7 +1694,7 @@ subroutine ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & w(i,j,1,e) = Dxt(i,1) * ur(1,j,1) & + Dxt(i,2) * ur(2,j,1) & + Dxt(i,3) * ur(3,j,1) & - + Dxt(i,4) * ur(4,j,1) + + Dxt(i,4) * ur(4,j,1) end do end do @@ -1706,7 +1705,7 @@ subroutine ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dyt(j,1) * us(i,1,k) & + Dyt(j,2) * us(i,2,k) & + Dyt(j,3) * us(i,3,k) & - + Dyt(j,4) * us(i,4,k) + + Dyt(j,4) * us(i,4,k) end do end do end do @@ -1717,7 +1716,7 @@ subroutine ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & + Dzt(k,1) * ut(i,1,1) & + Dzt(k,2) * ut(i,1,2) & + Dzt(k,3) * ut(i,1,3) & - + Dzt(k,4) * ut(i,1,4) + + Dzt(k,4) * ut(i,1,4) end do end do @@ -1756,7 +1755,7 @@ subroutine ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do i = 1, lx wur(i,j,1) = Dx(i,1) * u(1,j,1,e) & + Dx(i,2) * u(2,j,1,e) & - + Dx(i,3) * u(3,j,1,e) + + Dx(i,3) * u(3,j,1,e) end do end do @@ -1765,7 +1764,7 @@ subroutine ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do i = 1, lx wus(i,j,k) = Dy(j,1) * u(i,1,k,e) & + Dy(j,2) * u(i,2,k,e) & - + Dy(j,3) * u(i,3,k,e) + + Dy(j,3) * u(i,3,k,e) end do end do end do @@ -1774,11 +1773,11 @@ subroutine ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do i = 1, lx*lx wut(i,1,k) = Dz(k,1) * u(i,1,1,e) & + Dz(k,2) * u(i,1,2,e) & - + Dz(k,3) * u(i,1,3,e) + + Dz(k,3) * u(i,1,3,e) end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -1797,7 +1796,7 @@ subroutine ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do i = 1, lx w(i,j,1,e) = Dxt(i,1) * ur(1,j,1) & + Dxt(i,2) * ur(2,j,1) & - + Dxt(i,3) * ur(3,j,1) + + Dxt(i,3) * ur(3,j,1) end do end do @@ -1807,7 +1806,7 @@ subroutine ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & w(i,j,k,e) = w(i,j,k,e) & + Dyt(j,1) * us(i,1,k) & + Dyt(j,2) * us(i,2,k) & - + Dyt(j,3) * us(i,3,k) + + Dyt(j,3) * us(i,3,k) end do end do end do @@ -1817,7 +1816,7 @@ subroutine ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & w(i,1,k,e) = w(i,1,k,e) & + Dzt(k,1) * ut(i,1,1) & + Dzt(k,2) * ut(i,1,2) & - + Dzt(k,3) * ut(i,1,3) + + Dzt(k,3) * ut(i,1,3) end do end do @@ -1855,7 +1854,7 @@ subroutine ax_helm_lx2(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do j = 1, lx * lx do i = 1, lx wur(i,j,1) = Dx(i,1) * u(1,j,1,e) & - + Dx(i,2) * u(2,j,1,e) + + Dx(i,2) * u(2,j,1,e) end do end do @@ -1863,7 +1862,7 @@ subroutine ax_helm_lx2(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do j = 1, lx do i = 1, lx wus(i,j,k) = Dy(j,1) * u(i,1,k,e) & - + Dy(j,2) * u(i,2,k,e) + + Dy(j,2) * u(i,2,k,e) end do end do end do @@ -1871,11 +1870,11 @@ subroutine ax_helm_lx2(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do k = 1, lx do i = 1, lx*lx wut(i,1,k) = Dz(k,1) * u(i,1,1,e) & - + Dz(k,2) * u(i,1,2,e) + + Dz(k,2) * u(i,1,2,e) end do end do - do i = 1, lx*lx*lx + do i = 1, lx*lx*lx ur(i,1,1) = h1(i,1,1,e) & * ( G11(i,1,1,e) * wur(i,1,1) & + G12(i,1,1,e) * wus(i,1,1) & @@ -1893,7 +1892,7 @@ subroutine ax_helm_lx2(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do j = 1, lx*lx do i = 1, lx w(i,j,1,e) = Dxt(i,1) * ur(1,j,1) & - + Dxt(i,2) * ur(2,j,1) + + Dxt(i,2) * ur(2,j,1) end do end do @@ -1902,7 +1901,7 @@ subroutine ax_helm_lx2(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do i = 1, lx w(i,j,k,e) = w(i,j,k,e) & + Dyt(j,1) * us(i,1,k) & - + Dyt(j,2) * us(i,2,k) + + Dyt(j,2) * us(i,2,k) end do end do end do @@ -1911,7 +1910,7 @@ subroutine ax_helm_lx2(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do i = 1, lx*lx w(i,1,k,e) = w(i,1,k,e) & + Dzt(k,1) * ut(i,1,1) & - + Dzt(k,2) * ut(i,1,2) + + Dzt(k,2) * ut(i,1,2) end do end do diff --git a/src/math/bcknd/cpu/cdtp.f90 b/src/math/bcknd/cpu/cdtp.f90 index d2b3389224c..1500f1384b1 100644 --- a/src/math/bcknd/cpu/cdtp.f90 +++ b/src/math/bcknd/cpu/cdtp.f90 @@ -36,7 +36,7 @@ module cpu_cdtp implicit none contains - + subroutine cpu_cdtp_lx(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, lx) integer, intent(in) :: nel, lx real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx @@ -47,26 +47,26 @@ subroutine cpu_cdtp_lx(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, lx) integer :: e, i, j, k, l do e = 1, nel - + do i = 1, lx*lx*lx wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e) end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e) end do - + do j = 1, lx * lx do i = 1, lx tmp = 0.0_rp - !DIR$ LOOP_INFO MIN_TRIPS(15) + !DIR$ LOOP_INFO MIN_TRIPS(15) do k = 1, lx tmp = tmp + dxt(i,k) * ta1(k,j,1) end do dtx(i,j,1,e) = tmp end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do @@ -75,52 +75,52 @@ subroutine cpu_cdtp_lx(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, lx) do j = 1, lx do i = 1, lx tmp = 0.0_rp - !DIR$ LOOP_INFO MIN_TRIPS(15) + !DIR$ LOOP_INFO MIN_TRIPS(15) do l = 1, lx - tmp = tmp + dyt(j,l) * ta1(i,l,k) + tmp = tmp + dyt(j,l) * ta1(i,l,k) end do dtx(i,j,k,e) = dtx(i,j,k,e) + tmp end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx tmp = 0.0_rp - !DIR$ LOOP_INFO MIN_TRIPS(15) + !DIR$ LOOP_INFO MIN_TRIPS(15) do l = 1, lx tmp = tmp + dzt(k,l) * ta1(i,1,l) end do dtx(i,1,k,e) = dtx(i,1,k,e) + tmp end do end do - + end do end subroutine cpu_cdtp_lx - + subroutine cpu_cdtp_lx14(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) integer, parameter :: lx = 14 integer, intent(in) :: nel real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx) :: wx, ta1 integer :: e, i, j, k - + do e = 1, nel - + do i = 1, lx*lx*lx wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e) end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e) end do - + do j = 1, lx * lx do i = 1, lx dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) & @@ -136,14 +136,14 @@ subroutine cpu_cdtp_lx14(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dxt(i,11) * ta1(11,j,1) & + dxt(i,12) * ta1(12,j,1) & + dxt(i,13) * ta1(13,j,1) & - + dxt(i,14) * ta1(14,j,1) + + dxt(i,14) * ta1(14,j,1) end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -161,15 +161,15 @@ subroutine cpu_cdtp_lx14(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dyt(j,11) * ta1(i,11,k) & + dyt(j,12) * ta1(i,12,k) & + dyt(j,13) * ta1(i,13,k) & - + dyt(j,14) * ta1(i,14,k) + + dyt(j,14) * ta1(i,14,k) end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx dtx(i,1,k,e) = dtx(i,1,k,e) & @@ -186,32 +186,32 @@ subroutine cpu_cdtp_lx14(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dzt(k,11) * ta1(i,1,11) & + dzt(k,12) * ta1(i,1,12) & + dzt(k,13) * ta1(i,1,13) & - + dzt(k,14) * ta1(i,1,14) + + dzt(k,14) * ta1(i,1,14) end do end do - + end do end subroutine cpu_cdtp_lx14 - + subroutine cpu_cdtp_lx13(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) integer, parameter :: lx = 13 integer, intent(in) :: nel real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx) :: wx, ta1 integer :: e, i, j, k - + do e = 1, nel - + do i = 1, lx*lx*lx wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e) end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e) end do - + do j = 1, lx * lx do i = 1, lx dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) & @@ -226,14 +226,14 @@ subroutine cpu_cdtp_lx13(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dxt(i,10) * ta1(10,j,1) & + dxt(i,11) * ta1(11,j,1) & + dxt(i,12) * ta1(12,j,1) & - + dxt(i,13) * ta1(13,j,1) + + dxt(i,13) * ta1(13,j,1) end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -254,11 +254,11 @@ subroutine cpu_cdtp_lx13(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx dtx(i,1,k,e) = dtx(i,1,k,e) & @@ -274,32 +274,32 @@ subroutine cpu_cdtp_lx13(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dzt(k,10) * ta1(i,1,10) & + dzt(k,11) * ta1(i,1,11) & + dzt(k,12) * ta1(i,1,12) & - + dzt(k,13) * ta1(i,1,13) + + dzt(k,13) * ta1(i,1,13) end do end do - + end do end subroutine cpu_cdtp_lx13 - + subroutine cpu_cdtp_lx12(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) integer, parameter :: lx = 12 integer, intent(in) :: nel real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx) :: wx, ta1 integer :: e, i, j, k - + do e = 1, nel - + do i = 1, lx*lx*lx wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e) end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e) end do - + do j = 1, lx * lx do i = 1, lx dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) & @@ -313,14 +313,14 @@ subroutine cpu_cdtp_lx12(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dxt(i,9) * ta1(9,j,1) & + dxt(i,10) * ta1(10,j,1) & + dxt(i,11) * ta1(11,j,1) & - + dxt(i,12) * ta1(12,j,1) + + dxt(i,12) * ta1(12,j,1) end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -340,11 +340,11 @@ subroutine cpu_cdtp_lx12(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx dtx(i,1,k,e) = dtx(i,1,k,e) & @@ -362,29 +362,29 @@ subroutine cpu_cdtp_lx12(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dzt(k,12) * ta1(i,1,12) end do end do - + end do end subroutine cpu_cdtp_lx12 - + subroutine cpu_cdtp_lx11(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) integer, parameter :: lx = 11 integer, intent(in) :: nel real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx) :: wx, ta1 integer :: e, i, j, k - + do e = 1, nel - + do i = 1, lx*lx*lx wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e) end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e) end do - + do j = 1, lx * lx do i = 1, lx dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) & @@ -397,14 +397,14 @@ subroutine cpu_cdtp_lx11(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dxt(i,8) * ta1(8,j,1) & + dxt(i,9) * ta1(9,j,1) & + dxt(i,10) * ta1(10,j,1) & - + dxt(i,11) * ta1(11,j,1) + + dxt(i,11) * ta1(11,j,1) end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -419,15 +419,15 @@ subroutine cpu_cdtp_lx11(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dyt(j,8) * ta1(i,8,k) & + dyt(j,9) * ta1(i,9,k) & + dyt(j,10) * ta1(i,10,k) & - + dyt(j,11) * ta1(i,11,k) + + dyt(j,11) * ta1(i,11,k) end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx dtx(i,1,k,e) = dtx(i,1,k,e) & @@ -441,22 +441,22 @@ subroutine cpu_cdtp_lx11(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dzt(k,8) * ta1(i,1,8) & + dzt(k,9) * ta1(i,1,9) & + dzt(k,10) * ta1(i,1,10) & - + dzt(k,11) * ta1(i,1,11) + + dzt(k,11) * ta1(i,1,11) end do end do - + end do end subroutine cpu_cdtp_lx11 - + subroutine cpu_cdtp_lx10(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) integer, parameter :: lx = 10 integer, intent(in) :: nel real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx) :: wx, ta1 integer :: e, i, j, k - + do e = 1, nel do i = 1, lx*lx*lx @@ -478,14 +478,14 @@ subroutine cpu_cdtp_lx10(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dxt(i,7) * ta1(7,j,1) & + dxt(i,8) * ta1(8,j,1) & + dxt(i,9) * ta1(9,j,1) & - + dxt(i,10) * ta1(10,j,1) + + dxt(i,10) * ta1(10,j,1) end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -499,16 +499,16 @@ subroutine cpu_cdtp_lx10(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dyt(j,7) * ta1(i,7,k) & + dyt(j,8) * ta1(i,8,k) & + dyt(j,9) * ta1(i,9,k) & - + dyt(j,10) * ta1(i,10,k) + + dyt(j,10) * ta1(i,10,k) end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx dtx(i,1,k,e) = dtx(i,1,k,e) & @@ -521,10 +521,10 @@ subroutine cpu_cdtp_lx10(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dzt(k,7) * ta1(i,1,7) & + dzt(k,8) * ta1(i,1,8) & + dzt(k,9) * ta1(i,1,9) & - + dzt(k,10) * ta1(i,1,10) + + dzt(k,10) * ta1(i,1,10) end do end do - + end do end subroutine cpu_cdtp_lx10 @@ -533,20 +533,20 @@ subroutine cpu_cdtp_lx9(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) integer, intent(in) :: nel real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx) :: wx, ta1 integer :: e, i, j, k - + do e = 1, nel - + do i = 1, lx*lx*lx wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e) end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e) end do - + do j = 1, lx * lx do i = 1, lx dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) & @@ -557,14 +557,14 @@ subroutine cpu_cdtp_lx9(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dxt(i,6) * ta1(6,j,1) & + dxt(i,7) * ta1(7,j,1) & + dxt(i,8) * ta1(8,j,1) & - + dxt(i,9) * ta1(9,j,1) + + dxt(i,9) * ta1(9,j,1) end do end do do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -577,15 +577,15 @@ subroutine cpu_cdtp_lx9(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dyt(j,6) * ta1(i,6,k) & + dyt(j,7) * ta1(i,7,k) & + dyt(j,8) * ta1(i,8,k) & - + dyt(j,9) * ta1(i,9,k) + + dyt(j,9) * ta1(i,9,k) end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx dtx(i,1,k,e) = dtx(i,1,k,e) & @@ -597,10 +597,10 @@ subroutine cpu_cdtp_lx9(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dzt(k,6) * ta1(i,1,6) & + dzt(k,7) * ta1(i,1,7) & + dzt(k,8) * ta1(i,1,8) & - + dzt(k,9) * ta1(i,1,9) + + dzt(k,9) * ta1(i,1,9) end do end do - + end do end subroutine cpu_cdtp_lx9 @@ -609,20 +609,20 @@ subroutine cpu_cdtp_lx8(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) integer, intent(in) :: nel real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx) :: wx, ta1 integer :: e, i, j, k - + do e = 1, nel - + do i = 1, lx*lx*lx wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e) end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e) end do - + do j = 1, lx * lx do i = 1, lx dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) & @@ -632,14 +632,14 @@ subroutine cpu_cdtp_lx8(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dxt(i,5) * ta1(5,j,1) & + dxt(i,6) * ta1(6,j,1) & + dxt(i,7) * ta1(7,j,1) & - + dxt(i,8) * ta1(8,j,1) + + dxt(i,8) * ta1(8,j,1) end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -655,11 +655,11 @@ subroutine cpu_cdtp_lx8(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx dtx(i,1,k,e) = dtx(i,1,k,e) & @@ -673,7 +673,7 @@ subroutine cpu_cdtp_lx8(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dzt(k,8) * ta1(i,1,8) end do end do - + end do end subroutine cpu_cdtp_lx8 @@ -682,16 +682,16 @@ subroutine cpu_cdtp_lx7(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) integer, intent(in) :: nel real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx) :: wx, ta1 integer :: e, i, j, k - + do e = 1, nel - + do i = 1, lx*lx*lx wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e) end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e) end do @@ -704,14 +704,14 @@ subroutine cpu_cdtp_lx7(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dxt(i,4) * ta1(4,j,1) & + dxt(i,5) * ta1(5,j,1) & + dxt(i,6) * ta1(6,j,1) & - + dxt(i,7) * ta1(7,j,1) + + dxt(i,7) * ta1(7,j,1) end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -722,15 +722,15 @@ subroutine cpu_cdtp_lx7(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dyt(j,4) * ta1(i,4,k) & + dyt(j,5) * ta1(i,5,k) & + dyt(j,6) * ta1(i,6,k) & - + dyt(j,7) * ta1(i,7,k) + + dyt(j,7) * ta1(i,7,k) end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx dtx(i,1,k,e) = dtx(i,1,k,e) & @@ -740,32 +740,32 @@ subroutine cpu_cdtp_lx7(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dzt(k,4) * ta1(i,1,4) & + dzt(k,5) * ta1(i,1,5) & + dzt(k,6) * ta1(i,1,6) & - + dzt(k,7) * ta1(i,1,7) + + dzt(k,7) * ta1(i,1,7) end do end do - + end do end subroutine cpu_cdtp_lx7 - + subroutine cpu_cdtp_lx6(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) integer, parameter :: lx = 6 integer, intent(in) :: nel real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx) :: wx, ta1 integer :: e, i, j, k do e = 1, nel - + do i = 1, lx*lx*lx wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e) end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e) end do - + do j = 1, lx * lx do i = 1, lx dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) & @@ -773,14 +773,14 @@ subroutine cpu_cdtp_lx6(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dxt(i,3) * ta1(3,j,1) & + dxt(i,4) * ta1(4,j,1) & + dxt(i,5) * ta1(5,j,1) & - + dxt(i,6) * ta1(6,j,1) + + dxt(i,6) * ta1(6,j,1) end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -790,15 +790,15 @@ subroutine cpu_cdtp_lx6(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dyt(j,3) * ta1(i,3,k) & + dyt(j,4) * ta1(i,4,k) & + dyt(j,5) * ta1(i,5,k) & - + dyt(j,6) * ta1(i,6,k) + + dyt(j,6) * ta1(i,6,k) end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx dtx(i,1,k,e) = dtx(i,1,k,e) & @@ -807,10 +807,10 @@ subroutine cpu_cdtp_lx6(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dzt(k,3) * ta1(i,1,3) & + dzt(k,4) * ta1(i,1,4) & + dzt(k,5) * ta1(i,1,5) & - + dzt(k,6) * ta1(i,1,6) + + dzt(k,6) * ta1(i,1,6) end do end do - + end do end subroutine cpu_cdtp_lx6 @@ -819,34 +819,34 @@ subroutine cpu_cdtp_lx5(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) integer, intent(in) :: nel real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx) :: wx, ta1 integer :: e, i, j, k - + do e = 1, nel - + do i = 1, lx*lx*lx wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e) end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e) end do - + do j = 1, lx * lx do i = 1, lx dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) & + dxt(i,2) * ta1(2,j,1) & + dxt(i,3) * ta1(3,j,1) & + dxt(i,4) * ta1(4,j,1) & - + dxt(i,5) * ta1(5,j,1) + + dxt(i,5) * ta1(5,j,1) end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -855,15 +855,15 @@ subroutine cpu_cdtp_lx5(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dyt(j,2) * ta1(i,2,k) & + dyt(j,3) * ta1(i,3,k) & + dyt(j,4) * ta1(i,4,k) & - + dyt(j,5) * ta1(i,5,k) + + dyt(j,5) * ta1(i,5,k) end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx dtx(i,1,k,e) = dtx(i,1,k,e) & @@ -871,10 +871,10 @@ subroutine cpu_cdtp_lx5(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dzt(k,2) * ta1(i,1,2) & + dzt(k,3) * ta1(i,1,3) & + dzt(k,4) * ta1(i,1,4) & - + dzt(k,5) * ta1(i,1,5) + + dzt(k,5) * ta1(i,1,5) end do end do - + end do end subroutine cpu_cdtp_lx5 @@ -883,12 +883,12 @@ subroutine cpu_cdtp_lx4(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) integer, intent(in) :: nel real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx) :: wx, ta1 integer :: e, i, j, k do e = 1, nel - + do i = 1, lx*lx*lx wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e) end do @@ -902,14 +902,14 @@ subroutine cpu_cdtp_lx4(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) & + dxt(i,2) * ta1(2,j,1) & + dxt(i,3) * ta1(3,j,1) & - + dxt(i,4) * ta1(4,j,1) + + dxt(i,4) * ta1(4,j,1) end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -917,25 +917,25 @@ subroutine cpu_cdtp_lx4(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dyt(j,1) * ta1(i,1,k) & + dyt(j,2) * ta1(i,2,k) & + dyt(j,3) * ta1(i,3,k) & - + dyt(j,4) * ta1(i,4,k) + + dyt(j,4) * ta1(i,4,k) end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx dtx(i,1,k,e) = dtx(i,1,k,e) & + dzt(k,1) * ta1(i,1,1) & + dzt(k,2) * ta1(i,1,2) & + dzt(k,3) * ta1(i,1,3) & - + dzt(k,4) * ta1(i,1,4) + + dzt(k,4) * ta1(i,1,4) end do end do - + end do end subroutine cpu_cdtp_lx4 @@ -944,16 +944,16 @@ subroutine cpu_cdtp_lx3(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) integer, intent(in) :: nel real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx) :: wx, ta1 integer :: e, i, j, k do e = 1, nel - + do i = 1, lx*lx*lx wx(i,1,1) = ( B(i,1,1,e) * x(i,1,1,e) ) / jac(i,1,1,e) end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dr(i,1,1,e) end do @@ -965,44 +965,44 @@ subroutine cpu_cdtp_lx3(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) + dxt(i,3) * ta1(3,j,1) end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx dtx(i,j,k,e) = dtx(i,j,k,e) & + dyt(j,1) * ta1(i,1,k) & + dyt(j,2) * ta1(i,2,k) & - + dyt(j,3) * ta1(i,3,k) + + dyt(j,3) * ta1(i,3,k) end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx dtx(i,1,k,e) = dtx(i,1,k,e) & + dzt(k,1) * ta1(i,1,1) & + dzt(k,2) * ta1(i,1,2) & - + dzt(k,3) * ta1(i,1,3) + + dzt(k,3) * ta1(i,1,3) end do end do - + end do end subroutine cpu_cdtp_lx3 - + subroutine cpu_cdtp_lx2(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) integer, parameter :: lx = 2 integer, intent(in) :: nel real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx) :: wx, ta1 integer :: e, i, j, k @@ -1019,36 +1019,36 @@ subroutine cpu_cdtp_lx2(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel) do j = 1, lx * lx do i = 1, lx dtx(i,j,1,e) = dxt(i,1) * ta1(1,j,1) & - + dxt(i,2) * ta1(2,j,1) + + dxt(i,2) * ta1(2,j,1) end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx dtx(i,j,k,e) = dtx(i,j,k,e) & + dyt(j,1) * ta1(i,1,k) & - + dyt(j,2) * ta1(i,2,k) + + dyt(j,2) * ta1(i,2,k) end do end do end do - + do i = 1, lx*lx*lx ta1(i,1,1) = wx(i,1,1) * dt(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx dtx(i,1,k,e) = dtx(i,1,k,e) & + dzt(k,1) * ta1(i,1,1) & - + dzt(k,2) * ta1(i,1,2) + + dzt(k,2) * ta1(i,1,2) end do end do - + end do end subroutine cpu_cdtp_lx2 diff --git a/src/math/bcknd/cpu/conv1.f90 b/src/math/bcknd/cpu/conv1.f90 index 0739fd70016..90bd51421ba 100644 --- a/src/math/bcknd/cpu/conv1.f90 +++ b/src/math/bcknd/cpu/conv1.f90 @@ -36,7 +36,7 @@ module cpu_conv1 implicit none contains - + subroutine cpu_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv, lx) @@ -47,7 +47,7 @@ subroutine cpu_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt @@ -64,7 +64,7 @@ subroutine cpu_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, & dudr(i,j,1) = tmp end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -76,7 +76,7 @@ subroutine cpu_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, & end do end do end do - + do k = 1, lx do i = 1, lx*lx tmp = 0.0_rp @@ -86,7 +86,7 @@ subroutine cpu_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, & dudt(i,1,k) = tmp end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & @@ -103,9 +103,9 @@ subroutine cpu_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) end do end do - + end subroutine cpu_conv1_lx - + subroutine cpu_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv) @@ -117,7 +117,7 @@ subroutine cpu_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt @@ -139,10 +139,10 @@ subroutine cpu_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, & + dx(i,11) * u(11,j,1,e) & + dx(i,12) * u(12,j,1,e) & + dx(i,13) * u(13,j,1,e) & - + dx(i,14) * u(14,j,1,e) + + dx(i,14) * u(14,j,1,e) end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -159,11 +159,11 @@ subroutine cpu_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, & + dy(j,11) * u(i,11,k,e) & + dy(j,12) * u(i,12,k,e) & + dy(j,13) * u(i,13,k,e) & - + dy(j,14) * u(i,14,k,e) + + dy(j,14) * u(i,14,k,e) end do end do end do - + do k = 1, lx do i = 1, lx*lx dudt(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -179,10 +179,10 @@ subroutine cpu_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, & + dz(k,11) * u(i,1,11,e) & + dz(k,12) * u(i,1,12,e) & + dz(k,13) * u(i,1,13,e) & - + dz(k,14) * u(i,1,14,e) + + dz(k,14) * u(i,1,14,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & @@ -199,9 +199,9 @@ subroutine cpu_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) end do end do - + end subroutine cpu_conv1_lx14 - + subroutine cpu_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv) @@ -213,7 +213,7 @@ subroutine cpu_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt @@ -234,10 +234,10 @@ subroutine cpu_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, & + dx(i,10) * u(10,j,1,e) & + dx(i,11) * u(11,j,1,e) & + dx(i,12) * u(12,j,1,e) & - + dx(i,13) * u(13,j,1,e) + + dx(i,13) * u(13,j,1,e) end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -253,11 +253,11 @@ subroutine cpu_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, & + dy(j,10) * u(i,10,k,e) & + dy(j,11) * u(i,11,k,e) & + dy(j,12) * u(i,12,k,e) & - + dy(j,13) * u(i,13,k,e) + + dy(j,13) * u(i,13,k,e) end do end do end do - + do k = 1, lx do i = 1, lx*lx dudt(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -272,12 +272,12 @@ subroutine cpu_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, & + dz(k,10) * u(i,1,10,e) & + dz(k,11) * u(i,1,11,e) & + dz(k,12) * u(i,1,12,e) & - + dz(k,13) * u(i,1,13,e) + + dz(k,13) * u(i,1,13,e) end do end do - + do i = 1, lx * lx * lx - du(i,1,1,e) = jacinv(i,1,1,e) & + du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & * ( drdx(i,1,1,e) * dudr(i,1,1) & + dsdx(i,1,1,e) * duds(i,1,1) & @@ -290,11 +290,11 @@ subroutine cpu_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, & * ( drdz(i,1,1,e) * dudr(i,1,1) & + dsdz(i,1,1,e) * duds(i,1,1) & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) - end do - end do - - end subroutine cpu_conv1_lx13 - + end do + end do + + end subroutine cpu_conv1_lx13 + subroutine cpu_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv) @@ -306,7 +306,7 @@ subroutine cpu_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt @@ -326,10 +326,10 @@ subroutine cpu_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, & + dx(i,9) * u(9,j,1,e) & + dx(i,10) * u(10,j,1,e) & + dx(i,11) * u(11,j,1,e) & - + dx(i,12) * u(12,j,1,e) + + dx(i,12) * u(12,j,1,e) end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -348,7 +348,7 @@ subroutine cpu_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, & end do end do end do - + do k = 1, lx do i = 1, lx*lx dudt(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -363,11 +363,11 @@ subroutine cpu_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, & + dz(k,10) * u(i,1,10,e) & + dz(k,11) * u(i,1,11,e) & + dz(k,12) * u(i,1,12,e) - end do - end do - - do i = 1, lx * lx * lx - du(i,1,1,e) = jacinv(i,1,1,e) & + end do + end do + + do i = 1, lx * lx * lx + du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & * ( drdx(i,1,1,e) * dudr(i,1,1) & + dsdx(i,1,1,e) * duds(i,1,1) & @@ -380,12 +380,12 @@ subroutine cpu_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, & * ( drdz(i,1,1,e) * dudr(i,1,1) & + dsdz(i,1,1,e) * duds(i,1,1) & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) - end do - end do - - end subroutine cpu_conv1_lx12 + end do + end do + + end subroutine cpu_conv1_lx12 - subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, & + subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv) integer, parameter :: lx = 11 @@ -396,7 +396,7 @@ subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt @@ -415,10 +415,10 @@ subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, & + dx(i,8) * u(8,j,1,e) & + dx(i,9) * u(9,j,1,e) & + dx(i,10) * u(10,j,1,e) & - + dx(i,11) * u(11,j,1,e) + + dx(i,11) * u(11,j,1,e) end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -432,11 +432,11 @@ subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, & + dy(j,8) * u(i,8,k,e) & + dy(j,9) * u(i,9,k,e) & + dy(j,10) * u(i,10,k,e) & - + dy(j,11) * u(i,11,k,e) + + dy(j,11) * u(i,11,k,e) end do end do end do - + do k = 1, lx do i = 1, lx*lx dudt(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -449,12 +449,12 @@ subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, & + dz(k,8) * u(i,1,8,e) & + dz(k,9) * u(i,1,9,e) & + dz(k,10) * u(i,1,10,e) & - + dz(k,11) * u(i,1,11,e) - end do - end do - - do i = 1, lx * lx * lx - du(i,1,1,e) = jacinv(i,1,1,e) & + + dz(k,11) * u(i,1,11,e) + end do + end do + + do i = 1, lx * lx * lx + du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & * ( drdx(i,1,1,e) * dudr(i,1,1) & + dsdx(i,1,1,e) * duds(i,1,1) & @@ -467,12 +467,12 @@ subroutine cpu_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, & * ( drdz(i,1,1,e) * dudr(i,1,1) & + dsdz(i,1,1,e) * duds(i,1,1) & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) - end do - end do - - end subroutine cpu_conv1_lx11 - - subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, & + end do + end do + + end subroutine cpu_conv1_lx11 + + subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv) integer, parameter :: lx = 10 @@ -483,7 +483,7 @@ subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt @@ -501,10 +501,10 @@ subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, & + dx(i,7) * u(7,j,1,e) & + dx(i,8) * u(8,j,1,e) & + dx(i,9) * u(9,j,1,e) & - + dx(i,10) * u(10,j,1,e) + + dx(i,10) * u(10,j,1,e) end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -517,11 +517,11 @@ subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, & + dy(j,7) * u(i,7,k,e) & + dy(j,8) * u(i,8,k,e) & + dy(j,9) * u(i,9,k,e) & - + dy(j,10) * u(i,10,k,e) + + dy(j,10) * u(i,10,k,e) end do end do end do - + do k = 1, lx do i = 1, lx*lx dudt(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -533,12 +533,12 @@ subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, & + dz(k,7) * u(i,1,7,e) & + dz(k,8) * u(i,1,8,e) & + dz(k,9) * u(i,1,9,e) & - + dz(k,10) * u(i,1,10,e) - end do - end do - - do i = 1, lx * lx * lx - du(i,1,1,e) = jacinv(i,1,1,e) & + + dz(k,10) * u(i,1,10,e) + end do + end do + + do i = 1, lx * lx * lx + du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & * ( drdx(i,1,1,e) * dudr(i,1,1) & + dsdx(i,1,1,e) * duds(i,1,1) & @@ -551,12 +551,12 @@ subroutine cpu_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, & * ( drdz(i,1,1,e) * dudr(i,1,1) & + dsdz(i,1,1,e) * duds(i,1,1) & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) - end do - end do - - end subroutine cpu_conv1_lx10 - - subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, & + end do + end do + + end subroutine cpu_conv1_lx10 + + subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv) integer, parameter :: lx = 9 @@ -567,7 +567,7 @@ subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt @@ -584,10 +584,10 @@ subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, & + dx(i,6) * u(6,j,1,e) & + dx(i,7) * u(7,j,1,e) & + dx(i,8) * u(8,j,1,e) & - + dx(i,9) * u(9,j,1,e) + + dx(i,9) * u(9,j,1,e) end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -599,11 +599,11 @@ subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, & + dy(j,6) * u(i,6,k,e) & + dy(j,7) * u(i,7,k,e) & + dy(j,8) * u(i,8,k,e) & - + dy(j,9) * u(i,9,k,e) + + dy(j,9) * u(i,9,k,e) end do end do end do - + do k = 1, lx do i = 1, lx*lx dudt(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -614,12 +614,12 @@ subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, & + dz(k,6) * u(i,1,6,e) & + dz(k,7) * u(i,1,7,e) & + dz(k,8) * u(i,1,8,e) & - + dz(k,9) * u(i,1,9,e) - end do - end do - - do i = 1, lx * lx * lx - du(i,1,1,e) = jacinv(i,1,1,e) & + + dz(k,9) * u(i,1,9,e) + end do + end do + + do i = 1, lx * lx * lx + du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & * ( drdx(i,1,1,e) * dudr(i,1,1) & + dsdx(i,1,1,e) * duds(i,1,1) & @@ -632,12 +632,12 @@ subroutine cpu_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, & * ( drdz(i,1,1,e) * dudr(i,1,1) & + dsdz(i,1,1,e) * duds(i,1,1) & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) - end do - end do - - end subroutine cpu_conv1_lx9 + end do + end do - subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, & + end subroutine cpu_conv1_lx9 + + subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv) integer, parameter :: lx = 8 @@ -648,7 +648,7 @@ subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt @@ -664,10 +664,10 @@ subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, & + dx(i,5) * u(5,j,1,e) & + dx(i,6) * u(6,j,1,e) & + dx(i,7) * u(7,j,1,e) & - + dx(i,8) * u(8,j,1,e) + + dx(i,8) * u(8,j,1,e) end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -678,11 +678,11 @@ subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, & + dy(j,5) * u(i,5,k,e) & + dy(j,6) * u(i,6,k,e) & + dy(j,7) * u(i,7,k,e) & - + dy(j,8) * u(i,8,k,e) + + dy(j,8) * u(i,8,k,e) end do end do end do - + do k = 1, lx do i = 1, lx*lx dudt(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -692,12 +692,12 @@ subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, & + dz(k,5) * u(i,1,5,e) & + dz(k,6) * u(i,1,6,e) & + dz(k,7) * u(i,1,7,e) & - + dz(k,8) * u(i,1,8,e) - end do - end do - - do i = 1, lx * lx * lx - du(i,1,1,e) = jacinv(i,1,1,e) & + + dz(k,8) * u(i,1,8,e) + end do + end do + + do i = 1, lx * lx * lx + du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & * ( drdx(i,1,1,e) * dudr(i,1,1) & + dsdx(i,1,1,e) * duds(i,1,1) & @@ -710,12 +710,12 @@ subroutine cpu_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, & * ( drdz(i,1,1,e) * dudr(i,1,1) & + dsdz(i,1,1,e) * duds(i,1,1) & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) - end do - end do - - end subroutine cpu_conv1_lx8 + end do + end do + + end subroutine cpu_conv1_lx8 - subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, & + subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv) integer, parameter :: lx = 7 @@ -726,7 +726,7 @@ subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt @@ -741,10 +741,10 @@ subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, & + dx(i,4) * u(4,j,1,e) & + dx(i,5) * u(5,j,1,e) & + dx(i,6) * u(6,j,1,e) & - + dx(i,7) * u(7,j,1,e) + + dx(i,7) * u(7,j,1,e) end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -754,11 +754,11 @@ subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, & + dy(j,4) * u(i,4,k,e) & + dy(j,5) * u(i,5,k,e) & + dy(j,6) * u(i,6,k,e) & - + dy(j,7) * u(i,7,k,e) + + dy(j,7) * u(i,7,k,e) end do end do end do - + do k = 1, lx do i = 1, lx*lx dudt(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -768,11 +768,11 @@ subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, & + dz(k,5) * u(i,1,5,e) & + dz(k,6) * u(i,1,6,e) & + dz(k,7) * u(i,1,7,e) - end do - end do - - do i = 1, lx * lx * lx - du(i,1,1,e) = jacinv(i,1,1,e) & + end do + end do + + do i = 1, lx * lx * lx + du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & * ( drdx(i,1,1,e) * dudr(i,1,1) & + dsdx(i,1,1,e) * duds(i,1,1) & @@ -785,12 +785,12 @@ subroutine cpu_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, & * ( drdz(i,1,1,e) * dudr(i,1,1) & + dsdz(i,1,1,e) * duds(i,1,1) & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) - end do - end do - - end subroutine cpu_conv1_lx7 + end do + end do + + end subroutine cpu_conv1_lx7 - subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, & + subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv) integer, parameter :: lx = 6 @@ -801,12 +801,12 @@ subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt integer :: e, i, j, k - + do e = 1, nelv do j = 1, lx * lx do i = 1, lx @@ -815,10 +815,10 @@ subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, & + dx(i,3) * u(3,j,1,e) & + dx(i,4) * u(4,j,1,e) & + dx(i,5) * u(5,j,1,e) & - + dx(i,6) * u(6,j,1,e) + + dx(i,6) * u(6,j,1,e) end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -827,11 +827,11 @@ subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, & + dy(j,3) * u(i,3,k,e) & + dy(j,4) * u(i,4,k,e) & + dy(j,5) * u(i,5,k,e) & - + dy(j,6) * u(i,6,k,e) + + dy(j,6) * u(i,6,k,e) end do end do end do - + do k = 1, lx do i = 1, lx*lx dudt(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -839,12 +839,12 @@ subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, & + dz(k,3) * u(i,1,3,e) & + dz(k,4) * u(i,1,4,e) & + dz(k,5) * u(i,1,5,e) & - + dz(k,6) * u(i,1,6,e) - end do - end do - - do i = 1, lx * lx * lx - du(i,1,1,e) = jacinv(i,1,1,e) & + + dz(k,6) * u(i,1,6,e) + end do + end do + + do i = 1, lx * lx * lx + du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & * ( drdx(i,1,1,e) * dudr(i,1,1) & + dsdx(i,1,1,e) * duds(i,1,1) & @@ -857,12 +857,12 @@ subroutine cpu_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, & * ( drdz(i,1,1,e) * dudr(i,1,1) & + dsdz(i,1,1,e) * duds(i,1,1) & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) - end do - end do - - end subroutine cpu_conv1_lx6 + end do + end do - subroutine cpu_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, & + end subroutine cpu_conv1_lx6 + + subroutine cpu_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv) integer, parameter :: lx = 5 @@ -873,7 +873,7 @@ subroutine cpu_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt @@ -886,10 +886,10 @@ subroutine cpu_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, & + dx(i,2) * u(2,j,1,e) & + dx(i,3) * u(3,j,1,e) & + dx(i,4) * u(4,j,1,e) & - + dx(i,5) * u(5,j,1,e) + + dx(i,5) * u(5,j,1,e) end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -897,23 +897,23 @@ subroutine cpu_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, & + dy(j,2) * u(i,2,k,e) & + dy(j,3) * u(i,3,k,e) & + dy(j,4) * u(i,4,k,e) & - + dy(j,5) * u(i,5,k,e) + + dy(j,5) * u(i,5,k,e) end do end do end do - + do k = 1, lx do i = 1, lx*lx dudt(i,1,k) = dz(k,1) * u(i,1,1,e) & + dz(k,2) * u(i,1,2,e) & + dz(k,3) * u(i,1,3,e) & + dz(k,4) * u(i,1,4,e) & - + dz(k,5) * u(i,1,5,e) - end do - end do - - do i = 1, lx * lx * lx - du(i,1,1,e) = jacinv(i,1,1,e) & + + dz(k,5) * u(i,1,5,e) + end do + end do + + do i = 1, lx * lx * lx + du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & * ( drdx(i,1,1,e) * dudr(i,1,1) & + dsdx(i,1,1,e) * duds(i,1,1) & @@ -926,12 +926,12 @@ subroutine cpu_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, & * ( drdz(i,1,1,e) * dudr(i,1,1) & + dsdz(i,1,1,e) * duds(i,1,1) & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) - end do - end do - - end subroutine cpu_conv1_lx5 + end do + end do + + end subroutine cpu_conv1_lx5 - subroutine cpu_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, & + subroutine cpu_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv) integer, parameter :: lx = 4 @@ -942,7 +942,7 @@ subroutine cpu_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt @@ -954,32 +954,32 @@ subroutine cpu_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, & dudr(i,j,1) = dx(i,1) * u(1,j,1,e) & + dx(i,2) * u(2,j,1,e) & + dx(i,3) * u(3,j,1,e) & - + dx(i,4) * u(4,j,1,e) + + dx(i,4) * u(4,j,1,e) end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx duds(i,j,k) = dy(j,1) * u(i,1,k,e) & + dy(j,2) * u(i,2,k,e) & + dy(j,3) * u(i,3,k,e) & - + dy(j,4) * u(i,4,k,e) + + dy(j,4) * u(i,4,k,e) end do end do end do - + do k = 1, lx do i = 1, lx*lx dudt(i,1,k) = dz(k,1) * u(i,1,1,e) & + dz(k,2) * u(i,1,2,e) & + dz(k,3) * u(i,1,3,e) & - + dz(k,4) * u(i,1,4,e) - end do - end do - - do i = 1, lx * lx * lx - du(i,1,1,e) = jacinv(i,1,1,e) & + + dz(k,4) * u(i,1,4,e) + end do + end do + + do i = 1, lx * lx * lx + du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & * ( drdx(i,1,1,e) * dudr(i,1,1) & + dsdx(i,1,1,e) * duds(i,1,1) & @@ -992,12 +992,12 @@ subroutine cpu_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, & * ( drdz(i,1,1,e) * dudr(i,1,1) & + dsdz(i,1,1,e) * duds(i,1,1) & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) - end do - end do - - end subroutine cpu_conv1_lx4 + end do + end do + + end subroutine cpu_conv1_lx4 - subroutine cpu_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, & + subroutine cpu_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv) integer, parameter :: lx = 3 @@ -1008,7 +1008,7 @@ subroutine cpu_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt @@ -1019,30 +1019,30 @@ subroutine cpu_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, & do i = 1, lx dudr(i,j,1) = dx(i,1) * u(1,j,1,e) & + dx(i,2) * u(2,j,1,e) & - + dx(i,3) * u(3,j,1,e) + + dx(i,3) * u(3,j,1,e) end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx duds(i,j,k) = dy(j,1) * u(i,1,k,e) & + dy(j,2) * u(i,2,k,e) & - + dy(j,3) * u(i,3,k,e) + + dy(j,3) * u(i,3,k,e) end do end do end do - + do k = 1, lx do i = 1, lx*lx dudt(i,1,k) = dz(k,1) * u(i,1,1,e) & + dz(k,2) * u(i,1,2,e) & - + dz(k,3) * u(i,1,3,e) - end do - end do - - do i = 1, lx * lx * lx - du(i,1,1,e) = jacinv(i,1,1,e) & + + dz(k,3) * u(i,1,3,e) + end do + end do + + do i = 1, lx * lx * lx + du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & * ( drdx(i,1,1,e) * dudr(i,1,1) & + dsdx(i,1,1,e) * duds(i,1,1) & @@ -1055,12 +1055,12 @@ subroutine cpu_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, & * ( drdz(i,1,1,e) * dudr(i,1,1) & + dsdz(i,1,1,e) * duds(i,1,1) & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) - end do - end do - - end subroutine cpu_conv1_lx3 + end do + end do + + end subroutine cpu_conv1_lx3 - subroutine cpu_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, & + subroutine cpu_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv) integer, parameter :: lx = 2 @@ -1071,7 +1071,7 @@ subroutine cpu_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: dudr real(kind=rp), dimension(lx,lx,lx) :: duds real(kind=rp), dimension(lx,lx,lx) :: dudt @@ -1081,28 +1081,28 @@ subroutine cpu_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx * lx do i = 1, lx dudr(i,j,1) = dx(i,1) * u(1,j,1,e) & - + dx(i,2) * u(2,j,1,e) + + dx(i,2) * u(2,j,1,e) end do end do - + do k = 1, lx do j = 1, lx do i = 1, lx duds(i,j,k) = dy(j,1) * u(i,1,k,e) & - + dy(j,2) * u(i,2,k,e) + + dy(j,2) * u(i,2,k,e) end do end do end do - + do k = 1, lx do i = 1, lx*lx dudt(i,1,k) = dz(k,1) * u(i,1,1,e) & - + dz(k,2) * u(i,1,2,e) - end do - end do - - do i = 1, lx * lx * lx - du(i,1,1,e) = jacinv(i,1,1,e) & + + dz(k,2) * u(i,1,2,e) + end do + end do + + do i = 1, lx * lx * lx + du(i,1,1,e) = jacinv(i,1,1,e) & * ( vx(i,1,1,e) & * ( drdx(i,1,1,e) * dudr(i,1,1) & + dsdx(i,1,1,e) * duds(i,1,1) & @@ -1115,9 +1115,9 @@ subroutine cpu_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, & * ( drdz(i,1,1,e) * dudr(i,1,1) & + dsdz(i,1,1,e) * duds(i,1,1) & + dtdz(i,1,1,e) * dudt(i,1,1) ) ) - end do - end do - - end subroutine cpu_conv1_lx2 + end do + end do + + end subroutine cpu_conv1_lx2 end module cpu_conv1 diff --git a/src/math/bcknd/cpu/dudxyz.f90 b/src/math/bcknd/cpu/dudxyz.f90 index 02f5c8e6523..27b8b409ab5 100644 --- a/src/math/bcknd/cpu/dudxyz.f90 +++ b/src/math/bcknd/cpu/dudxyz.f90 @@ -73,11 +73,11 @@ subroutine cpu_dudxyz_lx(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, lx) end do end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx tmp = 0.0_rp @@ -87,19 +87,19 @@ subroutine cpu_dudxyz_lx(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, lx) drst(i,1,k) = tmp end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e) end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e) end do - + end do - + end subroutine cpu_dudxyz_lx - + subroutine cpu_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) integer, parameter :: lx = 14 integer, intent(in) :: nel @@ -126,7 +126,7 @@ subroutine cpu_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dx(i,11) * u(11,j,1,e) & + dx(i,12) * u(12,j,1,e) & + dx(i,13) * u(13,j,1,e) & - + dx(i,14) * u(14,j,1,e) + + dx(i,14) * u(14,j,1,e) end do end do @@ -150,15 +150,15 @@ subroutine cpu_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dy(j,11) * u(i,11,k,e) & + dy(j,12) * u(i,12,k,e) & + dy(j,13) * u(i,13,k,e) & - + dy(j,14) * u(i,14,k,e) + + dy(j,14) * u(i,14,k,e) end do end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx drst(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -174,22 +174,22 @@ subroutine cpu_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dz(k,11) * u(i,1,11,e) & + dz(k,12) * u(i,1,12,e) & + dz(k,13) * u(i,1,13,e) & - + dz(k,14) * u(i,1,14,e) + + dz(k,14) * u(i,1,14,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e) end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e) end do - + end do - + end subroutine cpu_dudxyz_lx14 - + subroutine cpu_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) integer, parameter :: lx = 13 integer, intent(in) :: nel @@ -215,7 +215,7 @@ subroutine cpu_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dx(i,10) * u(10,j,1,e) & + dx(i,11) * u(11,j,1,e) & + dx(i,12) * u(12,j,1,e) & - + dx(i,13) * u(13,j,1,e) + + dx(i,13) * u(13,j,1,e) end do end do @@ -238,15 +238,15 @@ subroutine cpu_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dy(j,10) * u(i,10,k,e) & + dy(j,11) * u(i,11,k,e) & + dy(j,12) * u(i,12,k,e) & - + dy(j,13) * u(i,13,k,e) + + dy(j,13) * u(i,13,k,e) end do end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx drst(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -261,22 +261,22 @@ subroutine cpu_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dz(k,10) * u(i,1,10,e) & + dz(k,11) * u(i,1,11,e) & + dz(k,12) * u(i,1,12,e) & - + dz(k,13) * u(i,1,13,e) + + dz(k,13) * u(i,1,13,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e) end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e) end do - + end do - + end subroutine cpu_dudxyz_lx13 - + subroutine cpu_dudxyz_lx12(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) integer, parameter :: lx = 12 integer, intent(in) :: nel @@ -301,7 +301,7 @@ subroutine cpu_dudxyz_lx12(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dx(i,9) * u(9,j,1,e) & + dx(i,10) * u(10,j,1,e) & + dx(i,11) * u(11,j,1,e) & - + dx(i,12) * u(12,j,1,e) + + dx(i,12) * u(12,j,1,e) end do end do @@ -327,11 +327,11 @@ subroutine cpu_dudxyz_lx12(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) end do end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx drst(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -348,17 +348,17 @@ subroutine cpu_dudxyz_lx12(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dz(k,12) * u(i,1,12,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e) end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e) end do - + end do - + end subroutine cpu_dudxyz_lx12 subroutine cpu_dudxyz_lx11(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) @@ -384,14 +384,14 @@ subroutine cpu_dudxyz_lx11(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dx(i,8) * u(8,j,1,e) & + dx(i,9) * u(9,j,1,e) & + dx(i,10) * u(10,j,1,e) & - + dx(i,11) * u(11,j,1,e) + + dx(i,11) * u(11,j,1,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -426,20 +426,20 @@ subroutine cpu_dudxyz_lx11(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dz(k,8) * u(i,1,8,e) & + dz(k,9) * u(i,1,9,e) & + dz(k,10) * u(i,1,10,e) & - + dz(k,11) * u(i,1,11,e) + + dz(k,11) * u(i,1,11,e) end do end do do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e) end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e) end do - + end do - + end subroutine cpu_dudxyz_lx11 subroutine cpu_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) @@ -451,7 +451,7 @@ subroutine cpu_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: drst integer :: e, i, j, k - + do e = 1, nel do j = 1, lx * lx do i = 1, lx @@ -464,14 +464,14 @@ subroutine cpu_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dx(i,7) * u(7,j,1,e) & + dx(i,8) * u(8,j,1,e) & + dx(i,9) * u(9,j,1,e) & - + dx(i,10) * u(10,j,1,e) + + dx(i,10) * u(10,j,1,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -484,15 +484,15 @@ subroutine cpu_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dy(j,7) * u(i,7,k,e) & + dy(j,8) * u(i,8,k,e) & + dy(j,9) * u(i,9,k,e) & - + dy(j,10) * u(i,10,k,e) + + dy(j,10) * u(i,10,k,e) end do end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx drst(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -511,7 +511,7 @@ subroutine cpu_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e) end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e) end do @@ -541,7 +541,7 @@ subroutine cpu_dudxyz_lx9(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dx(i,6) * u(6,j,1,e) & + dx(i,7) * u(7,j,1,e) & + dx(i,8) * u(8,j,1,e) & - + dx(i,9) * u(9,j,1,e) + + dx(i,9) * u(9,j,1,e) end do end do @@ -560,15 +560,15 @@ subroutine cpu_dudxyz_lx9(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dy(j,6) * u(i,6,k,e) & + dy(j,7) * u(i,7,k,e) & + dy(j,8) * u(i,8,k,e) & - + dy(j,9) * u(i,9,k,e) + + dy(j,9) * u(i,9,k,e) end do end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx drst(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -579,10 +579,10 @@ subroutine cpu_dudxyz_lx9(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dz(k,6) * u(i,1,6,e) & + dz(k,7) * u(i,1,7,e) & + dz(k,8) * u(i,1,8,e) & - + dz(k,9) * u(i,1,9,e) + + dz(k,9) * u(i,1,9,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e) end do @@ -615,10 +615,10 @@ subroutine cpu_dudxyz_lx8(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dx(i,5) * u(5,j,1,e) & + dx(i,6) * u(6,j,1,e) & + dx(i,7) * u(7,j,1,e) & - + dx(i,8) * u(8,j,1,e) + + dx(i,8) * u(8,j,1,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e) end do @@ -637,7 +637,7 @@ subroutine cpu_dudxyz_lx8(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) end do end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e) end do @@ -651,7 +651,7 @@ subroutine cpu_dudxyz_lx8(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dz(k,5) * u(i,1,5,e) & + dz(k,6) * u(i,1,6,e) & + dz(k,7) * u(i,1,7,e) & - + dz(k,8) * u(i,1,8,e) + + dz(k,8) * u(i,1,8,e) end do end do @@ -662,9 +662,9 @@ subroutine cpu_dudxyz_lx8(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e) end do - + end do - + end subroutine cpu_dudxyz_lx8 subroutine cpu_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) @@ -676,7 +676,7 @@ subroutine cpu_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: drst integer :: e, i, j, k - + do e = 1, nel do j = 1, lx * lx do i = 1, lx @@ -686,10 +686,10 @@ subroutine cpu_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dx(i,4) * u(4,j,1,e) & + dx(i,5) * u(5,j,1,e) & + dx(i,6) * u(6,j,1,e) & - + dx(i,7) * u(7,j,1,e) + + dx(i,7) * u(7,j,1,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e) end do @@ -707,7 +707,7 @@ subroutine cpu_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) end do end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e) end do @@ -731,7 +731,7 @@ subroutine cpu_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e) end do - + end do end subroutine cpu_dudxyz_lx7 @@ -745,7 +745,7 @@ subroutine cpu_dudxyz_lx6(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: drst integer :: e, i, j, k - + do e = 1, nel do j = 1, lx * lx do i = 1, lx @@ -757,7 +757,7 @@ subroutine cpu_dudxyz_lx6(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dx(i,6) * u(6,j,1,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e) end do @@ -774,7 +774,7 @@ subroutine cpu_dudxyz_lx6(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) end do end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e) end do @@ -797,7 +797,7 @@ subroutine cpu_dudxyz_lx6(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e) end do - + end do end subroutine cpu_dudxyz_lx6 @@ -822,7 +822,7 @@ subroutine cpu_dudxyz_lx5(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dx(i,5) * u(5,j,1,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e) end do @@ -842,7 +842,7 @@ subroutine cpu_dudxyz_lx5(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx drst(i,1,k) = dz(k,1) * u(i,1,1,e) & @@ -852,7 +852,7 @@ subroutine cpu_dudxyz_lx5(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dz(k,5) * u(i,1,5,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e) end do @@ -874,7 +874,7 @@ subroutine cpu_dudxyz_lx4(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: drst integer :: e, i, j, k - + do e = 1, nel do j = 1, lx * lx do i = 1, lx @@ -888,7 +888,7 @@ subroutine cpu_dudxyz_lx4(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -899,7 +899,7 @@ subroutine cpu_dudxyz_lx4(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) end do end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e) end do @@ -912,7 +912,7 @@ subroutine cpu_dudxyz_lx4(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dz(k,4) * u(i,1,4,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e) end do @@ -934,7 +934,7 @@ subroutine cpu_dudxyz_lx3(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx) :: drst integer :: e, i, j, k - + do e = 1, nel do j = 1, lx * lx do i = 1, lx @@ -957,7 +957,7 @@ subroutine cpu_dudxyz_lx3(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) end do end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e) end do @@ -969,17 +969,17 @@ subroutine cpu_dudxyz_lx3(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) + dz(k,3) * u(i,1,3,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e) end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * jacinv(i,1,1,e) end do end do - + end subroutine cpu_dudxyz_lx3 subroutine cpu_dudxyz_lx2(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) @@ -996,14 +996,14 @@ subroutine cpu_dudxyz_lx2(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) do j = 1, lx * lx do i = 1, lx du(i,j,1,e) = dx(i,1) * u(1,j,1,e) & - + dx(i,2) * u(2,j,1,e) + + dx(i,2) * u(2,j,1,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) * dr(i,1,1,e) end do - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -1012,18 +1012,18 @@ subroutine cpu_dudxyz_lx2(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel) end do end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * ds(i,1,1,e) end do - + do k = 1, lx do i = 1, lx*lx drst(i,1,k) = dz(k,1) * u(i,1,1,e) & - + dz(k,2) * u(i,1,2,e) + + dz(k,2) * u(i,1,2,e) end do end do - + do i = 1, lx * lx * lx du(i,1,1,e) = du(i,1,1,e) + drst(i,1,1) * dt(i,1,1,e) end do diff --git a/src/math/bcknd/cpu/fdm_cpu.f90 b/src/math/bcknd/cpu/fdm_cpu.f90 index ce3d0587d5e..92b57cd0efd 100644 --- a/src/math/bcknd/cpu/fdm_cpu.f90 +++ b/src/math/bcknd/cpu/fdm_cpu.f90 @@ -1,4 +1,4 @@ -!> Fast Diagonalization +!> Fast Diagonalization module fdm_cpu use num_types, only : rp use tensor_cpu @@ -14,7 +14,7 @@ subroutine fdm_do_fast_cpu(e, r, s, d, nl, ldim, nelv) real(kind=rp), intent(inout) :: e(nl**ldim, nelv) real(kind=rp), intent(inout) :: r(nl**ldim, nelv) real(kind=rp), intent(inout) :: s(nl*nl,2,ldim, nelv) - real(kind=rp), intent(inout) :: d(nl**ldim, nelv) + real(kind=rp), intent(inout) :: d(nl**ldim, nelv) integer :: ie, nn, i nn = nl**ldim @@ -38,5 +38,5 @@ subroutine fdm_do_fast_cpu(e, r, s, d, nl, ldim, nelv) end do end if end subroutine fdm_do_fast_cpu - + end module fdm_cpu diff --git a/src/math/bcknd/cpu/opgrad.f90 b/src/math/bcknd/cpu/opgrad.f90 index 07e28d5d60a..136e5ce15c0 100644 --- a/src/math/bcknd/cpu/opgrad.f90 +++ b/src/math/bcknd/cpu/opgrad.f90 @@ -45,8 +45,8 @@ subroutine cpu_opgrad_lx(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -85,7 +85,7 @@ subroutine cpu_opgrad_lx(ux, uy, uz, u, dx, dy, dz, & ut(i,1,k) = tmp end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -94,7 +94,7 @@ subroutine cpu_opgrad_lx(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -102,7 +102,7 @@ subroutine cpu_opgrad_lx(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine cpu_opgrad_lx - + subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 18 @@ -112,8 +112,8 @@ subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -139,7 +139,7 @@ subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, & + dx(i,15) * u(15,j,1,e) & + dx(i,16) * u(16,j,1,e) & + dx(i,17) * u(17,j,1,e) & - + dx(i,18) * u(18,j,1,e) + + dx(i,18) * u(18,j,1,e) end do end do @@ -163,7 +163,7 @@ subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, & + dy(j,15) * u(i,15,k,e) & + dy(j,16) * u(i,16,k,e) & + dy(j,17) * u(i,17,k,e) & - + dy(j,18) * u(i,18,k,e) + + dy(j,18) * u(i,18,k,e) end do end do end do @@ -187,10 +187,10 @@ subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, & + dz(k,15) * u(i,1,15,e) & + dz(k,16) * u(i,1,16,e) & + dz(k,17) * u(i,1,17,e) & - + dz(k,18) * u(i,1,18,e) + + dz(k,18) * u(i,1,18,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -199,7 +199,7 @@ subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -207,7 +207,7 @@ subroutine cpu_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine cpu_opgrad_lx18 - + subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 17 @@ -217,8 +217,8 @@ subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -243,7 +243,7 @@ subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, & + dx(i,14) * u(14,j,1,e) & + dx(i,15) * u(15,j,1,e) & + dx(i,16) * u(16,j,1,e) & - + dx(i,17) * u(17,j,1,e) + + dx(i,17) * u(17,j,1,e) end do end do @@ -266,7 +266,7 @@ subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, & + dy(j,14) * u(i,14,k,e) & + dy(j,15) * u(i,15,k,e) & + dy(j,16) * u(i,16,k,e) & - + dy(j,17) * u(i,17,k,e) + + dy(j,17) * u(i,17,k,e) end do end do end do @@ -289,10 +289,10 @@ subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, & + dz(k,14) * u(i,1,14,e) & + dz(k,15) * u(i,1,15,e) & + dz(k,16) * u(i,1,16,e) & - + dz(k,17) * u(i,1,17,e) + + dz(k,17) * u(i,1,17,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -301,7 +301,7 @@ subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -309,7 +309,7 @@ subroutine cpu_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine cpu_opgrad_lx17 - + subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 16 @@ -319,8 +319,8 @@ subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -344,7 +344,7 @@ subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, & + dx(i,13) * u(13,j,1,e) & + dx(i,14) * u(14,j,1,e) & + dx(i,15) * u(15,j,1,e) & - + dx(i,16) * u(16,j,1,e) + + dx(i,16) * u(16,j,1,e) end do end do @@ -366,7 +366,7 @@ subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, & + dy(j,13) * u(i,13,k,e) & + dy(j,14) * u(i,14,k,e) & + dy(j,15) * u(i,15,k,e) & - + dy(j,16) * u(i,16,k,e) + + dy(j,16) * u(i,16,k,e) end do end do end do @@ -391,7 +391,7 @@ subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, & + dz(k,16) * u(i,1,16,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -400,7 +400,7 @@ subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -408,7 +408,7 @@ subroutine cpu_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine cpu_opgrad_lx16 - + subroutine cpu_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 15 @@ -418,8 +418,8 @@ subroutine cpu_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -442,7 +442,7 @@ subroutine cpu_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, & + dx(i,12) * u(12,j,1,e) & + dx(i,13) * u(13,j,1,e) & + dx(i,14) * u(14,j,1,e) & - + dx(i,15) * u(15,j,1,e) + + dx(i,15) * u(15,j,1,e) end do end do @@ -463,7 +463,7 @@ subroutine cpu_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, & + dy(j,12) * u(i,12,k,e) & + dy(j,13) * u(i,13,k,e) & + dy(j,14) * u(i,14,k,e) & - + dy(j,15) * u(i,15,k,e) + + dy(j,15) * u(i,15,k,e) end do end do end do @@ -484,10 +484,10 @@ subroutine cpu_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, & + dz(k,12) * u(i,1,12,e) & + dz(k,13) * u(i,1,13,e) & + dz(k,14) * u(i,1,14,e) & - + dz(k,15) * u(i,1,15,e) + + dz(k,15) * u(i,1,15,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -496,7 +496,7 @@ subroutine cpu_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -514,8 +514,8 @@ subroutine cpu_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -537,7 +537,7 @@ subroutine cpu_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, & + dx(i,11) * u(11,j,1,e) & + dx(i,12) * u(12,j,1,e) & + dx(i,13) * u(13,j,1,e) & - + dx(i,14) * u(14,j,1,e) + + dx(i,14) * u(14,j,1,e) end do end do @@ -580,7 +580,7 @@ subroutine cpu_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, & + dz(k,14) * u(i,1,14,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -589,7 +589,7 @@ subroutine cpu_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -597,7 +597,7 @@ subroutine cpu_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine cpu_opgrad_lx14 - + subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 13 @@ -607,8 +607,8 @@ subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -629,7 +629,7 @@ subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, & + dx(i,10) * u(10,j,1,e) & + dx(i,11) * u(11,j,1,e) & + dx(i,12) * u(12,j,1,e) & - + dx(i,13) * u(13,j,1,e) + + dx(i,13) * u(13,j,1,e) end do end do @@ -648,7 +648,7 @@ subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, & + dy(j,10) * u(i,10,k,e) & + dy(j,11) * u(i,11,k,e) & + dy(j,12) * u(i,12,k,e) & - + dy(j,13) * u(i,13,k,e) + + dy(j,13) * u(i,13,k,e) end do end do end do @@ -670,7 +670,7 @@ subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, & + dz(k,13) * u(i,1,13,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -679,7 +679,7 @@ subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -687,7 +687,7 @@ subroutine cpu_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine cpu_opgrad_lx13 - + subroutine cpu_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 12 @@ -697,8 +697,8 @@ subroutine cpu_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -718,7 +718,7 @@ subroutine cpu_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, & + dx(i,9) * u(9,j,1,e) & + dx(i,10) * u(10,j,1,e) & + dx(i,11) * u(11,j,1,e) & - + dx(i,12) * u(12,j,1,e) + + dx(i,12) * u(12,j,1,e) end do end do @@ -757,7 +757,7 @@ subroutine cpu_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, & + dz(k,12) * u(i,1,12,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -766,7 +766,7 @@ subroutine cpu_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -784,8 +784,8 @@ subroutine cpu_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -804,7 +804,7 @@ subroutine cpu_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, & + dx(i,8) * u(8,j,1,e) & + dx(i,9) * u(9,j,1,e) & + dx(i,10) * u(10,j,1,e) & - + dx(i,11) * u(11,j,1,e) + + dx(i,11) * u(11,j,1,e) end do end do @@ -821,7 +821,7 @@ subroutine cpu_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, & + dy(j,8) * u(i,8,k,e) & + dy(j,9) * u(i,9,k,e) & + dy(j,10) * u(i,10,k,e) & - + dy(j,11) * u(i,11,k,e) + + dy(j,11) * u(i,11,k,e) end do end do end do @@ -838,10 +838,10 @@ subroutine cpu_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, & + dz(k,8) * u(i,1,8,e) & + dz(k,9) * u(i,1,9,e) & + dz(k,10) * u(i,1,10,e) & - + dz(k,11) * u(i,1,11,e) + + dz(k,11) * u(i,1,11,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -850,7 +850,7 @@ subroutine cpu_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -868,8 +868,8 @@ subroutine cpu_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -887,7 +887,7 @@ subroutine cpu_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, & + dx(i,7) * u(7,j,1,e) & + dx(i,8) * u(8,j,1,e) & + dx(i,9) * u(9,j,1,e) & - + dx(i,10) * u(10,j,1,e) + + dx(i,10) * u(10,j,1,e) end do end do @@ -903,7 +903,7 @@ subroutine cpu_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, & + dy(j,7) * u(i,7,k,e) & + dy(j,8) * u(i,8,k,e) & + dy(j,9) * u(i,9,k,e) & - + dy(j,10) * u(i,10,k,e) + + dy(j,10) * u(i,10,k,e) end do end do end do @@ -919,10 +919,10 @@ subroutine cpu_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, & + dz(k,7) * u(i,1,7,e) & + dz(k,8) * u(i,1,8,e) & + dz(k,9) * u(i,1,9,e) & - + dz(k,10) * u(i,1,10,e) + + dz(k,10) * u(i,1,10,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -931,7 +931,7 @@ subroutine cpu_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -949,8 +949,8 @@ subroutine cpu_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -967,7 +967,7 @@ subroutine cpu_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, & + dx(i,6) * u(6,j,1,e) & + dx(i,7) * u(7,j,1,e) & + dx(i,8) * u(8,j,1,e) & - + dx(i,9) * u(9,j,1,e) + + dx(i,9) * u(9,j,1,e) end do end do @@ -982,7 +982,7 @@ subroutine cpu_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, & + dy(j,6) * u(i,6,k,e) & + dy(j,7) * u(i,7,k,e) & + dy(j,8) * u(i,8,k,e) & - + dy(j,9) * u(i,9,k,e) + + dy(j,9) * u(i,9,k,e) end do end do end do @@ -997,10 +997,10 @@ subroutine cpu_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, & + dz(k,6) * u(i,1,6,e) & + dz(k,7) * u(i,1,7,e) & + dz(k,8) * u(i,1,8,e) & - + dz(k,9) * u(i,1,9,e) + + dz(k,9) * u(i,1,9,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -1009,7 +1009,7 @@ subroutine cpu_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -1027,8 +1027,8 @@ subroutine cpu_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -1044,7 +1044,7 @@ subroutine cpu_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, & + dx(i,5) * u(5,j,1,e) & + dx(i,6) * u(6,j,1,e) & + dx(i,7) * u(7,j,1,e) & - + dx(i,8) * u(8,j,1,e) + + dx(i,8) * u(8,j,1,e) end do end do @@ -1058,7 +1058,7 @@ subroutine cpu_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, & + dy(j,5) * u(i,5,k,e) & + dy(j,6) * u(i,6,k,e) & + dy(j,7) * u(i,7,k,e) & - + dy(j,8) * u(i,8,k,e) + + dy(j,8) * u(i,8,k,e) end do end do end do @@ -1072,10 +1072,10 @@ subroutine cpu_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, & + dz(k,5) * u(i,1,5,e) & + dz(k,6) * u(i,1,6,e) & + dz(k,7) * u(i,1,7,e) & - + dz(k,8) * u(i,1,8,e) + + dz(k,8) * u(i,1,8,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -1084,7 +1084,7 @@ subroutine cpu_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -1102,8 +1102,8 @@ subroutine cpu_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -1118,7 +1118,7 @@ subroutine cpu_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, & + dx(i,4) * u(4,j,1,e) & + dx(i,5) * u(5,j,1,e) & + dx(i,6) * u(6,j,1,e) & - + dx(i,7) * u(7,j,1,e) + + dx(i,7) * u(7,j,1,e) end do end do @@ -1131,7 +1131,7 @@ subroutine cpu_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, & + dy(j,4) * u(i,4,k,e) & + dy(j,5) * u(i,5,k,e) & + dy(j,6) * u(i,6,k,e) & - + dy(j,7) * u(i,7,k,e) + + dy(j,7) * u(i,7,k,e) end do end do end do @@ -1144,10 +1144,10 @@ subroutine cpu_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, & + dz(k,4) * u(i,1,4,e) & + dz(k,5) * u(i,1,5,e) & + dz(k,6) * u(i,1,6,e) & - + dz(k,7) * u(i,1,7,e) + + dz(k,7) * u(i,1,7,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -1156,7 +1156,7 @@ subroutine cpu_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -1174,8 +1174,8 @@ subroutine cpu_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -1189,7 +1189,7 @@ subroutine cpu_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, & + dx(i,3) * u(3,j,1,e) & + dx(i,4) * u(4,j,1,e) & + dx(i,5) * u(5,j,1,e) & - + dx(i,6) * u(6,j,1,e) + + dx(i,6) * u(6,j,1,e) end do end do @@ -1201,7 +1201,7 @@ subroutine cpu_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, & + dy(j,3) * u(i,3,k,e) & + dy(j,4) * u(i,4,k,e) & + dy(j,5) * u(i,5,k,e) & - + dy(j,6) * u(i,6,k,e) + + dy(j,6) * u(i,6,k,e) end do end do end do @@ -1213,10 +1213,10 @@ subroutine cpu_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, & + dz(k,3) * u(i,1,3,e) & + dz(k,4) * u(i,1,4,e) & + dz(k,5) * u(i,1,5,e) & - + dz(k,6) * u(i,1,6,e) + + dz(k,6) * u(i,1,6,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -1225,7 +1225,7 @@ subroutine cpu_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -1243,8 +1243,8 @@ subroutine cpu_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -1257,7 +1257,7 @@ subroutine cpu_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, & + dx(i,2) * u(2,j,1,e) & + dx(i,3) * u(3,j,1,e) & + dx(i,4) * u(4,j,1,e) & - + dx(i,5) * u(5,j,1,e) + + dx(i,5) * u(5,j,1,e) end do end do @@ -1268,7 +1268,7 @@ subroutine cpu_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, & + dy(j,2) * u(i,2,k,e) & + dy(j,3) * u(i,3,k,e) & + dy(j,4) * u(i,4,k,e) & - + dy(j,5) * u(i,5,k,e) + + dy(j,5) * u(i,5,k,e) end do end do end do @@ -1279,10 +1279,10 @@ subroutine cpu_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, & + dz(k,2) * u(i,1,2,e) & + dz(k,3) * u(i,1,3,e) & + dz(k,4) * u(i,1,4,e) & - + dz(k,5) * u(i,1,5,e) + + dz(k,5) * u(i,1,5,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -1291,7 +1291,7 @@ subroutine cpu_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -1309,8 +1309,8 @@ subroutine cpu_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -1322,7 +1322,7 @@ subroutine cpu_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, & ur(i,j,1) = dx(i,1) * u(1,j,1,e) & + dx(i,2) * u(2,j,1,e) & + dx(i,3) * u(3,j,1,e) & - + dx(i,4) * u(4,j,1,e) + + dx(i,4) * u(4,j,1,e) end do end do @@ -1332,7 +1332,7 @@ subroutine cpu_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, & us(i,j,k) = dy(j,1) * u(i,1,k,e) & + dy(j,2) * u(i,2,k,e) & + dy(j,3) * u(i,3,k,e) & - + dy(j,4) * u(i,4,k,e) + + dy(j,4) * u(i,4,k,e) end do end do end do @@ -1342,10 +1342,10 @@ subroutine cpu_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, & ut(i,1,k) = dz(k,1) * u(i,1,1,e) & + dz(k,2) * u(i,1,2,e) & + dz(k,3) * u(i,1,3,e) & - + dz(k,4) * u(i,1,4,e) + + dz(k,4) * u(i,1,4,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -1354,7 +1354,7 @@ subroutine cpu_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -1372,8 +1372,8 @@ subroutine cpu_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -1384,7 +1384,7 @@ subroutine cpu_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, & do i = 1, lx ur(i,j,1) = dx(i,1) * u(1,j,1,e) & + dx(i,2) * u(2,j,1,e) & - + dx(i,3) * u(3,j,1,e) + + dx(i,3) * u(3,j,1,e) end do end do @@ -1393,7 +1393,7 @@ subroutine cpu_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, & do i = 1, lx us(i,j,k) = dy(j,1) * u(i,1,k,e) & + dy(j,2) * u(i,2,k,e) & - + dy(j,3) * u(i,3,k,e) + + dy(j,3) * u(i,3,k,e) end do end do end do @@ -1402,10 +1402,10 @@ subroutine cpu_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, & do i = 1, lx*lx ut(i,1,k) = dz(k,1) * u(i,1,1,e) & + dz(k,2) * u(i,1,2,e) & - + dz(k,3) * u(i,1,3,e) + + dz(k,3) * u(i,1,3,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -1414,7 +1414,7 @@ subroutine cpu_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & @@ -1432,8 +1432,8 @@ subroutine cpu_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx) real(kind=rp) :: us(lx,lx,lx) real(kind=rp) :: ut(lx,lx,lx) @@ -1443,7 +1443,7 @@ subroutine cpu_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, & do j = 1, lx * lx do i = 1, lx ur(i,j,1) = dx(i,1) * u(1,j,1,e) & - + dx(i,2) * u(2,j,1,e) + + dx(i,2) * u(2,j,1,e) end do end do @@ -1451,7 +1451,7 @@ subroutine cpu_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, & do j = 1, lx do i = 1, lx us(i,j,k) = dy(j,1) * u(i,1,k,e) & - + dy(j,2) * u(i,2,k,e) + + dy(j,2) * u(i,2,k,e) end do end do end do @@ -1459,10 +1459,10 @@ subroutine cpu_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, & do k = 1, lx do i = 1, lx*lx ut(i,1,k) = dz(k,1) * u(i,1,1,e) & - + dz(k,2) * u(i,1,2,e) + + dz(k,2) * u(i,1,2,e) end do end do - + do i = 1, lx * lx * lx ux(i,1,1,e) = w3(i,1,1) & * ( drdx(i,1,1,e) * ur(i,1,1) & @@ -1471,7 +1471,7 @@ subroutine cpu_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, & uy(i,1,1,e) = w3(i,1,1) & * ( dsdy(i,1,1,e) * us(i,1,1) & + drdy(i,1,1,e) * ur(i,1,1) & - + dtdy(i,1,1,e) * ut(i,1,1) ) + + dtdy(i,1,1,e) * ut(i,1,1) ) uz(i,1,1,e) = w3(i,1,1) & * ( dtdz(i,1,1,e) * ut(i,1,1) & + drdz(i,1,1,e) * ur(i,1,1) & diff --git a/src/math/bcknd/cpu/opr_cpu.f90 b/src/math/bcknd/cpu/opr_cpu.f90 index 9cef11346c6..a9c2985515c 100644 --- a/src/math/bcknd/cpu/opr_cpu.f90 +++ b/src/math/bcknd/cpu/opr_cpu.f90 @@ -40,7 +40,6 @@ module opr_cpu use space, only : space_t use coefs, only : coef_t use math - use mesh, only : mesh_t use field, only : field_t use gather_scatter use mathops @@ -48,8 +47,8 @@ module opr_cpu private public :: opr_cpu_dudxyz, opr_cpu_opgrad, opr_cpu_cdtp, & - opr_cpu_conv1, opr_cpu_curl, opr_cpu_cfl - + opr_cpu_conv1, opr_cpu_curl, opr_cpu_cfl, opr_cpu_lambda2 + contains subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef) @@ -62,46 +61,46 @@ subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef) associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof) select case(coef%Xh%lx) case(14) - call cpu_dudxyz_lx14(du, u, dr, ds, dt, & + call cpu_dudxyz_lx14(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv) case(13) - call cpu_dudxyz_lx13(du, u, dr, ds, dt, & + call cpu_dudxyz_lx13(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv) case(12) - call cpu_dudxyz_lx12(du, u, dr, ds, dt, & + call cpu_dudxyz_lx12(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv) case(11) - call cpu_dudxyz_lx11(du, u, dr, ds, dt, & + call cpu_dudxyz_lx11(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv) case(10) - call cpu_dudxyz_lx10(du, u, dr, ds, dt, & + call cpu_dudxyz_lx10(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv) case(9) - call cpu_dudxyz_lx9(du, u, dr, ds, dt, & + call cpu_dudxyz_lx9(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv) case(8) - call cpu_dudxyz_lx8(du, u, dr, ds, dt, & + call cpu_dudxyz_lx8(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv) case(7) - call cpu_dudxyz_lx7(du, u, dr, ds, dt, & + call cpu_dudxyz_lx7(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv) case(6) - call cpu_dudxyz_lx6(du, u, dr, ds, dt, & + call cpu_dudxyz_lx6(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv) case(5) - call cpu_dudxyz_lx5(du, u, dr, ds, dt, & + call cpu_dudxyz_lx5(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv) case(4) - call cpu_dudxyz_lx4(du, u, dr, ds, dt, & + call cpu_dudxyz_lx4(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv) case(3) - call cpu_dudxyz_lx3(du, u, dr, ds, dt, & + call cpu_dudxyz_lx3(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv) case(2) - call cpu_dudxyz_lx2(du, u, dr, ds, dt, & + call cpu_dudxyz_lx2(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv) case default - call cpu_dudxyz_lx(du, u, dr, ds, dt, & + call cpu_dudxyz_lx(du, u, dr, ds, dt, & Xh%dx, Xh%dy, Xh%dz, coef%jacinv, msh%nelv, Xh%lx) end select @@ -109,8 +108,8 @@ subroutine opr_cpu_dudxyz(du, u, dr, ds, dt, coef) end subroutine opr_cpu_dudxyz - subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end) - type(coef_t), intent(in) :: coef + subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end) + type(coef_t), intent(in) :: coef integer, intent(in) :: e_start, e_end real(kind=rp), dimension(coef%Xh%lxyz,e_end-e_start+1), intent(inout) :: ux real(kind=rp), dimension(coef%Xh%lxyz,e_end-e_start+1), intent(inout) :: uy @@ -122,7 +121,7 @@ subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end) drdx => coef%drdx, drdy => coef%drdy, drdz => coef%drdz, & dsdx => coef%dsdx, dsdy => coef%dsdy, dsdz => coef%dsdz, & dtdx => coef%dtdx, dtdy => coef%dtdy, dtdz => coef%dtdz) - + select case(Xh%lx) case(18) call cpu_opgrad_lx18(ux, uy, uz, u, & @@ -194,49 +193,49 @@ subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end) drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), & drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), & drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), & - Xh%w3, e_len) + Xh%w3, e_len) case(8) call cpu_opgrad_lx8(ux, uy, uz, u, & Xh%dx, Xh%dy, Xh%dz, & drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), & drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), & drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), & - Xh%w3, e_len) + Xh%w3, e_len) case(7) call cpu_opgrad_lx7(ux, uy, uz, u, & Xh%dx, Xh%dy, Xh%dz, & drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), & drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), & drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), & - Xh%w3, e_len) + Xh%w3, e_len) case(6) call cpu_opgrad_lx6(ux, uy, uz, u, & Xh%dx, Xh%dy, Xh%dz, & drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), & drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), & drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), & - Xh%w3, e_len) + Xh%w3, e_len) case(5) call cpu_opgrad_lx5(ux, uy, uz, u, & Xh%dx, Xh%dy, Xh%dz, & drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), & drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), & drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), & - Xh%w3, e_len) + Xh%w3, e_len) case(4) call cpu_opgrad_lx4(ux, uy, uz, u, & Xh%dx, Xh%dy, Xh%dz, & drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), & drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), & drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), & - Xh%w3, e_len) + Xh%w3, e_len) case(3) call cpu_opgrad_lx3(ux, uy, uz, u, & Xh%dx, Xh%dy, Xh%dz, & drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), & drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), & drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), & - Xh%w3, e_len) + Xh%w3, e_len) case(2) call cpu_opgrad_lx2(ux, uy, uz, u, & Xh%dx, Xh%dy, Xh%dz, & @@ -250,7 +249,7 @@ subroutine opr_cpu_opgrad(ux, uy, uz, u, coef, e_start, e_end) drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), & drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), & drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), & - Xh%w3, e_len, Xh%lx) + Xh%w3, e_len, Xh%lx) end select end associate @@ -359,25 +358,25 @@ subroutine opr_cpu_conv1(du, u, vx, vy, vz, Xh, coef, e_start, e_end) drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), & drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), & drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), & - jacinv(1,1,1,e_start), e_len) + jacinv(1,1,1,e_start), e_len) case(9) call cpu_conv1_lx9(du, u, vx, vy, vz, Xh%dx, Xh%dy, Xh%dz, & drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), & drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), & drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), & - jacinv(1,1,1,e_start), e_len) + jacinv(1,1,1,e_start), e_len) case(8) call cpu_conv1_lx8(du, u, vx, vy, vz, Xh%dx, Xh%dy, Xh%dz, & drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), & drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), & drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), & - jacinv(1,1,1,e_start), e_len) + jacinv(1,1,1,e_start), e_len) case(7) call cpu_conv1_lx7(du, u, vx, vy, vz, Xh%dx, Xh%dy, Xh%dz, & drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), & drdy(1,1,1,e_start), dsdy(1,1,1,e_start), dtdy(1,1,1,e_start), & drdz(1,1,1,e_start), dsdz(1,1,1,e_start), dtdz(1,1,1,e_start), & - jacinv(1,1,1,e_start), e_len) + jacinv(1,1,1,e_start), e_len) case(6) call cpu_conv1_lx6(du, u, vx, vy, vz, Xh%dx, Xh%dy, Xh%dz, & drdx(1,1,1,e_start), dsdx(1,1,1,e_start), dtdx(1,1,1,e_start), & @@ -416,7 +415,7 @@ subroutine opr_cpu_conv1(du, u, vx, vy, vz, Xh, coef, e_start, e_end) jacinv(1,1,1,e_start), e_len, Xh%lx) end select end associate - + end subroutine opr_cpu_conv1 subroutine opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh) @@ -459,9 +458,9 @@ subroutine opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh) !! BC dependent, Needs to change if cyclic call opcolv(w1%x, w2%x, w3%x, c_Xh%B, gdim, n) - call c_Xh%gs_h%op(w1, GS_OP_ADD) - call c_Xh%gs_h%op(w2, GS_OP_ADD) - call c_Xh%gs_h%op(w3, GS_OP_ADD) + call c_Xh%gs_h%op(w1, GS_OP_ADD) + call c_Xh%gs_h%op(w2, GS_OP_ADD) + call c_Xh%gs_h%op(w3, GS_OP_ADD) call opcolv(w1%x, w2%x, w3%x, c_Xh%Binv, gdim, n) end subroutine opr_cpu_curl @@ -491,11 +490,11 @@ function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl) ut = ( u(i,j,k,e)*coef%dtdx(i,j,k,e) & + v(i,j,k,e)*coef%dtdy(i,j,k,e) & + w(i,j,k,e)*coef%dtdz(i,j,k,e) ) * coef%jacinv(i,j,k,e) - + cflr = abs(dt*ur*Xh%dr_inv(i)) cfls = abs(dt*us*Xh%ds_inv(j)) cflt = abs(dt*ut*Xh%dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -513,7 +512,7 @@ function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl) cflr = abs(dt*ur*Xh%dr_inv(i)) cfls = abs(dt*us*Xh%ds_inv(j)) - + cflm = cflr + cfls cfl = max(cfl,cflm) @@ -523,4 +522,79 @@ function opr_cpu_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl) end if end function opr_cpu_cfl + subroutine opr_cpu_lambda2(lambda2, u, v, w, coef) + type(coef_t), intent(in) :: coef + type(field_t), intent(inout) :: lambda2 + type(field_t), intent(in) :: u, v, w + real(kind=rp) :: grad(coef%Xh%lxyz,3,3) + integer :: temp_indices(9), e, i, ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + + do e = 1, coef%msh%nelv + call opr_cpu_opgrad(grad(1,1,1), grad(1,1,2), grad(1,1,3), & + u%x(1,1,1,e),coef,e,e) + call opr_cpu_opgrad(grad(1,2,1), grad(1,2,2), grad(1,2,3), & + v%x(1,1,1,e),coef,e,e) + call opr_cpu_opgrad(grad(1,3,1), grad(1,3,2), grad(1,3,3), & + w%x(1,1,1,e),coef,e,e) + + do i = 1, coef%Xh%lxyz + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2%x(i,1,1,e) = l2/(coef%B(i,1,1,e)**2) + end do + end do + + end subroutine opr_cpu_lambda2 + end module opr_cpu diff --git a/src/math/bcknd/cpu/tensor_cpu.f90 b/src/math/bcknd/cpu/tensor_cpu.f90 index 8ef02a12b14..df64c0108b6 100644 --- a/src/math/bcknd/cpu/tensor_cpu.f90 +++ b/src/math/bcknd/cpu/tensor_cpu.f90 @@ -5,7 +5,7 @@ module tensor_cpu private public :: tnsr2d_el_cpu, tnsr3d_el_cpu, tnsr3d_cpu, tnsr1_3d_cpu - + contains subroutine tnsr2d_el_cpu(v, nv, u, nu, A, Bt) @@ -16,9 +16,9 @@ subroutine tnsr2d_el_cpu(v, nv, u, nu, A, Bt) call mxm(A, nv, u, nu, work, nu) call mxm(work, nv, Bt, nu, v, nv) - + end subroutine tnsr2d_el_cpu - + subroutine tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct) integer, intent(in) :: nv, nu real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu) @@ -58,7 +58,7 @@ subroutine tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct) else call tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct) end if - + end subroutine tnsr3d_el_cpu subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct) @@ -70,20 +70,20 @@ subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct) integer :: i, j, k, l, nunu, nvnu, nvnv integer :: ii, jj nvnu = nv * nu - nunu = nu * nu + nunu = nu * nu nvnv = nv * nv do j = 1, nunu do i = 1, nv ii = i + nv * (j - 1) - tmp = 0.0_rp + tmp = 0.0_rp do k = 1, nu tmp = tmp + A(i,k) * u(k + nu * (j - 1)) end do work(ii) = tmp end do end do - + do i = 1, nu do j = 1, nv do l = 1, nv @@ -97,7 +97,7 @@ subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct) end do end do end do - + do j = 1, nv do i = 1, nvnv jj = i + nvnv * (j - 1) @@ -109,19 +109,19 @@ subroutine tnsr3d_el_nvnu_cpu(v, nv, u, nu, A, Bt, Ct) v(jj) = tmp end do end do - + end subroutine tnsr3d_el_nvnu_cpu subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n) integer, intent(in) :: n real(kind=rp), intent(inout) :: v(n*n*n), u(n*n*n) real(kind=rp), intent(inout) :: A(n,n), Bt(n,n), Ct(n,n) - real(kind=rp) :: work(n**3), work2(n**3), tmp + real(kind=rp) :: work(n**3), work2(n**3), tmp integer :: i, j, l, k integer :: ii, jj, nn nn = n**2 - + do j = 1, nn do i = 1, n ii = i + n * (j - 1) @@ -132,7 +132,7 @@ subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n) work(ii) = tmp end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -145,7 +145,7 @@ subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) @@ -156,7 +156,7 @@ subroutine tnsr3d_el_n_cpu(v, u, A, Bt, Ct, n) v(jj) = tmp end do end do - + end subroutine tnsr3d_el_n_cpu subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct) @@ -184,10 +184,10 @@ subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct) + A(i,11) * u(11 + n * (j - 1)) & + A(i,12) * u(12 + n * (j - 1)) & + A(i,13) * u(13 + n * (j - 1)) & - + A(i,14) * u(14 + n * (j - 1)) + + A(i,14) * u(14 + n * (j - 1)) end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -205,11 +205,11 @@ subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct) + work(l + n * (11 - 1) + nn * (i - 1)) * Bt(11,j) & + work(l + n * (12 - 1) + nn * (i - 1)) * Bt(12,j) & + work(l + n * (13 - 1) + nn * (i - 1)) * Bt(13,j) & - + work(l + n * (14 - 1) + nn * (i - 1)) * Bt(14,j) + + work(l + n * (14 - 1) + nn * (i - 1)) * Bt(14,j) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) @@ -226,10 +226,10 @@ subroutine tnsr3d_el_n14_cpu(v, u, A, Bt, Ct) + work2(i + nn * (11 - 1)) * Ct(11, j) & + work2(i + nn * (12 - 1)) * Ct(12, j) & + work2(i + nn * (13 - 1)) * Ct(13, j) & - + work2(i + nn * (14 - 1)) * Ct(14, j) + + work2(i + nn * (14 - 1)) * Ct(14, j) end do end do - + end subroutine tnsr3d_el_n14_cpu subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct) @@ -256,10 +256,10 @@ subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct) + A(i,10) * u(10 + n * (j - 1)) & + A(i,11) * u(11 + n * (j - 1)) & + A(i,12) * u(12 + n * (j - 1)) & - + A(i,13) * u(13 + n * (j - 1)) + + A(i,13) * u(13 + n * (j - 1)) end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -276,11 +276,11 @@ subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct) + work(l + n * (10 - 1) + nn * (i - 1)) * Bt(10,j) & + work(l + n * (11 - 1) + nn * (i - 1)) * Bt(11,j) & + work(l + n * (12 - 1) + nn * (i - 1)) * Bt(12,j) & - + work(l + n * (13 - 1) + nn * (i - 1)) * Bt(13,j) + + work(l + n * (13 - 1) + nn * (i - 1)) * Bt(13,j) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) @@ -296,10 +296,10 @@ subroutine tnsr3d_el_n13_cpu(v, u, A, Bt, Ct) + work2(i + nn * (10 - 1)) * Ct(10, j) & + work2(i + nn * (11 - 1)) * Ct(11, j) & + work2(i + nn * (12 - 1)) * Ct(12, j) & - + work2(i + nn * (13 - 1)) * Ct(13, j) + + work2(i + nn * (13 - 1)) * Ct(13, j) end do end do - + end subroutine tnsr3d_el_n13_cpu subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct) @@ -325,10 +325,10 @@ subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct) + A(i,9) * u(9 + n * (j - 1)) & + A(i,10) * u(10 + n * (j - 1)) & + A(i,11) * u(11 + n * (j - 1)) & - + A(i,12) * u(12 + n * (j - 1)) + + A(i,12) * u(12 + n * (j - 1)) end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -344,11 +344,11 @@ subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct) + work(l + n * (9 - 1) + nn * (i - 1)) * Bt(9,j) & + work(l + n * (10 - 1) + nn * (i - 1)) * Bt(10,j) & + work(l + n * (11 - 1) + nn * (i - 1)) * Bt(11,j) & - + work(l + n * (12 - 1) + nn * (i - 1)) * Bt(12,j) + + work(l + n * (12 - 1) + nn * (i - 1)) * Bt(12,j) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) @@ -363,10 +363,10 @@ subroutine tnsr3d_el_n12_cpu(v, u, A, Bt, Ct) + work2(i + nn * (9 - 1)) * Ct(9, j) & + work2(i + nn * (10 - 1)) * Ct(10, j) & + work2(i + nn * (11 - 1)) * Ct(11, j) & - + work2(i + nn * (12 - 1)) * Ct(12, j) + + work2(i + nn * (12 - 1)) * Ct(12, j) end do end do - + end subroutine tnsr3d_el_n12_cpu subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct) @@ -391,10 +391,10 @@ subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct) + A(i,8) * u(8 + n * (j - 1)) & + A(i,9) * u(9 + n * (j - 1)) & + A(i,10) * u(10 + n * (j - 1)) & - + A(i,11) * u(11 + n * (j - 1)) + + A(i,11) * u(11 + n * (j - 1)) end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -409,11 +409,11 @@ subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct) + work(l + n * (8 - 1) + nn * (i - 1)) * Bt(8,j) & + work(l + n * (9 - 1) + nn * (i - 1)) * Bt(9,j) & + work(l + n * (10 - 1) + nn * (i - 1)) * Bt(10,j) & - + work(l + n * (11 - 1) + nn * (i - 1)) * Bt(11,j) + + work(l + n * (11 - 1) + nn * (i - 1)) * Bt(11,j) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) @@ -427,10 +427,10 @@ subroutine tnsr3d_el_n11_cpu(v, u, A, Bt, Ct) + work2(i + nn * (8 - 1)) * Ct(8, j) & + work2(i + nn * (9 - 1)) * Ct(9, j) & + work2(i + nn * (10 - 1)) * Ct(10, j) & - + work2(i + nn * (11 - 1)) * Ct(11, j) + + work2(i + nn * (11 - 1)) * Ct(11, j) end do end do - + end subroutine tnsr3d_el_n11_cpu subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct) @@ -454,10 +454,10 @@ subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct) + A(i,7) * u(7 + n * (j - 1)) & + A(i,8) * u(8 + n * (j - 1)) & + A(i,9) * u(9 + n * (j - 1)) & - + A(i,10) * u(10 + n * (j - 1)) + + A(i,10) * u(10 + n * (j - 1)) end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -471,11 +471,11 @@ subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct) + work(l + n * (7 - 1) + nn * (i - 1)) * Bt(7,j) & + work(l + n * (8 - 1) + nn * (i - 1)) * Bt(8,j) & + work(l + n * (9 - 1) + nn * (i - 1)) * Bt(9,j) & - + work(l + n * (10 - 1) + nn * (i - 1)) * Bt(10,j) + + work(l + n * (10 - 1) + nn * (i - 1)) * Bt(10,j) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) @@ -488,10 +488,10 @@ subroutine tnsr3d_el_n10_cpu(v, u, A, Bt, Ct) + work2(i + nn * (7 - 1)) * Ct(7, j) & + work2(i + nn * (8 - 1)) * Ct(8, j) & + work2(i + nn * (9 - 1)) * Ct(9, j) & - + work2(i + nn * (10 - 1)) * Ct(10, j) + + work2(i + nn * (10 - 1)) * Ct(10, j) end do end do - + end subroutine tnsr3d_el_n10_cpu subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct) @@ -514,10 +514,10 @@ subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct) + A(i,6) * u(6 + n * (j - 1)) & + A(i,7) * u(7 + n * (j - 1)) & + A(i,8) * u(8 + n * (j - 1)) & - + A(i,9) * u(9 + n * (j - 1)) + + A(i,9) * u(9 + n * (j - 1)) end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -530,11 +530,11 @@ subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct) + work(l + n * (6 - 1) + nn * (i - 1)) * Bt(6,j) & + work(l + n * (7 - 1) + nn * (i - 1)) * Bt(7,j) & + work(l + n * (8 - 1) + nn * (i - 1)) * Bt(8,j) & - + work(l + n * (9 - 1) + nn * (i - 1)) * Bt(9,j) + + work(l + n * (9 - 1) + nn * (i - 1)) * Bt(9,j) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) @@ -546,10 +546,10 @@ subroutine tnsr3d_el_n9_cpu(v, u, A, Bt, Ct) + work2(i + nn * (6 - 1)) * Ct(6, j) & + work2(i + nn * (7 - 1)) * Ct(7, j) & + work2(i + nn * (8 - 1)) * Ct(8, j) & - + work2(i + nn * (9 - 1)) * Ct(9, j) + + work2(i + nn * (9 - 1)) * Ct(9, j) end do end do - + end subroutine tnsr3d_el_n9_cpu subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct) @@ -571,10 +571,10 @@ subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct) + A(i,5) * u(5 + n * (j - 1)) & + A(i,6) * u(6 + n * (j - 1)) & + A(i,7) * u(7 + n * (j - 1)) & - + A(i,8) * u(8 + n * (j - 1)) + + A(i,8) * u(8 + n * (j - 1)) end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -586,11 +586,11 @@ subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct) + work(l + n * (5 - 1) + nn * (i - 1)) * Bt(5,j) & + work(l + n * (6 - 1) + nn * (i - 1)) * Bt(6,j) & + work(l + n * (7 - 1) + nn * (i - 1)) * Bt(7,j) & - + work(l + n * (8 - 1) + nn * (i - 1)) * Bt(8,j) + + work(l + n * (8 - 1) + nn * (i - 1)) * Bt(8,j) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) @@ -601,10 +601,10 @@ subroutine tnsr3d_el_n8_cpu(v, u, A, Bt, Ct) + work2(i + nn * (5 - 1)) * Ct(5, j) & + work2(i + nn * (6 - 1)) * Ct(6, j) & + work2(i + nn * (7 - 1)) * Ct(7, j) & - + work2(i + nn * (8 - 1)) * Ct(8, j) + + work2(i + nn * (8 - 1)) * Ct(8, j) end do end do - + end subroutine tnsr3d_el_n8_cpu subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct) @@ -625,10 +625,10 @@ subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct) + A(i,4) * u(4 + n * (j - 1)) & + A(i,5) * u(5 + n * (j - 1)) & + A(i,6) * u(6 + n * (j - 1)) & - + A(i,7) * u(7 + n * (j - 1)) + + A(i,7) * u(7 + n * (j - 1)) end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -639,11 +639,11 @@ subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct) + work(l + n * (4 - 1) + nn * (i - 1)) * Bt(4,j) & + work(l + n * (5 - 1) + nn * (i - 1)) * Bt(5,j) & + work(l + n * (6 - 1) + nn * (i - 1)) * Bt(6,j) & - + work(l + n * (7 - 1) + nn * (i - 1)) * Bt(7,j) + + work(l + n * (7 - 1) + nn * (i - 1)) * Bt(7,j) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) @@ -653,10 +653,10 @@ subroutine tnsr3d_el_n7_cpu(v, u, A, Bt, Ct) + work2(i + nn * (4 - 1)) * Ct(4, j) & + work2(i + nn * (5 - 1)) * Ct(5, j) & + work2(i + nn * (6 - 1)) * Ct(6, j) & - + work2(i + nn * (7 - 1)) * Ct(7, j) + + work2(i + nn * (7 - 1)) * Ct(7, j) end do end do - + end subroutine tnsr3d_el_n7_cpu subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct) @@ -676,10 +676,10 @@ subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct) + A(i,3) * u(3 + n * (j - 1)) & + A(i,4) * u(4 + n * (j - 1)) & + A(i,5) * u(5 + n * (j - 1)) & - + A(i,6) * u(6 + n * (j - 1)) + + A(i,6) * u(6 + n * (j - 1)) end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -689,11 +689,11 @@ subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct) + work(l + n * (3 - 1) + nn * (i - 1)) * Bt(3,j) & + work(l + n * (4 - 1) + nn * (i - 1)) * Bt(4,j) & + work(l + n * (5 - 1) + nn * (i - 1)) * Bt(5,j) & - + work(l + n * (6 - 1) + nn * (i - 1)) * Bt(6,j) + + work(l + n * (6 - 1) + nn * (i - 1)) * Bt(6,j) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) @@ -702,10 +702,10 @@ subroutine tnsr3d_el_n6_cpu(v, u, A, Bt, Ct) + work2(i + nn * (3 - 1)) * Ct(3, j) & + work2(i + nn * (4 - 1)) * Ct(4, j) & + work2(i + nn * (5 - 1)) * Ct(5, j) & - + work2(i + nn * (6 - 1)) * Ct(6, j) + + work2(i + nn * (6 - 1)) * Ct(6, j) end do end do - + end subroutine tnsr3d_el_n6_cpu subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct) @@ -724,10 +724,10 @@ subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct) + A(i,2) * u(2 + n * (j - 1)) & + A(i,3) * u(3 + n * (j - 1)) & + A(i,4) * u(4 + n * (j - 1)) & - + A(i,5) * u(5 + n * (j - 1)) + + A(i,5) * u(5 + n * (j - 1)) end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -740,7 +740,7 @@ subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) @@ -748,10 +748,10 @@ subroutine tnsr3d_el_n5_cpu(v, u, A, Bt, Ct) + work2(i + nn * (2 - 1)) * Ct(2, j) & + work2(i + nn * (3 - 1)) * Ct(3, j) & + work2(i + nn * (4 - 1)) * Ct(4, j) & - + work2(i + nn * (5 - 1)) * Ct(5, j) + + work2(i + nn * (5 - 1)) * Ct(5, j) end do end do - + end subroutine tnsr3d_el_n5_cpu subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct) @@ -769,10 +769,10 @@ subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct) work(ii) = A(i,1) * u(1 + n * (j - 1)) & + A(i,2) * u(2 + n * (j - 1)) & + A(i,3) * u(3 + n * (j - 1)) & - + A(i,4) * u(4 + n * (j - 1)) + + A(i,4) * u(4 + n * (j - 1)) end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -780,21 +780,21 @@ subroutine tnsr3d_el_n4_cpu(v, u, A, Bt, Ct) work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * Bt(1,j) & + work(l + n * (2 - 1) + nn * (i - 1)) * Bt(2,j) & + work(l + n * (3 - 1) + nn * (i - 1)) * Bt(3,j) & - + work(l + n * (4 - 1) + nn * (i - 1)) * Bt(4,j) + + work(l + n * (4 - 1) + nn * (i - 1)) * Bt(4,j) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) v(jj) = work2(i + nn * (1 - 1)) * Ct(1, j) & + work2(i + nn * (2 - 1)) * Ct(2, j) & + work2(i + nn * (3 - 1)) * Ct(3, j) & - + work2(i + nn * (4 - 1)) * Ct(4, j) + + work2(i + nn * (4 - 1)) * Ct(4, j) end do end do - + end subroutine tnsr3d_el_n4_cpu subroutine tnsr3d_el_n3_cpu(v, u, A, Bt, Ct) @@ -811,30 +811,30 @@ subroutine tnsr3d_el_n3_cpu(v, u, A, Bt, Ct) ii = i + n * (j - 1) work(ii) = A(i,1) * u(1 + n * (j - 1)) & + A(i,2) * u(2 + n * (j - 1)) & - + A(i,3) * u(3 + n * (j - 1)) + + A(i,3) * u(3 + n * (j - 1)) end do end do - + do i = 1, n do j = 1, n do l = 1, n ii = l + n * (j - 1) + nn * (i - 1) work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * Bt(1,j) & + work(l + n * (2 - 1) + nn * (i - 1)) * Bt(2,j) & - + work(l + n * (3 - 1) + nn * (i - 1)) * Bt(3,j) + + work(l + n * (3 - 1) + nn * (i - 1)) * Bt(3,j) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) v(jj) = work2(i + nn * (1 - 1)) * Ct(1, j) & + work2(i + nn * (2 - 1)) * Ct(2, j) & - + work2(i + nn * (3 - 1)) * Ct(3, j) + + work2(i + nn * (3 - 1)) * Ct(3, j) end do end do - + end subroutine tnsr3d_el_n3_cpu subroutine tnsr3d_el_n2_cpu(v, u, A, Bt, Ct) @@ -850,30 +850,30 @@ subroutine tnsr3d_el_n2_cpu(v, u, A, Bt, Ct) do i = 1, n ii = i + n * (j - 1) work(ii) = A(i,1) * u(1 + n * (j - 1)) & - + A(i,2) * u(2 + n * (j - 1)) + + A(i,2) * u(2 + n * (j - 1)) end do end do - + do i = 1, n do j = 1, n do l = 1, n ii = l + n * (j - 1) + nn * (i - 1) work2(ii) = work(l + n * (1 - 1) + nn * (i - 1)) * Bt(1,j) & - + work(l + n * (2 - 1) + nn * (i - 1)) * Bt(2,j) + + work(l + n * (2 - 1) + nn * (i - 1)) * Bt(2,j) end do end do end do - + do j = 1, n do i = 1, nn jj = i + nn * (j - 1) v(jj) = work2(i + nn * (1 - 1)) * Ct(1, j) & - + work2(i + nn * (2 - 1)) * Ct(2, j) + + work2(i + nn * (2 - 1)) * Ct(2, j) end do end do - + end subroutine tnsr3d_el_n2_cpu - + subroutine tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv) integer, intent(in) :: nv, nu, nelv real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv) @@ -888,7 +888,7 @@ subroutine tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv) end if end subroutine tnsr3d_cpu - + subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv) integer, intent(in) :: nv, nu, nelv real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv) @@ -898,9 +898,9 @@ subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv) integer :: nunu, nvnu, nvnv nvnu = nv * nu - nunu = nu * nu + nunu = nu * nu nvnv = nv * nv - + do ie = 1,nelv do j = 1, nunu do i = 1, nv @@ -912,7 +912,7 @@ subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv) work(ii) = tmp end do end do - + do i = 1, nu do j = 1, nv do l = 1, nv @@ -926,7 +926,7 @@ subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv) end do end do end do - + do j = 1, nv do i = 1, nvnv jj = i + nvnv * (j - 1) @@ -939,7 +939,7 @@ subroutine tnsr3d_nvnu_cpu(v, nv, u, nu, A, Bt, Ct, nelv) end do end do end do - + end subroutine tnsr3d_nvnu_cpu subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv) @@ -953,16 +953,16 @@ subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv) real(kind=rp), intent(inout) :: A(nv,nu), Bt(nu, nv), Ct(nu,nv) real(kind=rp) :: work(nu**2*nv), work2(nu*nv**2), tmp integer :: ie, i, j, k, l, ii, jj - + do ie = 1,nelv do j = 1, nunu do i = 1, nv ii = i + nv * (j - 1) work(ii) = A(i,1) * u(1 + nu * (j - 1), ie) & - + A(i,2) * u(2 + nu * (j - 1), ie) + + A(i,2) * u(2 + nu * (j - 1), ie) end do end do - + do i = 1, nu do j = 1, nv do l = 1, nv @@ -976,16 +976,16 @@ subroutine tnsr3d_nu2nv4_cpu(v, u, A, Bt, Ct, nelv) end do end do end do - + do j = 1, nv do i = 1, nvnv jj = i + nvnv * (j - 1) v(jj, ie) = work2(i + nvnv * (1 - 1)) * Ct(1, j) & - + work2(i + nvnv * (2 - 1)) * Ct(2, j) + + work2(i + nvnv * (2 - 1)) * Ct(2, j) end do end do end do - + end subroutine tnsr3d_nu2nv4_cpu subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv) @@ -1000,7 +1000,7 @@ subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv) nvnu = nv * nu nvnv = nv * nv - + do ie = 1,nelv do j = 1, nunu do i = 1, nv @@ -1008,10 +1008,10 @@ subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv) work(ii) = A(i,1) * u(1 + nu * (j - 1), ie) & + A(i,2) * u(2 + nu * (j - 1), ie) & + A(i,3) * u(3 + nu * (j - 1), ie) & - + A(i,4) * u(4 + nu * (j - 1), ie) + + A(i,4) * u(4 + nu * (j - 1), ie) end do end do - + do i = 1, nu do j = 1, nv do l = 1, nv @@ -1025,14 +1025,14 @@ subroutine tnsr3d_nu4_cpu(v, nv, u, A, Bt, Ct, nelv) end do end do end do - + do j = 1, nv do i = 1, nvnv jj = i + nvnv * (j - 1) v(jj, ie) = work2(i + nvnv * (1 - 1)) * Ct(1, j) & + work2(i + nvnv * (2 - 1)) * Ct(2, j) & + work2(i + nvnv * (3 - 1)) * Ct(3, j) & - + work2(i + nvnv * (4 - 1)) * Ct(4, j) + + work2(i + nvnv * (4 - 1)) * Ct(4, j) end do end do end do @@ -1049,10 +1049,10 @@ subroutine tnsr1_3d_cpu(v, nv, nu, A, Bt, Ct, nelv) else call tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv) end if - + end subroutine tnsr1_3d_cpu - subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv) + subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv) integer, intent(in) :: nv, nu, nelv real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv) real(kind=rp), intent(inout) :: A(nv,nu), Bt(nu, nv), Ct(nu,nv) @@ -1063,26 +1063,26 @@ subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv) real(kind=rp) :: tmp nvnu = nv * nu - nunu = nu * nu + nunu = nu * nu nvnv = nv * nv - + e0 = 1 es = 1 ee = nelv - + if (nv.gt.nu) then e0 = nelv es = -1 ee = 1 endif - + nu3 = nu**3 nv3 = nv**3 do e = e0,ee,es iu = (e-1)*nu3 iv = (e-1)*nv3 - + do j = 1, nunu do i = 1, nv ii = i + nv * (j - 1) @@ -1094,7 +1094,7 @@ subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv) work(ii) = tmp end do end do - + do i = 1, nu do j = 1, nv do l = 1, nv @@ -1108,7 +1108,7 @@ subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv) end do end do end do - + do j = 1, nv do i = 1, nvnv jj = i + nvnv * (j - 1) + iv @@ -1121,7 +1121,7 @@ subroutine tnsr1_3d_nvnu_cpu(v, nv, nu, A, Bt, Ct, nelv) end do end do end do - + end subroutine tnsr1_3d_nvnu_cpu subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv) @@ -1143,17 +1143,17 @@ subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv) do e = 1,nelv iu = (e-1)*nununu iv = (e-1)*nvnvnv - + do j = 1, nunu do i = 1, nv ii = i + nv * (j - 1) work(ii) = A(i,1) * v(1 + nu * (j - 1) + iu) & + A(i,2) * v(2 + nu * (j - 1) + iu) & + A(i,3) * v(3 + nu * (j - 1) + iu) & - + A(i,4) * v(4 + nu * (j - 1) + iu) + + A(i,4) * v(4 + nu * (j - 1) + iu) end do end do - + do i = 1, nu do j = 1, nv do l = 1, nv @@ -1167,7 +1167,7 @@ subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv) end do end do end do - + do j = 1, nv do i = 1, nvnv jj = i + nvnv * (j - 1) + iv @@ -1175,11 +1175,11 @@ subroutine tnsr1_3d_nu4nv2_cpu(v, A, Bt, Ct, nelv) + work2(i + nvnv * (2 - 1)) * Ct(2, j) & + work2(i + nvnv * (3 - 1)) * Ct(3, j) & + work2(i + nvnv * (4 - 1)) * Ct(4, j) - + end do end do end do - + end subroutine tnsr1_3d_nu4nv2_cpu - + end module tensor_cpu diff --git a/src/math/bcknd/device/ax_helm_device.F90 b/src/math/bcknd/device/ax_helm_device.F90 index f0e76e75653..4fa9dbce984 100644 --- a/src/math/bcknd/device/ax_helm_device.F90 +++ b/src/math/bcknd/device/ax_helm_device.F90 @@ -35,7 +35,6 @@ module ax_helm_device use num_types, only : rp use coefs, only : coef_t use space, only : space_t - use field, only : field_t use mesh, only : mesh_t use device_math, only : device_addcol4 use device, only : device_get_ptr @@ -43,14 +42,14 @@ module ax_helm_device use, intrinsic :: iso_c_binding, only : c_ptr, c_int implicit none private - + type, public, extends(ax_t) :: ax_helm_device_t contains procedure, nopass :: compute => ax_helm_device_compute end type ax_helm_device_t #ifdef HAVE_HIP - interface + interface subroutine hip_ax_helm(w_d, u_d, & dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d, & h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) & @@ -64,7 +63,7 @@ subroutine hip_ax_helm(w_d, u_d, & end subroutine hip_ax_helm end interface #elif HAVE_CUDA - interface + interface subroutine cuda_ax_helm(w_d, u_d, & dx_d, dy_d, dz_d, dxt_d, dyt_d, dzt_d,& h1_d, g11_d, g22_d, g33_d, g12_d, g13_d, g23_d, nelv, lx) & @@ -129,9 +128,9 @@ subroutine ax_helm_device_compute(w, u, coef, msh, Xh) if (coef%ifh2) then call device_addcol4(w_d ,coef%h2_d, coef%B_d, u_d, coef%dof%size()) end if - + end subroutine ax_helm_device_compute - + end module ax_helm_device diff --git a/src/math/bcknd/device/cuda/ax_helm.cu b/src/math/bcknd/device/cuda/ax_helm.cu index 4872ddcef9b..58f01824954 100644 --- a/src/math/bcknd/device/cuda/ax_helm.cu +++ b/src/math/bcknd/device/cuda/ax_helm.cu @@ -137,16 +137,21 @@ extern "C" { CASE_KSTEP(LX); \ break +#define CASE_LARGE_PADDED(LX) \ + case LX: \ + CASE_KSTEP_PADDED(LX); \ + break + if ((*lx) < 12) { switch(*lx) { CASE(2); CASE(3); - CASE(4); + CASE_PADDED(4); CASE(5); CASE(6); CASE(7); - CASE(8); + CASE_PADDED(8); CASE(9); CASE(10); CASE(11); @@ -163,7 +168,7 @@ extern "C" { CASE_LARGE(13); CASE_LARGE(14); CASE_LARGE(15); - CASE_LARGE(16); + CASE_LARGE_PADDED(16); default: { fprintf(stderr, __FILE__ ": size not supported: %d\n", *lx); diff --git a/src/math/bcknd/device/cuda/ax_helm_kernel.h b/src/math/bcknd/device/cuda/ax_helm_kernel.h index 9b0ab0fe557..57ece6bf669 100644 --- a/src/math/bcknd/device/cuda/ax_helm_kernel.h +++ b/src/math/bcknd/device/cuda/ax_helm_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_AX_HELM_KERNEL_H__ +#define __MATH_AX_HELM_KERNEL_H__ /* Copyright (c) 2021-2023, The Neko Authors All rights reserved. @@ -344,3 +346,5 @@ __global__ void ax_helm_kernel_kstep_padded(T * __restrict__ w, w[ij + k*LX*LX + ele] = rw[k]; } } + +#endif // __MATH_AX_HELM_KERNEL_H__ diff --git a/src/math/bcknd/device/cuda/cdtp_kernel.h b/src/math/bcknd/device/cuda/cdtp_kernel.h index a8d41681256..8f79d5da6e5 100644 --- a/src/math/bcknd/device/cuda/cdtp_kernel.h +++ b/src/math/bcknd/device/cuda/cdtp_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_CDTP_KERNEL_H__ +#define __MATH_CDTP_KERNEL_H__ /* Copyright (c) 2021-2023, The Neko Authors All rights reserved. @@ -124,7 +126,6 @@ __global__ void __launch_bounds__(LX*LX,3) T rtar[LX]; T rtas[LX]; T rtat[LX]; - T rjac[LX]; const int e = blockIdx.x; const int j = threadIdx.y; @@ -174,3 +175,5 @@ __global__ void __launch_bounds__(LX*LX,3) } } + +#endif // __MATH_CDTP_KERNEL_H__ diff --git a/src/math/bcknd/device/cuda/cfl_kernel.h b/src/math/bcknd/device/cuda/cfl_kernel.h index 436e9ed0365..5daf2416b45 100644 --- a/src/math/bcknd/device/cuda/cfl_kernel.h +++ b/src/math/bcknd/device/cuda/cfl_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_CFL_KERNEL_H__ +#define __MATH_CFL_KERNEL_H__ /* Copyright (c) 2022, The Neko Authors All rights reserved. @@ -179,3 +181,5 @@ __global__ void cfl_kernel(const T dt, cfl_h[blockIdx.x] = cfl_tmp; } + +#endif // __MATH_CFL_KERNEL_H__ diff --git a/src/math/bcknd/device/cuda/conv1_kernel.h b/src/math/bcknd/device/cuda/conv1_kernel.h index c094ab98fe1..d77c1cde6aa 100644 --- a/src/math/bcknd/device/cuda/conv1_kernel.h +++ b/src/math/bcknd/device/cuda/conv1_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_CONV1_KERNEL_H__ +#define __MATH_CONV1_KERNEL_H__ /* Copyright (c) 2021-2023, The Neko Authors All rights reserved. @@ -210,3 +212,5 @@ __global__ void __launch_bounds__(LX*LX,3) } } + +#endif // __MATH_CONV1_KERNEL_H__ diff --git a/src/math/bcknd/device/cuda/dudxyz_kernel.h b/src/math/bcknd/device/cuda/dudxyz_kernel.h index a14123f5f6c..b42d111a217 100644 --- a/src/math/bcknd/device/cuda/dudxyz_kernel.h +++ b/src/math/bcknd/device/cuda/dudxyz_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_DUDXYZ_KERNEL_H__ +#define __MATH_DUDXYZ_KERNEL_H__ /* Copyright (c) 2021-2023, The Neko Authors All rights reserved. @@ -173,3 +175,5 @@ __global__ void __launch_bounds__(LX*LX,3) __syncthreads(); } } + +#endif // __MATH_DUDXYZ_KERNEL_H__ diff --git a/src/math/bcknd/device/cuda/fdm_kernel.h b/src/math/bcknd/device/cuda/fdm_kernel.h index 5c5d82675a5..bcdcbdc963c 100644 --- a/src/math/bcknd/device/cuda/fdm_kernel.h +++ b/src/math/bcknd/device/cuda/fdm_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_FDM_KERNEL_H__ +#define __MATH_FDM_KERNEL_H__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -135,3 +137,5 @@ __global__ void fdm_do_fast_kernel(T * __restrict__ e, } + +#endif // __MATH_FDM_KERNEL_H__ diff --git a/src/math/bcknd/device/cuda/lambda2_kernel.h b/src/math/bcknd/device/cuda/lambda2_kernel.h index b6598316e38..327f947fca2 100644 --- a/src/math/bcknd/device/cuda/lambda2_kernel.h +++ b/src/math/bcknd/device/cuda/lambda2_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_LAMBDA2_KERNEL_H__ +#define __MATH_LAMBDA2_KERNEL_H__ /* Copyright (c) 2021-2023, The Neko Authors All rights reserved. @@ -358,3 +360,5 @@ lambda2_kernel_kstep(T * __restrict__ lambda2, } } + +#endif // __MATH_LAMBDA2_KERNEL_H__ diff --git a/src/math/bcknd/device/cuda/math.cu b/src/math/bcknd/device/cuda/math.cu index 67ae4c8159c..f71ef3869c8 100644 --- a/src/math/bcknd/device/cuda/math.cu +++ b/src/math/bcknd/device/cuda/math.cu @@ -52,6 +52,21 @@ extern "C" { (cudaStream_t) glb_cmd_queue)); } + /** Fortran wrapper for masked copy + * Copy a vector \f$ a(mask) = b(mask) \f$ + */ + void cuda_masked_copy(void *a, void *b, void *mask, int *n, int *m) { + + const dim3 nthrds(1024, 1, 1); + const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1); + + masked_copy_kernel<<>>((real *) a, (real*) b,(int*) mask, *n, *m); + CUDA_CHECK(cudaGetLastError()); + + } + + /** Fortran wrapper for rzero * Zero a real vector */ @@ -371,6 +386,40 @@ extern "C" { real * bufred = NULL; real * bufred_d = NULL; + /** + * Fortran wrapper vlsc3 + * Compute multiplication sum \f$ dot = u \cdot v \cdot w \f$ + */ + real cuda_vlsc3(void *u, void *v, void *w, int *n) { + + const dim3 nthrds(1024, 1, 1); + const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1); + const int nb = ((*n) + 1024 - 1)/ 1024; + const cudaStream_t stream = (cudaStream_t) glb_cmd_queue; + + if ( nb > red_s){ + red_s = nb; + if (bufred != NULL) { + CUDA_CHECK(cudaFreeHost(bufred)); + CUDA_CHECK(cudaFree(bufred_d)); + } + CUDA_CHECK(cudaMallocHost(&bufred,nb*sizeof(real))); + CUDA_CHECK(cudaMalloc(&bufred_d, nb*sizeof(real))); + } + + glsc3_kernel<<>> + ((real *) u, (real *) v, (real *) w, bufred_d, *n); + CUDA_CHECK(cudaGetLastError()); + reduce_kernel<<<1, 1024, 0, stream>>> (bufred_d, nb); + CUDA_CHECK(cudaGetLastError()); + + CUDA_CHECK(cudaMemcpyAsync(bufred, bufred_d, sizeof(real), + cudaMemcpyDeviceToHost, stream)); + cudaStreamSynchronize(stream); + + return bufred[0]; + } + /** * Fortran wrapper glsc3 * Weighted inner product \f$ a^T b c \f$ diff --git a/src/math/bcknd/device/cuda/math_kernel.h b/src/math/bcknd/device/cuda/math_kernel.h index 9f355d82bbf..f1d14405016 100644 --- a/src/math/bcknd/device/cuda/math_kernel.h +++ b/src/math/bcknd/device/cuda/math_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_MATH_KERNEL_H__ +#define __MATH_MATH_KERNEL_H__ /* Copyright (c) 2021-2023, The Neko Authors All rights reserved. @@ -48,6 +50,25 @@ __global__ void cmult_kernel(T * __restrict__ a, } } +/** + * Device kernel for masked copy + */ +template< typename T > +__global__ void masked_copy_kernel(T * __restrict__ a, + T * __restrict__ b, + int * __restrict__ mask, + const int n, + const int m) { + + const int idx = blockIdx.x * blockDim.x + threadIdx.x; + const int str = blockDim.x * gridDim.x; + + for (int i = idx; i < m; i += str) { + a[mask[i+1]-1] = b[mask[i+1]-1]; + } +} + + /** * Device kernel for cmult2 */ @@ -424,7 +445,6 @@ __global__ void reduce_kernel(T * bufred, const int n) { bufred[blockIdx.x] = sum; } - /** * Reduction kernel for glsc3 */ @@ -606,3 +626,5 @@ __global__ void glsum_kernel(const T * a, buf_h[blockIdx.x] = sum; } + +#endif // __MATH_MATH_KERNEL_H__ diff --git a/src/math/bcknd/device/cuda/mathops_kernel.h b/src/math/bcknd/device/cuda/mathops_kernel.h index d691e3613a7..0341aa6f63d 100644 --- a/src/math/bcknd/device/cuda/mathops_kernel.h +++ b/src/math/bcknd/device/cuda/mathops_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_MATHOPS_KERNEL_H__ +#define __MATH_MATHOPS_KERNEL_H__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -192,3 +194,5 @@ __global__ void opadd2col_kernel(T * __restrict__ a1, } + +#endif // __MATH_MATHOPS_KERNEL_H__ diff --git a/src/math/bcknd/device/cuda/opgrad_kernel.h b/src/math/bcknd/device/cuda/opgrad_kernel.h index 1667a4ed00b..1eda4cfd4de 100644 --- a/src/math/bcknd/device/cuda/opgrad_kernel.h +++ b/src/math/bcknd/device/cuda/opgrad_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_OPGRAD_KERNEL_H__ +#define __MATH_OPGRAD_KERNEL_H__ /* Copyright (c) 2021-2023, The Neko Authors All rights reserved. @@ -197,3 +199,5 @@ opgrad_kernel_kstep(T * __restrict__ ux, } } + +#endif // __MATH_OPGRAD_KERNEL_H__ diff --git a/src/math/bcknd/device/cuda/schwarz_kernel.h b/src/math/bcknd/device/cuda/schwarz_kernel.h index e54d868d44f..e3a25ccdbb6 100644 --- a/src/math/bcknd/device/cuda/schwarz_kernel.h +++ b/src/math/bcknd/device/cuda/schwarz_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_SCHWARZ_KERNEL_H__ +#define __MATH_SCHWARZ_KERNEL_H__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -126,3 +128,5 @@ __global__ void schwarz_toreg3d_kernel(T * __restrict__ b, b[ijk+el] = a[(i+1)+(j+1)*nx2+(k+1)*nx2*nx2+el2]; } } + +#endif // __MATH_SCHWARZ_KERNEL_H__ diff --git a/src/math/bcknd/device/cuda/tensor_kernel.h b/src/math/bcknd/device/cuda/tensor_kernel.h index 2ae5b80f08f..4e7b58b49b4 100644 --- a/src/math/bcknd/device/cuda/tensor_kernel.h +++ b/src/math/bcknd/device/cuda/tensor_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_TENSOR_KERNEL_H__ +#define __MATH_TENSOR_KERNEL_H__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -206,3 +208,5 @@ __global__ void tnsr3d_kernel_large(T * __restrict__ v, } + +#endif // __MATH_TENSOR_KERNEL_H__ diff --git a/src/math/bcknd/device/device_math.F90 b/src/math/bcknd/device/device_math.F90 index ae9b9605a2a..2c034602078 100644 --- a/src/math/bcknd/device/device_math.F90 +++ b/src/math/bcknd/device/device_math.F90 @@ -33,7 +33,7 @@ module device_math use comm use utils, only : neko_error - use num_types, only : rp, c_rp + use num_types, only : rp, c_rp use, intrinsic :: iso_c_binding implicit none private @@ -48,6 +48,15 @@ subroutine hip_copy(a_d, b_d, n) & end subroutine hip_copy end interface + interface + subroutine hip_masked_copy(a_d, b_d, mask_d, n, m) & + bind(c, name='hip_masked_copy') + use, intrinsic :: iso_c_binding + type(c_ptr), value :: a_d, b_d, mask_d + integer(c_int) :: n, m + end subroutine hip_masked_copy + end interface + interface subroutine hip_cmult(a_d, c, n) & bind(c, name='hip_cmult') @@ -91,7 +100,7 @@ subroutine hip_cfill(a_d, c, n) & integer(c_int) :: n end subroutine hip_cfill end interface - + interface subroutine hip_rzero(a_d, n) & bind(c, name='hip_rzero') @@ -100,23 +109,23 @@ subroutine hip_rzero(a_d, n) & integer(c_int) :: n end subroutine hip_rzero end interface - + interface subroutine hip_add2(a_d, b_d, n) & bind(c, name='hip_add2') use, intrinsic :: iso_c_binding - import c_rp + import c_rp implicit none type(c_ptr), value :: a_d, b_d integer(c_int) :: n end subroutine hip_add2 end interface - + interface subroutine hip_add2s1(a_d, b_d, c1, n) & bind(c, name='hip_add2s1') use, intrinsic :: iso_c_binding - import c_rp + import c_rp implicit none type(c_ptr), value :: a_d, b_d real(c_rp) :: c1 @@ -135,7 +144,7 @@ subroutine hip_add2s2(a_d, b_d, c1, n) & integer(c_int) :: n end subroutine hip_add2s2 end interface - + interface subroutine hip_add2s2_many(y_d,x_d_d,a_d,j,n) & bind(c, name='hip_add2s2_many') @@ -163,7 +172,7 @@ end subroutine hip_addsqr2s2 subroutine hip_add3s2(a_d, b_d, c_d, c1, c2, n) & bind(c, name='hip_add3s2') use, intrinsic :: iso_c_binding - import c_rp + import c_rp implicit none type(c_ptr), value :: a_d, b_d, c_d real(c_rp) :: c1, c2 @@ -190,7 +199,7 @@ subroutine hip_invcol2(a_d, b_d, n) & integer(c_int) :: n end subroutine hip_invcol2 end interface - + interface subroutine hip_col2(a_d, b_d, n) & bind(c, name='hip_col2') @@ -200,7 +209,7 @@ subroutine hip_col2(a_d, b_d, n) & integer(c_int) :: n end subroutine hip_col2 end interface - + interface subroutine hip_col3(a_d, b_d, c_d, n) & bind(c, name='hip_col3') @@ -230,7 +239,7 @@ subroutine hip_sub2(a_d, b_d, n) & integer(c_int) :: n end subroutine hip_sub2 end interface - + interface subroutine hip_sub3(a_d, b_d, c_d, n) & bind(c, name='hip_sub3') @@ -270,7 +279,18 @@ subroutine hip_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) & integer(c_int) :: n end subroutine hip_vdot3 end interface - + + interface + real(c_rp) function hip_vlsc3(u_d, v_d, w_d, n) & + bind(c, name='hip_vlsc3') + use, intrinsic :: iso_c_binding + import c_rp + implicit none + type(c_ptr), value :: u_d, v_d, w_d + integer(c_int) :: n + end function hip_vlsc3 + end interface + interface real(c_rp) function hip_glsc3(a_d, b_d, c_d, n) & bind(c, name='hip_glsc3') @@ -281,7 +301,7 @@ real(c_rp) function hip_glsc3(a_d, b_d, c_d, n) & integer(c_int) :: n end function hip_glsc3 end interface - + interface subroutine hip_glsc3_many(h,w_d,v_d_d,mult_d,j,n) & bind(c, name='hip_glsc3_many') @@ -324,7 +344,14 @@ subroutine cuda_copy(a_d, b_d, n) & integer(c_int) :: n end subroutine cuda_copy end interface - + interface + subroutine cuda_masked_copy(a_d, b_d, mask_d, n, m) & + bind(c, name='cuda_masked_copy') + use, intrinsic :: iso_c_binding + type(c_ptr), value :: a_d, b_d, mask_d + integer(c_int) :: n, m + end subroutine cuda_masked_copy + end interface interface subroutine cuda_cmult(a_d, c, n) & bind(c, name='cuda_cmult') @@ -378,7 +405,7 @@ subroutine cuda_rzero(a_d, n) & integer(c_int) :: n end subroutine cuda_rzero end interface - + interface subroutine cuda_add2(a_d, b_d, n) & bind(c, name='cuda_add2') @@ -389,7 +416,7 @@ subroutine cuda_add2(a_d, b_d, n) & integer(c_int) :: n end subroutine cuda_add2 end interface - + interface subroutine cuda_add2s1(a_d, b_d, c1, n) & bind(c, name='cuda_add2s1') @@ -430,7 +457,7 @@ end subroutine cuda_addsqr2s2 subroutine cuda_add3s2(a_d, b_d, c_d, c1, c2, n) & bind(c, name='cuda_add3s2') use, intrinsic :: iso_c_binding - import c_rp + import c_rp implicit none type(c_ptr), value :: a_d, b_d, c_d real(c_rp) :: c1, c2 @@ -451,13 +478,13 @@ end subroutine cuda_invcol1 interface subroutine cuda_invcol2(a_d, b_d, n) & bind(c, name='cuda_invcol2') - use, intrinsic :: iso_c_binding + use, intrinsic :: iso_c_binding implicit none type(c_ptr), value :: a_d, b_d integer(c_int) :: n end subroutine cuda_invcol2 end interface - + interface subroutine cuda_col2(a_d, b_d, n) & bind(c, name='cuda_col2') @@ -467,7 +494,7 @@ subroutine cuda_col2(a_d, b_d, n) & integer(c_int) :: n end subroutine cuda_col2 end interface - + interface subroutine cuda_col3(a_d, b_d, c_d, n) & bind(c, name='cuda_col3') @@ -497,7 +524,7 @@ subroutine cuda_sub2(a_d, b_d, n) & integer(c_int) :: n end subroutine cuda_sub2 end interface - + interface subroutine cuda_sub3(a_d, b_d, c_d, n) & bind(c, name='cuda_sub3') @@ -537,7 +564,18 @@ subroutine cuda_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) & integer(c_int) :: n end subroutine cuda_vdot3 end interface - + + interface + real(c_rp) function cuda_vlsc3(u_d, v_d, w_d, n) & + bind(c, name='cuda_vlsc3') + use, intrinsic :: iso_c_binding + import c_rp + implicit none + type(c_ptr), value :: u_d, v_d, w_d + integer(c_int) :: n + end function cuda_vlsc3 + end interface + interface subroutine cuda_add2s2_many(y_d,x_d_d,a_d,j,n) & bind(c, name='cuda_add2s2_many') @@ -593,7 +631,7 @@ real(c_rp) function cuda_glsum(a_d, n) & end function cuda_glsum end interface #elif HAVE_OPENCL - interface + interface subroutine opencl_copy(a_d, b_d, n) & bind(c, name='opencl_copy') use, intrinsic :: iso_c_binding @@ -602,6 +640,15 @@ subroutine opencl_copy(a_d, b_d, n) & end subroutine opencl_copy end interface + interface + subroutine opencl_masked_copy(a_d, b_d, mask_d, n, m) & + bind(c, name='opencl_masked_copy') + use, intrinsic :: iso_c_binding + type(c_ptr), value :: a_d, b_d, mask_d + integer(c_int) :: n, m + end subroutine opencl_masked_copy + end interface + interface subroutine opencl_cmult(a_d, c, n) & bind(c, name='opencl_cmult') @@ -672,12 +719,12 @@ subroutine opencl_add2(a_d, b_d, n) & integer(c_int) :: n end subroutine opencl_add2 end interface - + interface subroutine opencl_add2s1(a_d, b_d, c1, n) & bind(c, name='opencl_add2s1') use, intrinsic :: iso_c_binding - import c_rp + import c_rp implicit none type(c_ptr), value :: a_d, b_d real(c_rp) :: c1 @@ -689,7 +736,7 @@ end subroutine opencl_add2s1 subroutine opencl_add2s2(a_d, b_d, c1, n) & bind(c, name='opencl_add2s2') use, intrinsic :: iso_c_binding - import c_rp + import c_rp implicit none type(c_ptr), value :: a_d, b_d real(c_rp) :: c1 @@ -712,7 +759,7 @@ end subroutine opencl_add2s2_many subroutine opencl_addsqr2s2(a_d, b_d, c1, n) & bind(c, name='opencl_addsqr2s2') use, intrinsic :: iso_c_binding - import c_rp + import c_rp implicit none type(c_ptr), value :: a_d, b_d real(c_rp) :: c1 @@ -724,7 +771,7 @@ end subroutine opencl_addsqr2s2 subroutine opencl_add3s2(a_d, b_d, c_d, c1, c2, n) & bind(c, name='opencl_add3s2') use, intrinsic :: iso_c_binding - import c_rp + import c_rp implicit none type(c_ptr), value :: a_d, b_d, c_d real(c_rp) :: c1, c2 @@ -745,13 +792,13 @@ end subroutine opencl_invcol1 interface subroutine opencl_invcol2(a_d, b_d, n) & bind(c, name='opencl_invcol2') - use, intrinsic :: iso_c_binding + use, intrinsic :: iso_c_binding implicit none type(c_ptr), value :: a_d, b_d integer(c_int) :: n end subroutine opencl_invcol2 end interface - + interface subroutine opencl_col2(a_d, b_d, n) & bind(c, name='opencl_col2') @@ -791,7 +838,7 @@ subroutine opencl_sub2(a_d, b_d, n) & integer(c_int) :: n end subroutine opencl_sub2 end interface - + interface subroutine opencl_sub3(a_d, b_d, c_d, n) & bind(c, name='opencl_sub3') @@ -811,7 +858,7 @@ subroutine opencl_addcol3(a_d, b_d, c_d, n) & integer(c_int) :: n end subroutine opencl_addcol3 end interface - + interface subroutine opencl_addcol4(a_d, b_d, c_d, d_d, n) & bind(c, name='opencl_addcol4') @@ -882,8 +929,9 @@ end function opencl_glsum device_cadd, device_cfill, device_add2, device_add2s1, device_add2s2, & device_addsqr2s2, device_add3s2, device_invcol1, device_invcol2, & device_col2, device_col3, device_subcol3, device_sub2, device_sub3, & - device_addcol3, device_addcol4, device_vdot3, device_glsc3, & - device_glsc3_many, device_add2s2_many, device_glsc2, device_glsum + device_addcol3, device_addcol4, device_vdot3, device_vlsc3, device_glsc3, & + device_glsc3_many, device_add2s2_many, device_glsc2, device_glsum, & + device_masked_copy contains @@ -897,10 +945,25 @@ subroutine device_copy(a_d, b_d, n) #elif HAVE_OPENCL call opencl_copy(a_d, b_d, n) #else - call neko_error('No device backend configured') + call neko_error('no device backend configured') #endif end subroutine device_copy + subroutine device_masked_copy(a_d, b_d, mask_d, n, m) + type(c_ptr) :: a_d, b_d, mask_d + integer :: n, m +#ifdef HAVE_HIP + call hip_masked_copy(a_d, b_d, mask_d, n, m) +#elif HAVE_CUDA + call cuda_masked_copy(a_d, b_d, mask_d, n, m) +#elif HAVE_OPENCL + call opencl_masked_copy(a_d, b_d, mask_d, n, m) +#else + call neko_error('no device backend configured') +#endif + end subroutine device_masked_copy + + subroutine device_rzero(a_d, n) type(c_ptr) :: a_d integer :: n @@ -1002,7 +1065,7 @@ subroutine device_add2(a_d, b_d, n) call neko_error('No device backend configured') #endif end subroutine device_add2 - + subroutine device_add2s1(a_d, b_d, c1, n) type(c_ptr) :: a_d, b_d real(kind=rp) :: c1 @@ -1032,7 +1095,7 @@ subroutine device_add2s2(a_d, b_d, c1, n) call neko_error('No device backend configured') #endif end subroutine device_add2s2 - + subroutine device_addsqr2s2(a_d, b_d, c1, n) type(c_ptr) :: a_d, b_d real(kind=rp) :: c1 @@ -1104,7 +1167,7 @@ subroutine device_col2(a_d, b_d, n) call neko_error('No device backend configured') #endif end subroutine device_col2 - + subroutine device_col3(a_d, b_d, c_d, n) type(c_ptr) :: a_d, b_d, c_d integer :: n @@ -1146,7 +1209,7 @@ subroutine device_sub2(a_d, b_d, n) call neko_error('No device backend configured') #endif end subroutine device_sub2 - + subroutine device_sub3(a_d, b_d, c_d, n) type(c_ptr) :: a_d, b_d, c_d integer :: n @@ -1202,7 +1265,24 @@ subroutine device_vdot3(dot_d, u1_d, u2_d, u3_d, v1_d, v2_d, v3_d, n) call neko_error('No device backend configured') #endif end subroutine device_vdot3 - + + function device_vlsc3(u_d, v_d, w_d, n) result(res) + type(c_ptr) :: u_d, v_d, w_d + integer :: n + real(kind=rp) :: res + res = 0.0_rp +#ifdef HAVE_HIP + res = hip_vlsc3(u_d, v_d, w_d, n) +#elif HAVE_CUDA + res = cuda_vlsc3(u_d, v_d, w_d, n) +#elif HAVE_OPENCL + ! Same kernel as glsc3 (currently no device MPI for OpenCL) + res = opencl_glsc3(u_d, v_d, w_d, n) +#else + call neko_error('No device backend configured') +#endif + end function device_vlsc3 + function device_glsc3(a_d, b_d, c_d, n) result(res) type(c_ptr) :: a_d, b_d, c_d integer :: n, ierr @@ -1224,7 +1304,7 @@ function device_glsc3(a_d, b_d, c_d, n) result(res) end if #endif end function device_glsc3 - + subroutine device_glsc3_many(h,w_d,v_d_d,mult_d,j,n) type(c_ptr), value :: w_d, v_d_d, mult_d integer(c_int) :: j, n @@ -1247,7 +1327,7 @@ subroutine device_glsc3_many(h,w_d,v_d_d,mult_d,j,n) end if #endif end subroutine device_glsc3_many - + subroutine device_add2s2_many(y_d,x_d_d,a_d,j,n) type(c_ptr), value :: y_d, x_d_d, a_d integer(c_int) :: j, n @@ -1261,7 +1341,7 @@ subroutine device_add2s2_many(y_d,x_d_d,a_d,j,n) call neko_error('No device backend configured') #endif end subroutine device_add2s2_many - + function device_glsc2(a_d, b_d, n) result(res) type(c_ptr) :: a_d, b_d integer :: n, ierr @@ -1305,6 +1385,6 @@ function device_glsum(a_d, n) result(res) end if #endif end function device_glsum - - + + end module device_math diff --git a/src/math/bcknd/device/device_mathops.F90 b/src/math/bcknd/device/device_mathops.F90 index 3ad0a70e9a4..a7cda7896c9 100644 --- a/src/math/bcknd/device/device_mathops.F90 +++ b/src/math/bcknd/device/device_mathops.F90 @@ -36,7 +36,7 @@ module device_mathops use, intrinsic :: iso_c_binding, only : c_int, c_ptr implicit none private - + #ifdef HAVE_HIP interface subroutine hip_opchsign(a1_d, a2_d, a3_d, gdim, n) & @@ -188,7 +188,7 @@ end subroutine opencl_opadd2col public :: device_opchsign, device_opcolv, device_opcolv3c, & device_opadd2cm, device_opadd2col - + contains !> \f$ a = -a \f$ @@ -221,7 +221,7 @@ subroutine device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n) #endif end subroutine device_opcolv - !> \f$ a(i) = b(i) * c(i) * d \f$ + !> \f$ a(i) = b(i) * c(i) * d \f$ subroutine device_opcolv3c(a1_d, a2_d, a3_d, & b1_d, b2_d, b3_d, c_d, d, n, gdim) type(c_ptr) :: a1_d, a2_d, a3_d, b1_d, b2_d, b3_d, c_d @@ -238,7 +238,7 @@ subroutine device_opcolv3c(a1_d, a2_d, a3_d, & #endif end subroutine device_opcolv3c - !> \f$ a(i) = a + b(i) * c \f$ + !> \f$ a(i) = a + b(i) * c \f$ subroutine device_opadd2cm (a1_d, a2_d, a3_d, b1_d, b2_d, b3_d, c, n, gdim) type(c_ptr) :: a1_d, a2_d, a3_d, b1_d, b2_d, b3_d real(kind=rp) :: c diff --git a/src/math/bcknd/device/device_mpi_op.h b/src/math/bcknd/device/device_mpi_op.h index 248f75cd423..b575647c61a 100644 --- a/src/math/bcknd/device/device_mpi_op.h +++ b/src/math/bcknd/device/device_mpi_op.h @@ -1,3 +1,5 @@ +#ifndef __MATH_DEVICE_MPI_OP_H__ +#define __MATH_DEVICE_MPI_OP_H__ /** * Dummy defintion of device MPI reduce operations * @note we can't put this in device_mpi_reduce.h and include it in device_mpi_reduce.c @@ -6,3 +8,5 @@ #define DEVICE_MPI_SUM 0 #define DEVICE_MPI_MAX 1 + +#endif // __MATH_DEVICE_MPI_OP_H__ diff --git a/src/math/bcknd/device/device_mpi_reduce.h b/src/math/bcknd/device/device_mpi_reduce.h index 4b2be3528c6..e871267318c 100644 --- a/src/math/bcknd/device/device_mpi_reduce.h +++ b/src/math/bcknd/device/device_mpi_reduce.h @@ -1,3 +1,5 @@ +#ifndef __MATH_DEVICE_MPI_REDUCE_H__ +#define __MATH_DEVICE_MPI_REDUCE_H__ /** * C wrapper for MPI calls, until we integrate with NCCL/RCCL @@ -10,3 +12,5 @@ extern "C" { void device_mpi_allreduce_inplace(void *buf_d, int count, size_t nbytes, int op); } + +#endif // __MATH_DEVICE_MPI_REDUCE_H__ diff --git a/src/math/bcknd/device/device_schwarz.F90 b/src/math/bcknd/device/device_schwarz.F90 index bee04432246..da09500671d 100644 --- a/src/math/bcknd/device/device_schwarz.F90 +++ b/src/math/bcknd/device/device_schwarz.F90 @@ -37,7 +37,7 @@ module device_schwarz use, intrinsic :: iso_c_binding, only : c_ptr, c_int implicit none private - + #ifdef HAVE_HIP interface subroutine hip_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx, nelv, stream) & @@ -55,7 +55,7 @@ subroutine hip_schwarz_toext3d(a_d,b_d,nx, nelv, stream) & use, intrinsic :: iso_c_binding import c_rp implicit none - type(c_ptr), value :: a_d, b_d, stream + type(c_ptr), value :: a_d, b_d, stream integer(c_int) :: nx, nelv end subroutine hip_schwarz_toext3d subroutine hip_schwarz_toreg3d(b_d,a_d,nx, nelv, stream) & @@ -63,7 +63,7 @@ subroutine hip_schwarz_toreg3d(b_d,a_d,nx, nelv, stream) & use, intrinsic :: iso_c_binding import c_rp implicit none - type(c_ptr), value :: a_d, b_d, stream + type(c_ptr), value :: a_d, b_d, stream integer(c_int) :: nx, nelv end subroutine hip_schwarz_toreg3d end interface @@ -74,7 +74,7 @@ subroutine cuda_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx, nelv, stream) & use, intrinsic :: iso_c_binding import c_rp implicit none - type(c_ptr), value :: arr1_d, arr2_d, stream + type(c_ptr), value :: arr1_d, arr2_d, stream integer(c_int) :: l1, l2, nx, nelv real(c_rp) :: f1, f2 @@ -92,35 +92,35 @@ subroutine cuda_schwarz_toreg3d(b_d,a_d,nx, nelv, stream) & use, intrinsic :: iso_c_binding import c_rp implicit none - type(c_ptr), value :: a_d, b_d, stream + type(c_ptr), value :: a_d, b_d, stream integer(c_int) :: nx, nelv end subroutine cuda_schwarz_toreg3d end interface #elif HAVE_OPENCL interface - subroutine opencl_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx, nelv) & + subroutine opencl_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx, nelv, stream) & bind(c, name='opencl_schwarz_extrude') use, intrinsic :: iso_c_binding import c_rp implicit none - type(c_ptr), value :: arr1_d, arr2_d + type(c_ptr), value :: arr1_d, arr2_d, stream integer(c_int) :: l1, l2, nx, nelv real(c_rp) :: f1, f2 end subroutine opencl_schwarz_extrude - subroutine opencl_schwarz_toext3d(a_d,b_d,nx, nelv) & + subroutine opencl_schwarz_toext3d(a_d,b_d,nx, nelv, stream) & bind(c, name='opencl_schwarz_toext3d') use, intrinsic :: iso_c_binding import c_rp implicit none - type(c_ptr), value :: a_d, b_d + type(c_ptr), value :: a_d, b_d, stream integer(c_int) :: nx, nelv end subroutine opencl_schwarz_toext3d - subroutine opencl_schwarz_toreg3d(b_d,a_d,nx, nelv) & + subroutine opencl_schwarz_toreg3d(b_d,a_d,nx, nelv, stream) & bind(c, name='opencl_schwarz_toreg3d') use, intrinsic :: iso_c_binding import c_rp implicit none - type(c_ptr), value :: a_d, b_d + type(c_ptr), value :: a_d, b_d, stream integer(c_int) :: nx, nelv end subroutine opencl_schwarz_toreg3d end interface @@ -128,7 +128,7 @@ end subroutine opencl_schwarz_toreg3d public :: device_schwarz_extrude, device_schwarz_toext3d, & device_schwarz_toreg3d - + contains subroutine device_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx,ny,nz, nelv, stream) integer, intent(in) :: l1,l2,nx,ny,nz, nelv @@ -143,7 +143,7 @@ subroutine device_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx,ny,nz, nelv, stre #elif HAVE_CUDA call cuda_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx,nelv, stream) #elif HAVE_OPENCL - call opencl_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx,nelv) + call opencl_schwarz_extrude(arr1_d,l1,f1,arr2_d,l2,f2,nx,nelv, stream) #else call neko_error('No device backend configured') #endif @@ -162,7 +162,7 @@ subroutine device_schwarz_toext3d(a_d,b_d,nx, nelv, stream) #elif HAVE_CUDA call cuda_schwarz_toext3d(a_d,b_d,nx,nelv, stream) #elif HAVE_OPENCL - call opencl_schwarz_toext3d(a_d,b_d,nx,nelv) + call opencl_schwarz_toext3d(a_d,b_d,nx,nelv, stream) #else call neko_error('No device backend configured') #endif @@ -180,7 +180,7 @@ subroutine device_schwarz_toreg3d(b_d,a_d,nx, nelv, stream) #elif HAVE_CUDA call cuda_schwarz_toreg3d(b_d,a_d,nx,nelv, stream) #elif HAVE_OPENCL - call opencl_schwarz_toreg3d(b_d,a_d,nx,nelv) + call opencl_schwarz_toreg3d(b_d,a_d,nx,nelv, stream) #else call neko_error('No device backend configured') #endif diff --git a/src/math/bcknd/device/fdm_device.F90 b/src/math/bcknd/device/fdm_device.F90 index 2750e8097d0..a88ba3e1be9 100644 --- a/src/math/bcknd/device/fdm_device.F90 +++ b/src/math/bcknd/device/fdm_device.F90 @@ -37,7 +37,7 @@ module fdm_device use, intrinsic :: iso_c_binding, only : c_ptr, c_int implicit none private - + #ifdef HAVE_HIP interface subroutine hip_fdm_do_fast(e_d, r_d, s_d, d_d, nl, nelv, stream) & @@ -68,7 +68,7 @@ end subroutine opencl_fdm_do_fast #endif public :: fdm_do_fast_device - + contains subroutine fdm_do_fast_device(e, r, s, d, nl, ldim, nelv, stream) @@ -76,7 +76,7 @@ subroutine fdm_do_fast_device(e, r, s, d, nl, ldim, nelv, stream) real(kind=rp), intent(inout) :: e(nl**ldim, nelv) real(kind=rp), intent(inout) :: r(nl**ldim, nelv) real(kind=rp), intent(inout) :: s(nl*nl,2,ldim, nelv) - real(kind=rp), intent(inout) :: d(nl**ldim, nelv) + real(kind=rp), intent(inout) :: d(nl**ldim, nelv) type(c_ptr) :: e_d, r_d, s_d, d_d type(c_ptr), optional :: stream diff --git a/src/math/bcknd/device/hip/ax_helm_kernel.h b/src/math/bcknd/device/hip/ax_helm_kernel.h index 5336e02fcf1..67dec5a935c 100644 --- a/src/math/bcknd/device/hip/ax_helm_kernel.h +++ b/src/math/bcknd/device/hip/ax_helm_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_AX_HELM_KERNEL_H__ +#define __MATH_AX_HELM_KERNEL_H__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -346,3 +348,5 @@ __global__ void __launch_bounds__(LX*LX,3) w[ij + k*LX*LX + ele] = rw[k]; } } + +#endif // __MATH_AX_HELM_KERNEL_H__ diff --git a/src/math/bcknd/device/hip/cdtp_kernel.h b/src/math/bcknd/device/hip/cdtp_kernel.h index 1858ecebef6..6c3e958e404 100644 --- a/src/math/bcknd/device/hip/cdtp_kernel.h +++ b/src/math/bcknd/device/hip/cdtp_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_CDTP_KERNEL_H__ +#define __MATH_CDTP_KERNEL_H__ /* Copyright (c) 2021-2023, The Neko Authors All rights reserved. @@ -124,7 +126,6 @@ __global__ void __launch_bounds__(LX*LX,3) T rtar[LX]; T rtas[LX]; T rtat[LX]; - T rjac[LX]; const int e = blockIdx.x; const int j = threadIdx.y; @@ -174,3 +175,5 @@ __global__ void __launch_bounds__(LX*LX,3) } } + +#endif // __MATH_CDTP_KERNEL_H__ diff --git a/src/math/bcknd/device/hip/cfl_kernel.h b/src/math/bcknd/device/hip/cfl_kernel.h index 239242e90b7..b51fded435e 100644 --- a/src/math/bcknd/device/hip/cfl_kernel.h +++ b/src/math/bcknd/device/hip/cfl_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_CFL_KERNEL_H__ +#define __MATH_CFL_KERNEL_H__ /* Copyright (c) 2022, The Neko Authors All rights reserved. @@ -180,3 +182,5 @@ __global__ void cfl_kernel(const T dt, cfl_h[blockIdx.x] = cfl_tmp; } + +#endif // __MATH_CFL_KERNEL_H__ diff --git a/src/math/bcknd/device/hip/conv1_kernel.h b/src/math/bcknd/device/hip/conv1_kernel.h index c094ab98fe1..d77c1cde6aa 100644 --- a/src/math/bcknd/device/hip/conv1_kernel.h +++ b/src/math/bcknd/device/hip/conv1_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_CONV1_KERNEL_H__ +#define __MATH_CONV1_KERNEL_H__ /* Copyright (c) 2021-2023, The Neko Authors All rights reserved. @@ -210,3 +212,5 @@ __global__ void __launch_bounds__(LX*LX,3) } } + +#endif // __MATH_CONV1_KERNEL_H__ diff --git a/src/math/bcknd/device/hip/dudxyz_kernel.h b/src/math/bcknd/device/hip/dudxyz_kernel.h index 6265d28d574..d2c4ef25d20 100644 --- a/src/math/bcknd/device/hip/dudxyz_kernel.h +++ b/src/math/bcknd/device/hip/dudxyz_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_DUDXYZ_KERNEL_H__ +#define __MATH_DUDXYZ_KERNEL_H__ /* Copyright (c) 2021-2023, The Neko Authors All rights reserved. @@ -174,3 +176,5 @@ __global__ void __launch_bounds__(LX*LX,3) } } + +#endif // __MATH_DUDXYZ_KERNEL_H__ diff --git a/src/math/bcknd/device/hip/fdm_kernel.h b/src/math/bcknd/device/hip/fdm_kernel.h index 0db84fbed01..f4baf90a89f 100644 --- a/src/math/bcknd/device/hip/fdm_kernel.h +++ b/src/math/bcknd/device/hip/fdm_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_FDM_KERNEL_H__ +#define __MATH_FDM_KERNEL_H__ /* Copyright (c) 2022, The Neko Authors All rights reserved. @@ -135,3 +137,5 @@ __global__ void fdm_do_fast_kernel(T * __restrict__ e, } + +#endif // __MATH_FDM_KERNEL_H__ diff --git a/src/math/bcknd/device/hip/lambda2_kernel.h b/src/math/bcknd/device/hip/lambda2_kernel.h index 983589b2b0a..600b13e09ef 100644 --- a/src/math/bcknd/device/hip/lambda2_kernel.h +++ b/src/math/bcknd/device/hip/lambda2_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_LAMBDA2_KERNEL_H__ +#define __MATH_LAMBDA2_KERNEL_H__ /* Copyright (c) 2021-2023, The Neko Authors All rights reserved. @@ -357,3 +359,5 @@ lambda2_kernel_kstep(T * __restrict__ lambda2, } } + +#endif // __MATH_LAMBDA2_KERNEL_H__ diff --git a/src/math/bcknd/device/hip/math.hip b/src/math/bcknd/device/hip/math.hip index 9dcb13a4216..7f217f1e759 100644 --- a/src/math/bcknd/device/hip/math.hip +++ b/src/math/bcknd/device/hip/math.hip @@ -51,6 +51,21 @@ extern "C" { (hipStream_t) glb_cmd_queue)); } + /** Fortran wrapper for masked copy + * Copy a vector \f$ a(mask) = b(mask) \f$ + */ + void hip_masked_copy(void *a, void *b, void *mask, int *n, int *m) { + + const dim3 nthrds(1024, 1, 1); + const dim3 nblcks(((*m)+1024 - 1)/ 1024, 1, 1); + + hipLaunchKernelGGL(HIP_KERNEL_NAME(masked_copy_kernel), + nblcks, nthrds, 0, (hipStream_t) glb_cmd_queue, + (real *) a, (real *) b, (int *) mask, *n, *m); + + HIP_CHECK(hipGetLastError()); + + } /** Fortran wrapper for rzero * Zero a real vector */ @@ -358,6 +373,44 @@ extern "C" { real * bufred = NULL; real * bufred_d = NULL; + /** + * Fortran wrapper vlsc3 + * Compute multiplication sum \f$ dot = u \cdot v \cdot w \f$ + */ + real hip_vlsc3(void *u, void *v, void *w, int *n) { + + const dim3 nthrds(1024, 1, 1); + const dim3 nblcks(((*n)+1024 - 1)/ 1024, 1, 1); + const int nb = ((*n) + 1024 - 1)/ 1024; + const hipStream_t stream = (hipStream_t) glb_cmd_queue; + + if ( nb > red_s){ + red_s = nb; + if (bufred != NULL) { + HIP_CHECK(hipHostFree(bufred)); + HIP_CHECK(hipFree(bufred_d)); + } + HIP_CHECK(hipHostMalloc(&bufred,nb*sizeof(real),hipHostMallocDefault)); + HIP_CHECK(hipMalloc(&bufred_d, nb*sizeof(real))); + } + + hipLaunchKernelGGL(HIP_KERNEL_NAME(glsc3_kernel), + nblcks, nthrds, 0, stream, + (real *) u, (real *) v, + (real *) w, bufred_d, *n); + HIP_CHECK(hipGetLastError()); + hipLaunchKernelGGL(HIP_KERNEL_NAME(reduce_kernel), + 1, 1024, 0, stream, bufred_d, nb); + HIP_CHECK(hipGetLastError()); + + HIP_CHECK(hipMemcpyAsync(bufred, bufred_d, sizeof(real), + hipMemcpyDeviceToHost, stream)); + hipStreamSynchronize(stream); + + return bufred[0]; + } + + /** * Fortran wrapper glsc3 * Weighted inner product \f$ a^T b c \f$ diff --git a/src/math/bcknd/device/hip/math_kernel.h b/src/math/bcknd/device/hip/math_kernel.h index 04bdd1573e8..42673b026c8 100644 --- a/src/math/bcknd/device/hip/math_kernel.h +++ b/src/math/bcknd/device/hip/math_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_MATH_KERNEL_H__ +#define __MATH_MATH_KERNEL_H__ /* Copyright (c) 2021-2023, The Neko Authors All rights reserved. @@ -48,6 +50,24 @@ __global__ void cmult_kernel(T * __restrict__ a, } } +/** + * Device kernel for masked copy + */ +template< typename T > +__global__ void masked_copy_kernel(T * __restrict__ a, + T * __restrict__ b, + int * __restrict__ mask, + const int n, + const int m) { + + const int idx = blockIdx.x * blockDim.x + threadIdx.x; + const int str = blockDim.x * gridDim.x; + + for (int i = idx; i < m; i += str) { + a[mask[i+1]-1] = b[mask[i+1]-1]; + } +} + /** * Device kernel for cmult2 */ @@ -605,3 +625,5 @@ __global__ void glsum_kernel(const T * a, buf_h[blockIdx.x] = sum; } + +#endif // __MATH_MATH_KERNEL_H__ diff --git a/src/math/bcknd/device/hip/mathops_kernel.h b/src/math/bcknd/device/hip/mathops_kernel.h index 0df7a1a31fa..b7be1d88022 100644 --- a/src/math/bcknd/device/hip/mathops_kernel.h +++ b/src/math/bcknd/device/hip/mathops_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_MATHOPS_KERNEL_H__ +#define __MATH_MATHOPS_KERNEL_H__ /* Copyright (c) 2021, The Neko Authors All rights reserved. @@ -192,3 +194,5 @@ __global__ void opadd2col_kernel(T * __restrict__ a1, } + +#endif // __MATH_MATHOPS_KERNEL_H__ diff --git a/src/math/bcknd/device/hip/opgrad_kernel.h b/src/math/bcknd/device/hip/opgrad_kernel.h index 1667a4ed00b..1eda4cfd4de 100644 --- a/src/math/bcknd/device/hip/opgrad_kernel.h +++ b/src/math/bcknd/device/hip/opgrad_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_OPGRAD_KERNEL_H__ +#define __MATH_OPGRAD_KERNEL_H__ /* Copyright (c) 2021-2023, The Neko Authors All rights reserved. @@ -197,3 +199,5 @@ opgrad_kernel_kstep(T * __restrict__ ux, } } + +#endif // __MATH_OPGRAD_KERNEL_H__ diff --git a/src/math/bcknd/device/hip/schwarz_kernel.h b/src/math/bcknd/device/hip/schwarz_kernel.h index c3079d6231a..2cba07ab9b5 100644 --- a/src/math/bcknd/device/hip/schwarz_kernel.h +++ b/src/math/bcknd/device/hip/schwarz_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_SCHWARZ_KERNEL_H__ +#define __MATH_SCHWARZ_KERNEL_H__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -159,3 +161,5 @@ __global__ void schwarz_toreg3d_kernel(T * __restrict__ b, b[ijk+el] = a[(i+1)+(j+1)*nx2+(k+1)*nx2*nx2+el2]; } } + +#endif // __MATH_SCHWARZ_KERNEL_H__ diff --git a/src/math/bcknd/device/hip/tensor_kernel.h b/src/math/bcknd/device/hip/tensor_kernel.h index c785f778d3f..7e56b6dc484 100644 --- a/src/math/bcknd/device/hip/tensor_kernel.h +++ b/src/math/bcknd/device/hip/tensor_kernel.h @@ -1,3 +1,5 @@ +#ifndef __MATH_TENSOR_KERNEL_H__ +#define __MATH_TENSOR_KERNEL_H__ /* Copyright (c) 2022, The Neko Authors All rights reserved. @@ -150,3 +152,5 @@ __global__ void tnsr3d_kernel(T * __restrict__ v, } + +#endif // __MATH_TENSOR_KERNEL_H__ diff --git a/src/math/bcknd/device/opencl/ax_helm_kernel.cl b/src/math/bcknd/device/opencl/ax_helm_kernel.cl index d095ab14590..c08e4ce15ad 100644 --- a/src/math/bcknd/device/opencl/ax_helm_kernel.cl +++ b/src/math/bcknd/device/opencl/ax_helm_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __MATH_AX_HELM_KERNEL_CL__ +#define __MATH_AX_HELM_KERNEL_CL__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -152,3 +154,5 @@ DEFINE_AX_HELM_KERNEL(7, 256) DEFINE_AX_HELM_KERNEL(8, 256) DEFINE_AX_HELM_KERNEL(9, 256) + +#endif // __MATH_AX_HELM_KERNEL_CL__ diff --git a/src/math/bcknd/device/opencl/cdtp_kernel.cl b/src/math/bcknd/device/opencl/cdtp_kernel.cl index 7a6cfabda75..4c1349471e3 100644 --- a/src/math/bcknd/device/opencl/cdtp_kernel.cl +++ b/src/math/bcknd/device/opencl/cdtp_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __MATH_CDTP_KERNEL_CL__ +#define __MATH_CDTP_KERNEL_CL__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -117,3 +119,5 @@ DEFINE_CDTP_KERNEL(10, 256) + +#endif // __MATH_CDTP_KERNEL_CL__ diff --git a/src/math/bcknd/device/opencl/cfl_kernel.cl b/src/math/bcknd/device/opencl/cfl_kernel.cl index 3a6cf5e98fe..2cc1496755c 100644 --- a/src/math/bcknd/device/opencl/cfl_kernel.cl +++ b/src/math/bcknd/device/opencl/cfl_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __MATH_CFL_KERNEL_CL__ +#define __MATH_CFL_KERNEL_CL__ /* Copyright (c) 2022, The Neko Authors All rights reserved. @@ -145,3 +147,5 @@ DEFINE_CFL_KERNEL(7, 256) DEFINE_CFL_KERNEL(8, 256) DEFINE_CFL_KERNEL(9, 256) + +#endif // __MATH_CFL_KERNEL_CL__ diff --git a/src/math/bcknd/device/opencl/conv1_kernel.cl b/src/math/bcknd/device/opencl/conv1_kernel.cl index 782e239088b..648ca2765d0 100644 --- a/src/math/bcknd/device/opencl/conv1_kernel.cl +++ b/src/math/bcknd/device/opencl/conv1_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __MATH_CONV1_KERNEL_CL__ +#define __MATH_CONV1_KERNEL_CL__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -135,3 +137,5 @@ DEFINE_CONV1_KERNEL(7, 256) DEFINE_CONV1_KERNEL(8, 256) DEFINE_CONV1_KERNEL(9, 256) + +#endif // __MATH_CONV1_KERNEL_CL__ diff --git a/src/math/bcknd/device/opencl/dudxyz_kernel.cl b/src/math/bcknd/device/opencl/dudxyz_kernel.cl index cff01a0bdd1..6326b3bb584 100644 --- a/src/math/bcknd/device/opencl/dudxyz_kernel.cl +++ b/src/math/bcknd/device/opencl/dudxyz_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __MATH_DUDXYZ_KERNEL_CL__ +#define __MATH_DUDXYZ_KERNEL_CL__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -115,3 +117,5 @@ DEFINE_DUDXYZ_KERNEL(7, 256) DEFINE_DUDXYZ_KERNEL(8, 256) DEFINE_DUDXYZ_KERNEL(9, 256) + +#endif // __MATH_DUDXYZ_KERNEL_CL__ diff --git a/src/math/bcknd/device/opencl/fdm_kernel.cl b/src/math/bcknd/device/opencl/fdm_kernel.cl index c81957ba49f..9d1578ffbf2 100644 --- a/src/math/bcknd/device/opencl/fdm_kernel.cl +++ b/src/math/bcknd/device/opencl/fdm_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __MATH_FDM_KERNEL_CL__ +#define __MATH_FDM_KERNEL_CL__ /* Copyright (c) 2022, The Neko Authors All rights reserved. @@ -161,3 +163,5 @@ DEFINE_FDM_DO_FAST_KERNEL(13) DEFINE_FDM_DO_FAST_KERNEL(14) DEFINE_FDM_DO_FAST_KERNEL(15) + +#endif // __MATH_FDM_KERNEL_CL__ diff --git a/src/math/bcknd/device/opencl/lambda2_kernel.cl b/src/math/bcknd/device/opencl/lambda2_kernel.cl index 8b8e5468296..652765931b3 100644 --- a/src/math/bcknd/device/opencl/lambda2_kernel.cl +++ b/src/math/bcknd/device/opencl/lambda2_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __MATH_LAMBDA2_KERNEL_CL__ +#define __MATH_LAMBDA2_KERNEL_CL__ /* Copyright (c) 2023, The Neko Authors All rights reserved. @@ -250,3 +252,5 @@ DEFINE_LAMBDA2_KERNEL(10, 256) DEFINE_LAMBDA2_KERNEL(11, 256) DEFINE_LAMBDA2_KERNEL(12, 256) + +#endif // __MATH_LAMBDA2_KERNEL_CL__ diff --git a/src/math/bcknd/device/opencl/math.c b/src/math/bcknd/device/opencl/math.c index 7b7da7f7c88..b0885c8388d 100644 --- a/src/math/bcknd/device/opencl/math.c +++ b/src/math/bcknd/device/opencl/math.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2021-2022, The Neko Authors + Copyright (c) 2021-2024, The Neko Authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -39,6 +39,7 @@ #endif #include +#include #include #include #include @@ -55,6 +56,34 @@ void opencl_copy(void *a, void *b, int *n) { 0, NULL, NULL)); } +/** Fortran wrapper for masked copy + * Copy a vector \f$ a = b \f$ + */ +void opencl_masked_copy(void *a, void *b, void *mask, int *n, int *m) { + cl_int err; + + if (math_program == NULL) + opencl_kernel_jit(math_kernel, (cl_program *) &math_program); + + cl_kernel kernel = clCreateKernel(math_program, "masked_copy_kernel", &err); + CL_CHECK(err); + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &a)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &b)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &mask)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), n)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(int), m)); + + const int nb = ((*n) + 256 - 1) / 256; + const size_t global_item_size = 256 * nb; + const size_t local_item_size = 256; + + CL_CHECK(clEnqueueNDRangeKernel((cl_command_queue) glb_cmd_queue, kernel, 1, + NULL, &global_item_size, &local_item_size, + 0, NULL, NULL)); + +} + /** Fortran wrapper for rzero * Zero a real vector */ diff --git a/src/math/bcknd/device/opencl/math_kernel.cl b/src/math/bcknd/device/opencl/math_kernel.cl index c264f6c30d2..b6becea870c 100644 --- a/src/math/bcknd/device/opencl/math_kernel.cl +++ b/src/math/bcknd/device/opencl/math_kernel.cl @@ -1,5 +1,7 @@ +#ifndef __MATH_MATH_KERNEL_CL__ +#define __MATH_MATH_KERNEL_CL__ /* - Copyright (c) 2021-2022, The Neko Authors + Copyright (c) 2021-2024, The Neko Authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -32,6 +34,23 @@ POSSIBILITY OF SUCH DAMAGE. */ +/** + * Device kernel for masked copy + */ +__kernel void masked_copy_kernel(__global real * __restrict__ a, + __global real * __restrict__ b, + __global int * __restrict__ mask, + const int n, + const int m) { + + const int idx = get_global_id(0); + const int str = get_global_size(0); + + for (int i = idx; i < n; i += str) { + a[mask[i+1]-1] = b[mask[i+1]-1]; + } +} + /** * Device kernel for cmult */ @@ -503,3 +522,5 @@ __kernel void glsum_kernel(__global const real * __restrict__ a, } } + +#endif // __MATH_MATH_KERNEL_CL__ diff --git a/src/math/bcknd/device/opencl/mathops_kernel.cl b/src/math/bcknd/device/opencl/mathops_kernel.cl index e23866dc52e..a33959cb0f6 100644 --- a/src/math/bcknd/device/opencl/mathops_kernel.cl +++ b/src/math/bcknd/device/opencl/mathops_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __MATH_MATHOPS_KERNEL_CL__ +#define __MATH_MATHOPS_KERNEL_CL__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -187,3 +189,5 @@ __kernel void opadd2col_kernel(__global real *a1, } + +#endif // __MATH_MATHOPS_KERNEL_CL__ diff --git a/src/math/bcknd/device/opencl/opgrad_kernel.cl b/src/math/bcknd/device/opencl/opgrad_kernel.cl index 3db362f955f..9f901d078b2 100644 --- a/src/math/bcknd/device/opencl/opgrad_kernel.cl +++ b/src/math/bcknd/device/opencl/opgrad_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __MATH_OPGRAD_KERNEL_CL__ +#define __MATH_OPGRAD_KERNEL_CL__ /* Copyright (c) 2021-2022, The Neko Authors All rights reserved. @@ -127,3 +129,5 @@ DEFINE_OPGRAD_KERNEL(5, 256) DEFINE_OPGRAD_KERNEL(4, 256) DEFINE_OPGRAD_KERNEL(3, 256) DEFINE_OPGRAD_KERNEL(2, 256) + +#endif // __MATH_OPGRAD_KERNEL_CL__ diff --git a/src/math/bcknd/device/opencl/opr_cfl.c b/src/math/bcknd/device/opencl/opr_cfl.c index 9481b8fc7c5..701a077bb83 100644 --- a/src/math/bcknd/device/opencl/opr_cfl.c +++ b/src/math/bcknd/device/opencl/opr_cfl.c @@ -39,6 +39,7 @@ #endif #include +#include #include #include #include diff --git a/src/math/bcknd/device/opencl/opr_opgrad.c b/src/math/bcknd/device/opencl/opr_opgrad.c index 86b421f53a3..035c9c20c2c 100644 --- a/src/math/bcknd/device/opencl/opr_opgrad.c +++ b/src/math/bcknd/device/opencl/opr_opgrad.c @@ -39,6 +39,7 @@ #endif #include +#include #include #include #include diff --git a/src/math/bcknd/device/opencl/schwarz_kernel.cl b/src/math/bcknd/device/opencl/schwarz_kernel.cl index 59911952dca..73e2343c909 100644 --- a/src/math/bcknd/device/opencl/schwarz_kernel.cl +++ b/src/math/bcknd/device/opencl/schwarz_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __MATH_SCHWARZ_KERNEL_CL__ +#define __MATH_SCHWARZ_KERNEL_CL__ /* Copyright (c) 2022-2023, The Neko Authors All rights reserved. @@ -175,3 +177,5 @@ __kernel void schwarz_toreg3d_kernel(__global real * __restrict__ b, b[ijk+el] = a[(i+1)+(j+1)*nx2+(k+1)*nx2*nx2+el2]; } } + +#endif // __MATH_SCHWARZ_KERNEL_CL__ diff --git a/src/math/bcknd/device/opencl/tensor_kernel.cl b/src/math/bcknd/device/opencl/tensor_kernel.cl index 1c1ed56108a..be04719536d 100644 --- a/src/math/bcknd/device/opencl/tensor_kernel.cl +++ b/src/math/bcknd/device/opencl/tensor_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __MATH_TENSOR_KERNEL_CL__ +#define __MATH_TENSOR_KERNEL_CL__ /* Copyright (c) 2022-2023, The Neko Authors All rights reserved. @@ -147,3 +149,5 @@ __kernel void tnsr3d_el_kernel(__global real * __restrict__ v, } + +#endif // __MATH_TENSOR_KERNEL_CL__ diff --git a/src/math/bcknd/device/opr_device.F90 b/src/math/bcknd/device/opr_device.F90 index 2ffaeeb2761..6c9a08ccf97 100644 --- a/src/math/bcknd/device/opr_device.F90 +++ b/src/math/bcknd/device/opr_device.F90 @@ -31,12 +31,12 @@ ! POSSIBILITY OF SUCH DAMAGE. ! !> Operators accelerator backends -module opr_device +module opr_device use gather_scatter use num_types, only : rp, c_rp use device_math use device_mathops - use device, only : device_get_ptr + use device, only : device_get_ptr use space, only : space_t use coefs, only : coef_t use mesh, only : mesh_t @@ -46,7 +46,7 @@ module opr_device use, intrinsic :: iso_c_binding implicit none private - + public :: opr_device_dudxyz, opr_device_opgrad, opr_device_cdtp, & opr_device_conv1, opr_device_curl, opr_device_cfl, opr_device_lambda2 @@ -105,7 +105,7 @@ subroutine hip_opgrad(ux_d, uy_d, uz_d, u_d, & integer(c_int) :: nel, lx end subroutine hip_opgrad end interface - + interface subroutine hip_lambda2(lambda2_d, u_d, v_d, w_d, & dx_d, dy_d, dz_d, & @@ -131,7 +131,7 @@ real(c_rp) function hip_cfl(dt, u_d, v_d, w_d, & jacinv_d, nel, lx) & bind(c, name='hip_cfl') use, intrinsic :: iso_c_binding - import c_rp + import c_rp type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d @@ -176,7 +176,7 @@ subroutine cuda_conv1(du_d, u_d, vx_d, vy_d, vz_d, & integer(c_int) :: nel, gdim, lx end subroutine cuda_conv1 end interface - + interface subroutine cuda_opgrad(ux_d, uy_d, uz_d, u_d, & dx_d, dy_d, dz_d, & @@ -194,7 +194,7 @@ subroutine cuda_opgrad(ux_d, uy_d, uz_d, u_d, & integer(c_int) :: nel, lx end subroutine cuda_opgrad end interface - + interface subroutine cuda_lambda2(lambda2_d, u_d, v_d, w_d, & dx_d, dy_d, dz_d, & @@ -221,7 +221,7 @@ real(c_rp) function cuda_cfl(dt, u_d, v_d, w_d, & jacinv_d, nel, lx) & bind(c, name='cuda_cfl') use, intrinsic :: iso_c_binding - import c_rp + import c_rp type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d @@ -292,7 +292,7 @@ real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, & jacinv_d, nel, lx) & bind(c, name='opencl_cfl') use, intrinsic :: iso_c_binding - import c_rp + import c_rp type(c_ptr), value :: u_d, v_d, w_d, drdx_d, dsdx_d, dtdx_d type(c_ptr), value :: drdy_d, dsdy_d, dtdy_d, drdz_d, dsdz_d, dtdz_d type(c_ptr), value :: dr_inv_d, ds_inv_d, dt_inv_d, jacinv_d @@ -301,7 +301,7 @@ real(c_rp) function opencl_cfl(dt, u_d, v_d, w_d, & end function opencl_cfl end interface - interface + interface subroutine opencl_lambda2(lambda2_d, u_d, v_d, w_d, & dx_d, dy_d, dz_d, & drdx_d, dsdx_d, dtdx_d, & @@ -318,8 +318,8 @@ subroutine opencl_lambda2(lambda2_d, u_d, v_d, w_d, & integer(c_int) :: nel, lx end subroutine opencl_lambda2 end interface -#endif - +#endif + contains subroutine opr_device_dudxyz(du, u, dr, ds, dt, coef) @@ -337,7 +337,7 @@ subroutine opr_device_dudxyz(du, u, dr, ds, dt, coef) ds_d = device_get_ptr(ds) dt_d = device_get_ptr(dt) - associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof) + associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof) #ifdef HAVE_HIP call hip_dudxyz(du_d, u_d, dr_d, ds_d, dt_d, & Xh%dx_d, Xh%dy_d, Xh%dz_d, coef%jacinv_d, & @@ -354,11 +354,11 @@ subroutine opr_device_dudxyz(du, u, dr, ds, dt, coef) call neko_error('No device backend configured') #endif end associate - + end subroutine opr_device_dudxyz - subroutine opr_device_opgrad(ux, uy, uz, u, coef) - type(coef_t), intent(in) :: coef + subroutine opr_device_opgrad(ux, uy, uz, u, coef) + type(coef_t), intent(in) :: coef real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: ux real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uy real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uz @@ -370,7 +370,7 @@ subroutine opr_device_opgrad(ux, uy, uz, u, coef) uz_d = device_get_ptr(uz) u_d = device_get_ptr(u) - + associate(Xh => coef%Xh, msh => coef%msh) #ifdef HAVE_HIP call hip_opgrad(ux_d, uy_d, uz_d, u_d, & @@ -397,35 +397,35 @@ subroutine opr_device_opgrad(ux, uy, uz, u, coef) call neko_error('No device backend configured') #endif end associate - + end subroutine opr_device_opgrad subroutine opr_device_lambda2(lambda2, u, v, w, coef) - type(coef_t), intent(in) :: coef - type(field_t), intent(inout) :: lambda2 + type(coef_t), intent(in) :: coef + type(field_t), intent(inout) :: lambda2 type(field_t), intent(in) :: u, v, w #ifdef HAVE_HIP - call hip_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, & + call hip_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, & coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, & coef%drdx_d, coef%dsdx_d, coef%dtdx_d, & coef%drdy_d, coef%dsdy_d, coef%dtdy_d, & coef%drdz_d, coef%dsdz_d, coef%dtdz_d, & coef%jacinv_d, coef%msh%nelv, coef%Xh%lx) #elif HAVE_CUDA - call cuda_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, & + call cuda_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, & coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, & coef%drdx_d, coef%dsdx_d, coef%dtdx_d, & coef%drdy_d, coef%dsdy_d, coef%dtdy_d, & coef%drdz_d, coef%dsdz_d, coef%dtdz_d, & coef%jacinv_d, coef%msh%nelv, coef%Xh%lx) #elif HAVE_OPENCL - call opencl_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, & + call opencl_lambda2(lambda2%x_d,u%x_d,v%x_d,w%x_d, & coef%Xh%dx_d, coef%Xh%dy_d, coef%Xh%dz_d, & coef%drdx_d, coef%dsdx_d, coef%dtdx_d, & coef%drdy_d, coef%dsdy_d, coef%dtdy_d, & coef%drdz_d, coef%dsdz_d, coef%dtdz_d, & coef%jacinv_d, coef%msh%nelv, coef%Xh%lx) #else - call neko_error('No device backend configured') + call neko_error('No device backend configured') #endif end subroutine opr_device_lambda2 @@ -444,8 +444,8 @@ subroutine opr_device_cdtp(dtx, x, dr,ds, dt, coef) dr_d = device_get_ptr(dr) ds_d = device_get_ptr(ds) dt_d = device_get_ptr(dt) - - associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof) + + associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof) #ifdef HAVE_HIP call hip_cdtp(dtx_d, x_d, dr_d, ds_d, dt_d, & Xh%dxt_d, Xh%dyt_d, Xh%dzt_d, coef%B_d, & @@ -461,11 +461,11 @@ subroutine opr_device_cdtp(dtx, x, dr,ds, dt, coef) #else call neko_error('No device backend configured') #endif - end associate + end associate end subroutine opr_device_cdtp - subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim) + subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim) type(space_t), intent(in) :: Xh type(coef_t), intent(in) :: coef integer, intent(in) :: nelv, gdim @@ -482,8 +482,8 @@ subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim) vx_d = device_get_ptr(vx) vy_d = device_get_ptr(vy) vz_d = device_get_ptr(vz) - - associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof) + + associate(Xh => coef%Xh, msh => coef%msh, dof => coef%dof) #ifdef HAVE_HIP call hip_conv1(du_d, u_d, vx_d, vy_d, vz_d, & Xh%dx_d, Xh%dy_d, Xh%dz_d, & @@ -509,7 +509,7 @@ subroutine opr_device_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim) call neko_error('No device backend configured') #endif end associate - + end subroutine opr_device_conv1 subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh) @@ -650,16 +650,16 @@ subroutine opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh) call device_sub3(w3%x_d, work1%x_d, work2%x_d, n) !! BC dependent, Needs to change if cyclic - call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_Xh%B_d, gdim, n) - call c_Xh%gs_h%op(w1, GS_OP_ADD) - call c_Xh%gs_h%op(w2, GS_OP_ADD) + call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_Xh%B_d, gdim, n) + call c_Xh%gs_h%op(w1, GS_OP_ADD) + call c_Xh%gs_h%op(w2, GS_OP_ADD) call c_Xh%gs_h%op(w3, GS_OP_ADD) - call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_Xh%Binv_d, gdim, n) + call device_opcolv(w1%x_d, w2%x_d, w3%x_d, c_Xh%Binv_d, gdim, n) #else call neko_error('No device backend configured') #endif - + end subroutine opr_device_curl function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl) @@ -667,7 +667,7 @@ function opr_device_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl) type(coef_t) :: coef integer :: nelv, gdim real(kind=rp) :: dt - real(kind=rp), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: u, v, w + real(kind=rp), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: u, v, w real(kind=rp) :: cfl type(c_ptr) :: u_d, v_d, w_d diff --git a/src/math/bcknd/device/tensor_device.F90 b/src/math/bcknd/device/tensor_device.F90 index 63a3a179aaf..422209d3ea2 100644 --- a/src/math/bcknd/device/tensor_device.F90 +++ b/src/math/bcknd/device/tensor_device.F90 @@ -37,9 +37,9 @@ module tensor_device private public :: tnsr3d_device, tnsr3d_el_list_device - + #ifdef HAVE_HIP - interface + interface subroutine hip_tnsr3d_el_list(v_d, nv, u_d, nu, A_d, Bt_d, Ct_d, elements, n_points) & bind(c, name='hip_tnsr3d_el_list') use, intrinsic :: iso_c_binding diff --git a/src/math/bcknd/sx/ax_helm_sx.f90 b/src/math/bcknd/sx/ax_helm_sx.f90 index c64dc5df469..238202ad674 100644 --- a/src/math/bcknd/sx/ax_helm_sx.f90 +++ b/src/math/bcknd/sx/ax_helm_sx.f90 @@ -35,7 +35,6 @@ module ax_helm_sx use num_types, only : rp use coefs, only : coef_t use space, only : space_t - use field, only : field_t use mesh, only : mesh_t use math, only : addcol4 implicit none @@ -46,7 +45,7 @@ module ax_helm_sx procedure, nopass :: compute => ax_helm_sx_compute end type ax_helm_sx_t -contains +contains subroutine ax_helm_sx_compute(w, u, coef, msh, Xh) type(mesh_t), intent(inout) :: msh @@ -104,7 +103,7 @@ subroutine ax_helm_sx_compute(w, u, coef, msh, Xh) if (coef%ifh2) call addcol4 (w,coef%h2,coef%B,u,coef%dof%size()) end subroutine ax_helm_sx_compute - + subroutine sx_ax_helm_lx(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & h1, G11, G22, G33, G12, G13, G23, n, lx) integer, intent(in) :: n, lx @@ -223,7 +222,7 @@ subroutine sx_ax_helm_lx(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do kk = 1, lx wt = wt + dzt(k, kk)*uut(i,j,kk,e) end do - w(i,j,k,e) = w(i,j,k,e) + wt + w(i,j,k,e) = w(i,j,k,e) + wt end do end do end do @@ -231,7 +230,7 @@ subroutine sx_ax_helm_lx(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end subroutine sx_ax_helm_lx - + subroutine sx_ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & h1, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 14 @@ -351,7 +350,7 @@ subroutine sx_ax_helm_lx14(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do kk = 1, lx wt = wt + dzt(k, kk)*uut(i,j,kk,e) end do - w(i,j,k,e) = w(i,j,k,e) + wt + w(i,j,k,e) = w(i,j,k,e) + wt end do end do end do @@ -479,7 +478,7 @@ subroutine sx_ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do kk = 1, lx wt = wt + dzt(k, kk)*uut(i,j,kk,e) end do - w(i,j,k,e) = w(i,j,k,e) + wt + w(i,j,k,e) = w(i,j,k,e) + wt end do end do end do @@ -487,7 +486,7 @@ subroutine sx_ax_helm_lx13(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end subroutine sx_ax_helm_lx13 - + subroutine sx_ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & h1, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 12 @@ -607,7 +606,7 @@ subroutine sx_ax_helm_lx12(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do kk = 1, lx wt = wt + dzt(k, kk)*uut(i,j,kk,e) end do - w(i,j,k,e) = w(i,j,k,e) + wt + w(i,j,k,e) = w(i,j,k,e) + wt end do end do end do @@ -635,7 +634,7 @@ subroutine sx_ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & real(kind=rp), intent(in) :: Dxt(lx, lx) real(kind=rp), intent(in) :: Dyt(lx, lx) real(kind=rp), intent(in) :: Dzt(lx, lx) - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk real(kind=rp) :: ur(lx, lx, lx, n) real(kind=rp) :: us(lx, lx, lx, n) real(kind=rp) :: ut(lx, lx, lx, n) @@ -655,9 +654,9 @@ subroutine sx_ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end do do k = 1, lx - do i = 1, lx - do j = 1, lx - do e = 1, n + do i = 1, lx + do j = 1, lx + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -735,7 +734,7 @@ subroutine sx_ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do kk = 1, lx wt = wt + dzt(k, kk)*uut(i,j,kk,e) end do - w(i,j,k,e) = w(i,j,k,e) + wt + w(i,j,k,e) = w(i,j,k,e) + wt end do end do end do @@ -743,7 +742,7 @@ subroutine sx_ax_helm_lx11(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end subroutine sx_ax_helm_lx11 - + subroutine sx_ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & h1, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 10 @@ -763,7 +762,7 @@ subroutine sx_ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & real(kind=rp), intent(in) :: Dxt(lx, lx) real(kind=rp), intent(in) :: Dyt(lx, lx) real(kind=rp), intent(in) :: Dzt(lx, lx) - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk real(kind=rp) :: ur(lx, lx, lx, n) real(kind=rp) :: us(lx, lx, lx, n) real(kind=rp) :: ut(lx, lx, lx, n) @@ -783,9 +782,9 @@ subroutine sx_ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end do do k = 1, lx - do i = 1, lx - do j = 1, lx - do e = 1, n + do i = 1, lx + do j = 1, lx + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -863,7 +862,7 @@ subroutine sx_ax_helm_lx10(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do kk = 1, lx wt = wt + dzt(k, kk)*uut(i,j,kk,e) end do - w(i,j,k,e) = w(i,j,k,e) + wt + w(i,j,k,e) = w(i,j,k,e) + wt end do end do end do @@ -891,7 +890,7 @@ subroutine sx_ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & real(kind=rp), intent(in) :: Dxt(lx, lx) real(kind=rp), intent(in) :: Dyt(lx, lx) real(kind=rp), intent(in) :: Dzt(lx, lx) - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk real(kind=rp) :: ur(lx, lx, lx, n) real(kind=rp) :: us(lx, lx, lx, n) real(kind=rp) :: ut(lx, lx, lx, n) @@ -911,9 +910,9 @@ subroutine sx_ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end do do k = 1, lx - do i = 1, lx - do j = 1, lx - do e = 1, n + do i = 1, lx + do j = 1, lx + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -991,7 +990,7 @@ subroutine sx_ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do kk = 1, lx wt = wt + dzt(k, kk)*uut(i,j,kk,e) end do - w(i,j,k,e) = w(i,j,k,e) + wt + w(i,j,k,e) = w(i,j,k,e) + wt end do end do end do @@ -999,7 +998,7 @@ subroutine sx_ax_helm_lx9(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end subroutine sx_ax_helm_lx9 - + subroutine sx_ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & h1, G11, G22, G33, G12, G13, G23, n) integer, parameter :: lx = 8 @@ -1019,7 +1018,7 @@ subroutine sx_ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & real(kind=rp), intent(in) :: Dxt(lx, lx) real(kind=rp), intent(in) :: Dyt(lx, lx) real(kind=rp), intent(in) :: Dzt(lx, lx) - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk real(kind=rp) :: ur(lx, lx, lx, n) real(kind=rp) :: us(lx, lx, lx, n) real(kind=rp) :: ut(lx, lx, lx, n) @@ -1039,9 +1038,9 @@ subroutine sx_ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end do do k = 1, lx - do i = 1, lx - do j = 1, lx - do e = 1, n + do i = 1, lx + do j = 1, lx + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -1119,7 +1118,7 @@ subroutine sx_ax_helm_lx8(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do kk = 1, lx wt = wt + dzt(k, kk)*uut(i,j,kk,e) end do - w(i,j,k,e) = w(i,j,k,e) + wt + w(i,j,k,e) = w(i,j,k,e) + wt end do end do end do @@ -1147,7 +1146,7 @@ subroutine sx_ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & real(kind=rp), intent(in) :: Dxt(lx, lx) real(kind=rp), intent(in) :: Dyt(lx, lx) real(kind=rp), intent(in) :: Dzt(lx, lx) - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk real(kind=rp) :: ur(lx, lx, lx, n) real(kind=rp) :: us(lx, lx, lx, n) real(kind=rp) :: ut(lx, lx, lx, n) @@ -1167,9 +1166,9 @@ subroutine sx_ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end do do k = 1, lx - do i = 1, lx - do j = 1, lx - do e = 1, n + do i = 1, lx + do j = 1, lx + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -1247,7 +1246,7 @@ subroutine sx_ax_helm_lx7(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do kk = 1, lx wt = wt + dzt(k, kk)*uut(i,j,kk,e) end do - w(i,j,k,e) = w(i,j,k,e) + wt + w(i,j,k,e) = w(i,j,k,e) + wt end do end do end do @@ -1275,7 +1274,7 @@ subroutine sx_ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & real(kind=rp), intent(in) :: Dxt(lx, lx) real(kind=rp), intent(in) :: Dyt(lx, lx) real(kind=rp), intent(in) :: Dzt(lx, lx) - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk real(kind=rp) :: ur(lx, lx, lx, n) real(kind=rp) :: us(lx, lx, lx, n) real(kind=rp) :: ut(lx, lx, lx, n) @@ -1295,9 +1294,9 @@ subroutine sx_ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end do do k = 1, lx - do i = 1, lx - do j = 1, lx - do e = 1, n + do i = 1, lx + do j = 1, lx + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -1375,7 +1374,7 @@ subroutine sx_ax_helm_lx6(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do kk = 1, lx wt = wt + dzt(k, kk)*uut(i,j,kk,e) end do - w(i,j,k,e) = w(i,j,k,e) + wt + w(i,j,k,e) = w(i,j,k,e) + wt end do end do end do @@ -1403,7 +1402,7 @@ subroutine sx_ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & real(kind=rp), intent(in) :: Dxt(lx, lx) real(kind=rp), intent(in) :: Dyt(lx, lx) real(kind=rp), intent(in) :: Dzt(lx, lx) - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk real(kind=rp) :: ur(lx, lx, lx, n) real(kind=rp) :: us(lx, lx, lx, n) real(kind=rp) :: ut(lx, lx, lx, n) @@ -1423,9 +1422,9 @@ subroutine sx_ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & end do do k = 1, lx - do i = 1, lx - do j = 1, lx - do e = 1, n + do i = 1, lx + do j = 1, lx + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -1503,7 +1502,7 @@ subroutine sx_ax_helm_lx5(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do kk = 1, lx wt = wt + dzt(k, kk)*uut(i,j,kk,e) end do - w(i,j,k,e) = w(i,j,k,e) + wt + w(i,j,k,e) = w(i,j,k,e) + wt end do end do end do @@ -1544,7 +1543,7 @@ subroutine sx_ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & ur(i,jj,1,1) = Dx(i,1)*u(1,jj,1,1) & + Dx(i,2)*u(2,jj,1,1) & + Dx(i,3)*u(3,jj,1,1) & - + Dx(i,4)*u(4,jj,1,1) + + Dx(i,4)*u(4,jj,1,1) end do end do @@ -1556,7 +1555,7 @@ subroutine sx_ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & us(i,j,k,e) = Dy(j,1) * u(i,1,k,e) & + Dy(j,2) * u(i,2,k,e) & + Dy(j,3) * u(i,3,k,e) & - + Dy(j,4) * u(i,4,k,e) + + Dy(j,4) * u(i,4,k,e) end do end do end do @@ -1571,7 +1570,7 @@ subroutine sx_ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & ut(i,j,k,e) = Dz(k,1) * u(i,j,1,e) & + Dz(k,2) * u(i,j,2,e) & + Dz(k,3) * u(i,j,3,e) & - + Dz(k,4) * u(i,j,4,e) + + Dz(k,4) * u(i,j,4,e) end do end do end do @@ -1599,7 +1598,7 @@ subroutine sx_ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & w(i,jj,1,1) = Dxt(i,1) * uur(1,jj,1,1) & + Dxt(i,2) * uur(2,jj,1,1) & + Dxt(i,3) * uur(3,jj,1,1) & - + Dxt(i,4) * uur(4,jj,1,1) + + Dxt(i,4) * uur(4,jj,1,1) end do end do @@ -1610,7 +1609,7 @@ subroutine sx_ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & w(i,j,k,e) = w(i,j,k,e) + Dyt(j,1) * uus(i,1,k,e) & + Dyt(j,2) * uus(i,2,k,e) & + Dyt(j,3) * uus(i,3,k,e) & - + Dyt(j,4) * uus(i,4,k,e) + + Dyt(j,4) * uus(i,4,k,e) end do end do end do @@ -1623,7 +1622,7 @@ subroutine sx_ax_helm_lx4(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & w(i,j,k,e) = w(i,j,k,e) + Dzt(k,1) * uut(i,j,1,e) & + Dzt(k,2) * uut(i,j,2,e) & + Dzt(k,3) * uut(i,j,3,e) & - + Dzt(k,4) * uut(i,j,4,e) + + Dzt(k,4) * uut(i,j,4,e) end do end do end do @@ -1662,7 +1661,7 @@ subroutine sx_ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do jj = 1, lx * lx * n ur(i,jj,1,1) = Dx(i,1)*u(1,jj,1,1) & + Dx(i,2)*u(2,jj,1,1) & - + Dx(i,3)*u(3,jj,1,1) + + Dx(i,3)*u(3,jj,1,1) end do end do @@ -1673,7 +1672,7 @@ subroutine sx_ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do e = 1, n us(i,j,k,e) = Dy(j,1) * u(i,1,k,e) & + Dy(j,2) * u(i,2,k,e) & - + Dy(j,3) * u(i,3,k,e) + + Dy(j,3) * u(i,3,k,e) end do end do end do @@ -1687,7 +1686,7 @@ subroutine sx_ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do e = 1, n ut(i,j,k,e) = Dz(k,1) * u(i,j,1,e) & + Dz(k,2) * u(i,j,2,e) & - + Dz(k,3) * u(i,j,3,e) + + Dz(k,3) * u(i,j,3,e) end do end do end do @@ -1714,7 +1713,7 @@ subroutine sx_ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do jj = 1, lx * lx * n w(i,jj,1,1) = Dxt(i,1) * uur(1,jj,1,1) & + Dxt(i,2) * uur(2,jj,1,1) & - + Dxt(i,3) * uur(3,jj,1,1) + + Dxt(i,3) * uur(3,jj,1,1) end do end do @@ -1724,7 +1723,7 @@ subroutine sx_ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do e = 1, n w(i,j,k,e) = w(i,j,k,e) + Dyt(j,1) * uus(i,1,k,e) & + Dyt(j,2) * uus(i,2,k,e) & - + Dyt(j,3) * uus(i,3,k,e) + + Dyt(j,3) * uus(i,3,k,e) end do end do end do @@ -1736,7 +1735,7 @@ subroutine sx_ax_helm_lx3(w, u, Dx, Dy, Dz, Dxt, Dyt, Dzt, & do e = 1, n w(i,j,k,e) = w(i,j,k,e) + Dzt(k,1) * uut(i,j,1,e) & + Dzt(k,2) * uut(i,j,2,e) & - + Dzt(k,3) * uut(i,j,3,e) + + Dzt(k,3) * uut(i,j,3,e) end do end do end do diff --git a/src/math/bcknd/sx/fdm_sx.f90 b/src/math/bcknd/sx/fdm_sx.f90 index e91182e17ba..0c01f44eb27 100644 --- a/src/math/bcknd/sx/fdm_sx.f90 +++ b/src/math/bcknd/sx/fdm_sx.f90 @@ -36,7 +36,7 @@ module fdm_sx use tensor_sx implicit none private - + public :: fdm_do_fast_sx contains @@ -46,7 +46,7 @@ subroutine fdm_do_fast_sx(e, r, s, d, nl, ldim, nelv) real(kind=rp), intent(inout) :: e(nl**ldim, nelv) real(kind=rp), intent(inout) :: r(nl**ldim, nelv) real(kind=rp), intent(inout) :: s(nl*nl,2,ldim, nelv) - real(kind=rp), intent(inout) :: d(nl**ldim, nelv) + real(kind=rp), intent(inout) :: d(nl**ldim, nelv) integer :: ie, nn, i nn = nl**ldim @@ -58,7 +58,7 @@ subroutine fdm_do_fast_sx(e, r, s, d, nl, ldim, nelv) end do call tnsr2d_el_sx(e(1,ie), nl, r(1,ie), nl, s(1,1,1,ie), s(1,2,2,ie)) end do - else + else select case (nl) case (14) call fdm_do_fast_sx_nl14(e, r, s, d, nelv) @@ -104,7 +104,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n) nn = n**2 nnn = n**3 - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -117,7 +117,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -133,7 +133,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n) end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -141,7 +141,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n) tmp = 0.0_rp do k = 1, n tmp = tmp + wrk2(i + nn * (k - 1), ie) * s(k, j, 1, 3, ie) - end do + end do e(jj,ie) = tmp end do end do @@ -150,7 +150,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -163,7 +163,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -179,7 +179,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n) end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -192,7 +192,7 @@ subroutine fdm_do_fast_sx_nl(e, r, s, d, nelv, n) end do end do end do - + end subroutine fdm_do_fast_sx_nl subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv) @@ -206,7 +206,7 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv) real(kind=rp), intent(inout) :: d(n**3, nelv) real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv) integer :: ie, i, j, l, ii, jj - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -224,11 +224,11 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv) + s(i,11,2,1,ie) * r(11 + n * (j - 1), ie) & + s(i,12,2,1,ie) * r(12 + n * (j - 1), ie) & + s(i,13,2,1,ie) * r(13 + n * (j - 1), ie) & - + s(i,14,2,1,ie) * r(14 + n * (j - 1), ie) + + s(i,14,2,1,ie) * r(14 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -261,12 +261,12 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv) + wrk(l + n * (13 - 1) + nn * (i - 1), ie) & * s(13,j,1,2,ie) & + wrk(l + n * (14 - 1) + nn * (i - 1), ie) & - * s(14,j,1,2,ie) + * s(14,j,1,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -284,7 +284,7 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv) + wrk2(i + nn * (11 - 1), ie) * s(11, j, 1, 3, ie) & + wrk2(i + nn * (12 - 1), ie) * s(12, j, 1, 3, ie) & + wrk2(i + nn * (13 - 1), ie) * s(13, j, 1, 3, ie) & - + wrk2(i + nn * (14 - 1), ie) * s(14, j, 1, 3, ie) + + wrk2(i + nn * (14 - 1), ie) * s(14, j, 1, 3, ie) end do end do end do @@ -292,7 +292,7 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -310,11 +310,11 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv) + s(i,11,1,1,ie) * r(11 + n * (j - 1), ie) & + s(i,12,1,1,ie) * r(12 + n * (j - 1), ie) & + s(i,13,1,1,ie) * r(13 + n * (j - 1), ie) & - + s(i,14,1,1,ie) * r(14 + n * (j - 1), ie) + + s(i,14,1,1,ie) * r(14 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -347,12 +347,12 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv) + wrk(l + n * (13 - 1) + nn * (i - 1), ie) & * s(13,j,2,2,ie) & + wrk(l + n * (14 - 1) + nn * (i - 1), ie) & - * s(14,j,2,2,ie) + * s(14,j,2,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -370,11 +370,11 @@ subroutine fdm_do_fast_sx_nl14(e, r, s, d, nelv) + wrk2(i + nn * (11 - 1), ie) * s(11, j, 2, 3, ie) & + wrk2(i + nn * (12 - 1), ie) * s(12, j, 2, 3, ie) & + wrk2(i + nn * (13 - 1), ie) * s(13, j, 2, 3, ie) & - + wrk2(i + nn * (14 - 1), ie) * s(14, j, 2, 3, ie) + + wrk2(i + nn * (14 - 1), ie) * s(14, j, 2, 3, ie) end do end do end do - + end subroutine fdm_do_fast_sx_nl14 @@ -389,7 +389,7 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv) real(kind=rp), intent(inout) :: d(n**3, nelv) real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv) integer :: ie, i, j, l, ii, jj - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -406,11 +406,11 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv) + s(i,10,2,1,ie) * r(10 + n * (j - 1), ie) & + s(i,11,2,1,ie) * r(11 + n * (j - 1), ie) & + s(i,12,2,1,ie) * r(12 + n * (j - 1), ie) & - + s(i,13,2,1,ie) * r(13 + n * (j - 1), ie) + + s(i,13,2,1,ie) * r(13 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -441,12 +441,12 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv) + wrk(l + n * (12 - 1) + nn * (i - 1), ie) & * s(12,j,1,2,ie) & + wrk(l + n * (13 - 1) + nn * (i - 1), ie) & - * s(13,j,1,2,ie) + * s(13,j,1,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -463,7 +463,7 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv) + wrk2(i + nn * (10 - 1), ie) * s(10, j, 1, 3, ie) & + wrk2(i + nn * (11 - 1), ie) * s(11, j, 1, 3, ie) & + wrk2(i + nn * (12 - 1), ie) * s(12, j, 1, 3, ie) & - + wrk2(i + nn * (13 - 1), ie) * s(13, j, 1, 3, ie) + + wrk2(i + nn * (13 - 1), ie) * s(13, j, 1, 3, ie) end do end do end do @@ -471,7 +471,7 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -488,11 +488,11 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv) + s(i,10,1,1,ie) * r(10 + n * (j - 1), ie) & + s(i,11,1,1,ie) * r(11 + n * (j - 1), ie) & + s(i,12,1,1,ie) * r(12 + n * (j - 1), ie) & - + s(i,13,1,1,ie) * r(13 + n * (j - 1), ie) + + s(i,13,1,1,ie) * r(13 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -523,12 +523,12 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv) + wrk(l + n * (12 - 1) + nn * (i - 1), ie) & * s(12,j,2,2,ie) & + wrk(l + n * (13 - 1) + nn * (i - 1), ie) & - * s(13,j,2,2,ie) + * s(13,j,2,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -545,11 +545,11 @@ subroutine fdm_do_fast_sx_nl13(e, r, s, d, nelv) + wrk2(i + nn * (10 - 1), ie) * s(10, j, 2, 3, ie) & + wrk2(i + nn * (11 - 1), ie) * s(11, j, 2, 3, ie) & + wrk2(i + nn * (12 - 1), ie) * s(12, j, 2, 3, ie) & - + wrk2(i + nn * (13 - 1), ie) * s(13, j, 2, 3, ie) + + wrk2(i + nn * (13 - 1), ie) * s(13, j, 2, 3, ie) end do end do end do - + end subroutine fdm_do_fast_sx_nl13 @@ -564,7 +564,7 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv) real(kind=rp), intent(inout) :: d(n**3, nelv) real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv) integer :: ie, i, j, l, ii, jj - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -580,11 +580,11 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv) + s(i,9,2,1,ie) * r(9 + n * (j - 1), ie) & + s(i,10,2,1,ie) * r(10 + n * (j - 1), ie) & + s(i,11,2,1,ie) * r(11 + n * (j - 1), ie) & - + s(i,12,2,1,ie) * r(12 + n * (j - 1), ie) + + s(i,12,2,1,ie) * r(12 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -613,12 +613,12 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv) + wrk(l + n * (11 - 1) + nn * (i - 1), ie) & * s(11,j,1,2,ie) & + wrk(l + n * (12 - 1) + nn * (i - 1), ie) & - * s(12,j,1,2,ie) + * s(12,j,1,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -634,7 +634,7 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv) + wrk2(i + nn * (9 - 1), ie) * s(9, j, 1, 3, ie) & + wrk2(i + nn * (10 - 1), ie) * s(10, j, 1, 3, ie) & + wrk2(i + nn * (11 - 1), ie) * s(11, j, 1, 3, ie) & - + wrk2(i + nn * (12 - 1), ie) * s(12, j, 1, 3, ie) + + wrk2(i + nn * (12 - 1), ie) * s(12, j, 1, 3, ie) end do end do end do @@ -642,7 +642,7 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -658,11 +658,11 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv) + s(i,9,1,1,ie) * r(9 + n * (j - 1), ie) & + s(i,10,1,1,ie) * r(10 + n * (j - 1), ie) & + s(i,11,1,1,ie) * r(11 + n * (j - 1), ie) & - + s(i,12,1,1,ie) * r(12 + n * (j - 1), ie) + + s(i,12,1,1,ie) * r(12 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -691,12 +691,12 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv) + wrk(l + n * (11 - 1) + nn * (i - 1), ie) & * s(11,j,2,2,ie) & + wrk(l + n * (12 - 1) + nn * (i - 1), ie) & - * s(12,j,2,2,ie) + * s(12,j,2,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -712,11 +712,11 @@ subroutine fdm_do_fast_sx_nl12(e, r, s, d, nelv) + wrk2(i + nn * (9 - 1), ie) * s(9, j, 2, 3, ie) & + wrk2(i + nn * (10 - 1), ie) * s(10, j, 2, 3, ie) & + wrk2(i + nn * (11 - 1), ie) * s(11, j, 2, 3, ie) & - + wrk2(i + nn * (12 - 1), ie) * s(12, j, 2, 3, ie) + + wrk2(i + nn * (12 - 1), ie) * s(12, j, 2, 3, ie) end do end do end do - + end subroutine fdm_do_fast_sx_nl12 @@ -731,7 +731,7 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv) real(kind=rp), intent(inout) :: d(n**3, nelv) real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv) integer :: ie, i, j, l, ii, jj - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -746,11 +746,11 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv) + s(i,8,2,1,ie) * r(8 + n * (j - 1), ie) & + s(i,9,2,1,ie) * r(9 + n * (j - 1), ie) & + s(i,10,2,1,ie) * r(10 + n * (j - 1), ie) & - + s(i,11,2,1,ie) * r(11 + n * (j - 1), ie) + + s(i,11,2,1,ie) * r(11 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -777,12 +777,12 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv) + wrk(l + n * (10 - 1) + nn * (i - 1), ie) & * s(10,j,1,2,ie) & + wrk(l + n * (11 - 1) + nn * (i - 1), ie) & - * s(11,j,1,2,ie) + * s(11,j,1,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -797,7 +797,7 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv) + wrk2(i + nn * (8 - 1), ie) * s(8, j, 1, 3, ie) & + wrk2(i + nn * (9 - 1), ie) * s(9, j, 1, 3, ie) & + wrk2(i + nn * (10 - 1), ie) * s(10, j, 1, 3, ie) & - + wrk2(i + nn * (11 - 1), ie) * s(11, j, 1, 3, ie) + + wrk2(i + nn * (11 - 1), ie) * s(11, j, 1, 3, ie) end do end do end do @@ -805,7 +805,7 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -820,11 +820,11 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv) + s(i,8,1,1,ie) * r(8 + n * (j - 1), ie) & + s(i,9,1,1,ie) * r(9 + n * (j - 1), ie) & + s(i,10,1,1,ie) * r(10 + n * (j - 1), ie) & - + s(i,11,1,1,ie) * r(11 + n * (j - 1), ie) + + s(i,11,1,1,ie) * r(11 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -851,12 +851,12 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv) + wrk(l + n * (10 - 1) + nn * (i - 1), ie) & * s(10,j,2,2,ie) & + wrk(l + n * (11 - 1) + nn * (i - 1), ie) & - * s(11,j,2,2,ie) + * s(11,j,2,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -871,11 +871,11 @@ subroutine fdm_do_fast_sx_nl11(e, r, s, d, nelv) + wrk2(i + nn * (8 - 1), ie) * s(8, j, 2, 3, ie) & + wrk2(i + nn * (9 - 1), ie) * s(9, j, 2, 3, ie) & + wrk2(i + nn * (10 - 1), ie) * s(10, j, 2, 3, ie) & - + wrk2(i + nn * (11 - 1), ie) * s(11, j, 2, 3, ie) + + wrk2(i + nn * (11 - 1), ie) * s(11, j, 2, 3, ie) end do end do end do - + end subroutine fdm_do_fast_sx_nl11 @@ -890,7 +890,7 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv) real(kind=rp), intent(inout) :: d(n**3, nelv) real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv) integer :: ie, i, j, l, ii, jj - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -904,11 +904,11 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv) + s(i,7,2,1,ie) * r(7 + n * (j - 1), ie) & + s(i,8,2,1,ie) * r(8 + n * (j - 1), ie) & + s(i,9,2,1,ie) * r(9 + n * (j - 1), ie) & - + s(i,10,2,1,ie) * r(10 + n * (j - 1), ie) + + s(i,10,2,1,ie) * r(10 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -933,12 +933,12 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv) + wrk(l + n * (9 - 1) + nn * (i - 1), ie) & * s(9,j,1,2,ie) & + wrk(l + n * (10 - 1) + nn * (i - 1), ie) & - * s(10,j,1,2,ie) + * s(10,j,1,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -952,7 +952,7 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv) + wrk2(i + nn * (7 - 1), ie) * s(7, j, 1, 3, ie) & + wrk2(i + nn * (8 - 1), ie) * s(8, j, 1, 3, ie) & + wrk2(i + nn * (9 - 1), ie) * s(9, j, 1, 3, ie) & - + wrk2(i + nn * (10 - 1), ie) * s(10, j, 1, 3, ie) + + wrk2(i + nn * (10 - 1), ie) * s(10, j, 1, 3, ie) end do end do end do @@ -960,7 +960,7 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -974,11 +974,11 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv) + s(i,7,1,1,ie) * r(7 + n * (j - 1), ie) & + s(i,8,1,1,ie) * r(8 + n * (j - 1), ie) & + s(i,9,1,1,ie) * r(9 + n * (j - 1), ie) & - + s(i,10,1,1,ie) * r(10 + n * (j - 1), ie) + + s(i,10,1,1,ie) * r(10 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1003,12 +1003,12 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv) + wrk(l + n * (9 - 1) + nn * (i - 1), ie) & * s(9,j,2,2,ie) & + wrk(l + n * (10 - 1) + nn * (i - 1), ie) & - * s(10,j,2,2,ie) + * s(10,j,2,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -1022,14 +1022,14 @@ subroutine fdm_do_fast_sx_nl10(e, r, s, d, nelv) + wrk2(i + nn * (7 - 1), ie) * s(7, j, 2, 3, ie) & + wrk2(i + nn * (8 - 1), ie) * s(8, j, 2, 3, ie) & + wrk2(i + nn * (9 - 1), ie) * s(9, j, 2, 3, ie) & - + wrk2(i + nn * (10 - 1), ie) * s(10, j, 2, 3, ie) + + wrk2(i + nn * (10 - 1), ie) * s(10, j, 2, 3, ie) end do end do end do - + end subroutine fdm_do_fast_sx_nl10 - + subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv) integer, parameter :: n = 9 integer, parameter :: nn = n**2 @@ -1041,7 +1041,7 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv) real(kind=rp), intent(inout) :: d(n**3, nelv) real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv) integer :: ie, i, j, l, ii, jj - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -1054,11 +1054,11 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv) + s(i,6,2,1,ie) * r(6 + n * (j - 1), ie) & + s(i,7,2,1,ie) * r(7 + n * (j - 1), ie) & + s(i,8,2,1,ie) * r(8 + n * (j - 1), ie) & - + s(i,9,2,1,ie) * r(9 + n * (j - 1), ie) + + s(i,9,2,1,ie) * r(9 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1081,12 +1081,12 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv) + wrk(l + n * (8 - 1) + nn * (i - 1), ie) & * s(8,j,1,2,ie) & + wrk(l + n * (9 - 1) + nn * (i - 1), ie) & - * s(9,j,1,2,ie) + * s(9,j,1,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -1099,7 +1099,7 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv) + wrk2(i + nn * (6 - 1), ie) * s(6, j, 1, 3, ie) & + wrk2(i + nn * (7 - 1), ie) * s(7, j, 1, 3, ie) & + wrk2(i + nn * (8 - 1), ie) * s(8, j, 1, 3, ie) & - + wrk2(i + nn * (9 - 1), ie) * s(9, j, 1, 3, ie) + + wrk2(i + nn * (9 - 1), ie) * s(9, j, 1, 3, ie) end do end do end do @@ -1107,7 +1107,7 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -1120,11 +1120,11 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv) + s(i,6,1,1,ie) * r(6 + n * (j - 1), ie) & + s(i,7,1,1,ie) * r(7 + n * (j - 1), ie) & + s(i,8,1,1,ie) * r(8 + n * (j - 1), ie) & - + s(i,9,1,1,ie) * r(9 + n * (j - 1), ie) + + s(i,9,1,1,ie) * r(9 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1147,12 +1147,12 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv) + wrk(l + n * (8 - 1) + nn * (i - 1), ie) & * s(8,j,2,2,ie) & + wrk(l + n * (9 - 1) + nn * (i - 1), ie) & - * s(9,j,2,2,ie) + * s(9,j,2,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -1165,11 +1165,11 @@ subroutine fdm_do_fast_sx_nl9(e, r, s, d, nelv) + wrk2(i + nn * (6 - 1), ie) * s(6, j, 2, 3, ie) & + wrk2(i + nn * (7 - 1), ie) * s(7, j, 2, 3, ie) & + wrk2(i + nn * (8 - 1), ie) * s(8, j, 2, 3, ie) & - + wrk2(i + nn * (9 - 1), ie) * s(9, j, 2, 3, ie) + + wrk2(i + nn * (9 - 1), ie) * s(9, j, 2, 3, ie) end do end do end do - + end subroutine fdm_do_fast_sx_nl9 @@ -1184,7 +1184,7 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv) real(kind=rp), intent(inout) :: d(n**3, nelv) real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv) integer :: ie, i, j, l, ii, jj - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -1196,11 +1196,11 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv) + s(i,5,2,1,ie) * r(5 + n * (j - 1), ie) & + s(i,6,2,1,ie) * r(6 + n * (j - 1), ie) & + s(i,7,2,1,ie) * r(7 + n * (j - 1), ie) & - + s(i,8,2,1,ie) * r(8 + n * (j - 1), ie) + + s(i,8,2,1,ie) * r(8 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1221,12 +1221,12 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv) + wrk(l + n * (7 - 1) + nn * (i - 1), ie) & * s(7,j,1,2,ie) & + wrk(l + n * (8 - 1) + nn * (i - 1), ie) & - * s(8,j,1,2,ie) + * s(8,j,1,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -1238,7 +1238,7 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv) + wrk2(i + nn * (5 - 1), ie) * s(5, j, 1, 3, ie) & + wrk2(i + nn * (6 - 1), ie) * s(6, j, 1, 3, ie) & + wrk2(i + nn * (7 - 1), ie) * s(7, j, 1, 3, ie) & - + wrk2(i + nn * (8 - 1), ie) * s(8, j, 1, 3, ie) + + wrk2(i + nn * (8 - 1), ie) * s(8, j, 1, 3, ie) end do end do end do @@ -1246,7 +1246,7 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -1258,11 +1258,11 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv) + s(i,5,1,1,ie) * r(5 + n * (j - 1), ie) & + s(i,6,1,1,ie) * r(6 + n * (j - 1), ie) & + s(i,7,1,1,ie) * r(7 + n * (j - 1), ie) & - + s(i,8,1,1,ie) * r(8 + n * (j - 1), ie) + + s(i,8,1,1,ie) * r(8 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1283,12 +1283,12 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv) + wrk(l + n * (7 - 1) + nn * (i - 1), ie) & * s(7,j,2,2,ie) & + wrk(l + n * (8 - 1) + nn * (i - 1), ie) & - * s(8,j,2,2,ie) + * s(8,j,2,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -1300,11 +1300,11 @@ subroutine fdm_do_fast_sx_nl8(e, r, s, d, nelv) + wrk2(i + nn * (5 - 1), ie) * s(5, j, 2, 3, ie) & + wrk2(i + nn * (6 - 1), ie) * s(6, j, 2, 3, ie) & + wrk2(i + nn * (7 - 1), ie) * s(7, j, 2, 3, ie) & - + wrk2(i + nn * (8 - 1), ie) * s(8, j, 2, 3, ie) + + wrk2(i + nn * (8 - 1), ie) * s(8, j, 2, 3, ie) end do end do end do - + end subroutine fdm_do_fast_sx_nl8 @@ -1319,7 +1319,7 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv) real(kind=rp), intent(inout) :: d(n**3, nelv) real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv) integer :: ie, i, j, l, ii, jj - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -1330,11 +1330,11 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv) + s(i,4,2,1,ie) * r(4 + n * (j - 1), ie) & + s(i,5,2,1,ie) * r(5 + n * (j - 1), ie) & + s(i,6,2,1,ie) * r(6 + n * (j - 1), ie) & - + s(i,7,2,1,ie) * r(7 + n * (j - 1), ie) + + s(i,7,2,1,ie) * r(7 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1353,12 +1353,12 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv) + wrk(l + n * (6 - 1) + nn * (i - 1), ie) & * s(6,j,1,2,ie) & + wrk(l + n * (7 - 1) + nn * (i - 1), ie) & - * s(7,j,1,2,ie) + * s(7,j,1,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -1369,7 +1369,7 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv) + wrk2(i + nn * (4 - 1), ie) * s(4, j, 1, 3, ie) & + wrk2(i + nn * (5 - 1), ie) * s(5, j, 1, 3, ie) & + wrk2(i + nn * (6 - 1), ie) * s(6, j, 1, 3, ie) & - + wrk2(i + nn * (7 - 1), ie) * s(7, j, 1, 3, ie) + + wrk2(i + nn * (7 - 1), ie) * s(7, j, 1, 3, ie) end do end do end do @@ -1377,7 +1377,7 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -1388,11 +1388,11 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv) + s(i,4,1,1,ie) * r(4 + n * (j - 1), ie) & + s(i,5,1,1,ie) * r(5 + n * (j - 1), ie) & + s(i,6,1,1,ie) * r(6 + n * (j - 1), ie) & - + s(i,7,1,1,ie) * r(7 + n * (j - 1), ie) + + s(i,7,1,1,ie) * r(7 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1411,12 +1411,12 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv) + wrk(l + n * (6 - 1) + nn * (i - 1), ie) & * s(6,j,2,2,ie) & + wrk(l + n * (7 - 1) + nn * (i - 1), ie) & - * s(7,j,2,2,ie) + * s(7,j,2,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -1427,11 +1427,11 @@ subroutine fdm_do_fast_sx_nl7(e, r, s, d, nelv) + wrk2(i + nn * (4 - 1), ie) * s(4, j, 2, 3, ie) & + wrk2(i + nn * (5 - 1), ie) * s(5, j, 2, 3, ie) & + wrk2(i + nn * (6 - 1), ie) * s(6, j, 2, 3, ie) & - + wrk2(i + nn * (7 - 1), ie) * s(7, j, 2, 3, ie) + + wrk2(i + nn * (7 - 1), ie) * s(7, j, 2, 3, ie) end do end do end do - + end subroutine fdm_do_fast_sx_nl7 @@ -1446,7 +1446,7 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv) real(kind=rp), intent(inout) :: d(n**3, nelv) real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv) integer :: ie, i, j, l, ii, jj - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -1456,11 +1456,11 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv) + s(i,3,2,1,ie) * r(3 + n * (j - 1), ie) & + s(i,4,2,1,ie) * r(4 + n * (j - 1), ie) & + s(i,5,2,1,ie) * r(5 + n * (j - 1), ie) & - + s(i,6,2,1,ie) * r(6 + n * (j - 1), ie) + + s(i,6,2,1,ie) * r(6 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1477,12 +1477,12 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv) + wrk(l + n * (5 - 1) + nn * (i - 1), ie) & * s(5,j,1,2,ie) & + wrk(l + n * (6 - 1) + nn * (i - 1), ie) & - * s(6,j,1,2,ie) + * s(6,j,1,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -1492,7 +1492,7 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv) + wrk2(i + nn * (3 - 1), ie) * s(3, j, 1, 3, ie) & + wrk2(i + nn * (4 - 1), ie) * s(4, j, 1, 3, ie) & + wrk2(i + nn * (5 - 1), ie) * s(5, j, 1, 3, ie) & - + wrk2(i + nn * (6 - 1), ie) * s(6, j, 1, 3, ie) + + wrk2(i + nn * (6 - 1), ie) * s(6, j, 1, 3, ie) end do end do end do @@ -1500,7 +1500,7 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -1510,11 +1510,11 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv) + s(i,3,1,1,ie) * r(3 + n * (j - 1), ie) & + s(i,4,1,1,ie) * r(4 + n * (j - 1), ie) & + s(i,5,1,1,ie) * r(5 + n * (j - 1), ie) & - + s(i,6,1,1,ie) * r(6 + n * (j - 1), ie) + + s(i,6,1,1,ie) * r(6 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1531,12 +1531,12 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv) + wrk(l + n * (5 - 1) + nn * (i - 1), ie) & * s(5,j,2,2,ie) & + wrk(l + n * (6 - 1) + nn * (i - 1), ie) & - * s(6,j,2,2,ie) + * s(6,j,2,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -1546,11 +1546,11 @@ subroutine fdm_do_fast_sx_nl6(e, r, s, d, nelv) + wrk2(i + nn * (3 - 1), ie) * s(3, j, 2, 3, ie) & + wrk2(i + nn * (4 - 1), ie) * s(4, j, 2, 3, ie) & + wrk2(i + nn * (5 - 1), ie) * s(5, j, 2, 3, ie) & - + wrk2(i + nn * (6 - 1), ie) * s(6, j, 2, 3, ie) + + wrk2(i + nn * (6 - 1), ie) * s(6, j, 2, 3, ie) end do end do end do - + end subroutine fdm_do_fast_sx_nl6 @@ -1565,7 +1565,7 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv) real(kind=rp), intent(inout) :: d(n**3, nelv) real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv) integer :: ie, i, j, l, ii, jj - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -1574,11 +1574,11 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv) + s(i,2,2,1,ie) * r(2 + n * (j - 1), ie) & + s(i,3,2,1,ie) * r(3 + n * (j - 1), ie) & + s(i,4,2,1,ie) * r(4 + n * (j - 1), ie) & - + s(i,5,2,1,ie) * r(5 + n * (j - 1), ie) + + s(i,5,2,1,ie) * r(5 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1593,12 +1593,12 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv) + wrk(l + n * (4 - 1) + nn * (i - 1), ie) & * s(4,j,1,2,ie) & + wrk(l + n * (5 - 1) + nn * (i - 1), ie) & - * s(5,j,1,2,ie) + * s(5,j,1,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -1607,7 +1607,7 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv) + wrk2(i + nn * (2 - 1), ie) * s(2, j, 1, 3, ie) & + wrk2(i + nn * (3 - 1), ie) * s(3, j, 1, 3, ie) & + wrk2(i + nn * (4 - 1), ie) * s(4, j, 1, 3, ie) & - + wrk2(i + nn * (5 - 1), ie) * s(5, j, 1, 3, ie) + + wrk2(i + nn * (5 - 1), ie) * s(5, j, 1, 3, ie) end do end do end do @@ -1615,7 +1615,7 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -1624,11 +1624,11 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv) + s(i,2,1,1,ie) * r(2 + n * (j - 1), ie) & + s(i,3,1,1,ie) * r(3 + n * (j - 1), ie) & + s(i,4,1,1,ie) * r(4 + n * (j - 1), ie) & - + s(i,5,1,1,ie) * r(5 + n * (j - 1), ie) + + s(i,5,1,1,ie) * r(5 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1643,12 +1643,12 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv) + wrk(l + n * (4 - 1) + nn * (i - 1), ie) & * s(4,j,2,2,ie) & + wrk(l + n * (5 - 1) + nn * (i - 1), ie) & - * s(5,j,2,2,ie) + * s(5,j,2,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -1657,11 +1657,11 @@ subroutine fdm_do_fast_sx_nl5(e, r, s, d, nelv) + wrk2(i + nn * (2 - 1), ie) * s(2, j, 2, 3, ie) & + wrk2(i + nn * (3 - 1), ie) * s(3, j, 2, 3, ie) & + wrk2(i + nn * (4 - 1), ie) * s(4, j, 2, 3, ie) & - + wrk2(i + nn * (5 - 1), ie) * s(5, j, 2, 3, ie) + + wrk2(i + nn * (5 - 1), ie) * s(5, j, 2, 3, ie) end do end do end do - + end subroutine fdm_do_fast_sx_nl5 @@ -1676,7 +1676,7 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv) real(kind=rp), intent(inout) :: d(n**3, nelv) real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv) integer :: ie, i, j, l, ii, jj - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -1684,11 +1684,11 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv) wrk(ii, ie) = s(i,1,2,1,ie) * r(1 + n * (j - 1), ie) & + s(i,2,2,1,ie) * r(2 + n * (j - 1), ie) & + s(i,3,2,1,ie) * r(3 + n * (j - 1), ie) & - + s(i,4,2,1,ie) * r(4 + n * (j - 1), ie) + + s(i,4,2,1,ie) * r(4 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1701,12 +1701,12 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv) + wrk(l + n * (3 - 1) + nn * (i - 1), ie) & * s(3,j,1,2,ie) & + wrk(l + n * (4 - 1) + nn * (i - 1), ie) & - * s(4,j,1,2,ie) + * s(4,j,1,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -1714,7 +1714,7 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv) e(jj,ie) = wrk2(i + nn * (1 - 1), ie) * s(1, j, 1, 3, ie) & + wrk2(i + nn * (2 - 1), ie) * s(2, j, 1, 3, ie) & + wrk2(i + nn * (3 - 1), ie) * s(3, j, 1, 3, ie) & - + wrk2(i + nn * (4 - 1), ie) * s(4, j, 1, 3, ie) + + wrk2(i + nn * (4 - 1), ie) * s(4, j, 1, 3, ie) end do end do end do @@ -1722,7 +1722,7 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv @@ -1730,11 +1730,11 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv) wrk(ii, ie) = s(i,1,1,1,ie) * r(1 + n * (j - 1), ie) & + s(i,2,1,1,ie) * r(2 + n * (j - 1), ie) & + s(i,3,1,1,ie) * r(3 + n * (j - 1), ie) & - + s(i,4,1,1,ie) * r(4 + n * (j - 1), ie) + + s(i,4,1,1,ie) * r(4 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1747,12 +1747,12 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv) + wrk(l + n * (3 - 1) + nn * (i - 1), ie) & * s(3,j,2,2,ie) & + wrk(l + n * (4 - 1) + nn * (i - 1), ie) & - * s(4,j,2,2,ie) + * s(4,j,2,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv @@ -1760,11 +1760,11 @@ subroutine fdm_do_fast_sx_nl4(e, r, s, d, nelv) e(jj,ie) = wrk2(i + nn * (1 - 1), ie) * s(1, j, 2, 3, ie) & + wrk2(i + nn * (2 - 1), ie) * s(2, j, 2, 3, ie) & + wrk2(i + nn * (3 - 1), ie) * s(3, j, 2, 3, ie) & - + wrk2(i + nn * (4 - 1), ie) * s(4, j, 2, 3, ie) + + wrk2(i + nn * (4 - 1), ie) * s(4, j, 2, 3, ie) end do end do end do - + end subroutine fdm_do_fast_sx_nl4 @@ -1779,18 +1779,18 @@ subroutine fdm_do_fast_sx_nl3(e, r, s, d, nelv) real(kind=rp), intent(inout) :: d(n**3, nelv) real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv) integer :: ie, i, j, l, ii, jj - + do j = 1, nn do i = 1, n do ie = 1, nelv ii = i + n * (j - 1) wrk(ii, ie) = s(i,1,2,1,ie) * r(1 + n * (j - 1), ie) & + s(i,2,2,1,ie) * r(2 + n * (j - 1), ie) & - + s(i,3,2,1,ie) * r(3 + n * (j - 1), ie) + + s(i,3,2,1,ie) * r(3 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1801,19 +1801,19 @@ subroutine fdm_do_fast_sx_nl3(e, r, s, d, nelv) + wrk(l + n * (2 - 1) + nn * (i - 1), ie) & * s(2,j,1,2,ie) & + wrk(l + n * (3 - 1) + nn * (i - 1), ie) & - * s(3,j,1,2,ie) + * s(3,j,1,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv jj = i + nn * (j - 1) e(jj,ie) = wrk2(i + nn * (1 - 1), ie) * s(1, j, 1, 3, ie) & + wrk2(i + nn * (2 - 1), ie) * s(2, j, 1, 3, ie) & - + wrk2(i + nn * (3 - 1), ie) * s(3, j, 1, 3, ie) + + wrk2(i + nn * (3 - 1), ie) * s(3, j, 1, 3, ie) end do end do end do @@ -1821,18 +1821,18 @@ subroutine fdm_do_fast_sx_nl3(e, r, s, d, nelv) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv ii = i + n * (j - 1) wrk(ii, ie) = s(i,1,1,1,ie) * r(1 + n * (j - 1), ie) & + s(i,2,1,1,ie) * r(2 + n * (j - 1), ie) & - + s(i,3,1,1,ie) * r(3 + n * (j - 1), ie) + + s(i,3,1,1,ie) * r(3 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1843,23 +1843,23 @@ subroutine fdm_do_fast_sx_nl3(e, r, s, d, nelv) + wrk(l + n * (2 - 1) + nn * (i - 1), ie) & * s(2,j,2,2,ie) & + wrk(l + n * (3 - 1) + nn * (i - 1), ie) & - * s(3,j,2,2,ie) + * s(3,j,2,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv jj = i + nn * (j - 1) e(jj,ie) = wrk2(i + nn * (1 - 1), ie) * s(1, j, 2, 3, ie) & + wrk2(i + nn * (2 - 1), ie) * s(2, j, 2, 3, ie) & - + wrk2(i + nn * (3 - 1), ie) * s(3, j, 2, 3, ie) + + wrk2(i + nn * (3 - 1), ie) * s(3, j, 2, 3, ie) end do end do end do - + end subroutine fdm_do_fast_sx_nl3 @@ -1874,17 +1874,17 @@ subroutine fdm_do_fast_sx_nl2(e, r, s, d, nelv) real(kind=rp), intent(inout) :: d(n**3, nelv) real(kind=rp) :: wrk(n**3, nelv), wrk2(n**3, nelv) integer :: ie, i, j, l, ii, jj - + do j = 1, nn do i = 1, n do ie = 1, nelv ii = i + n * (j - 1) wrk(ii, ie) = s(i,1,2,1,ie) * r(1 + n * (j - 1), ie) & - + s(i,2,2,1,ie) * r(2 + n * (j - 1), ie) + + s(i,2,2,1,ie) * r(2 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1893,18 +1893,18 @@ subroutine fdm_do_fast_sx_nl2(e, r, s, d, nelv) wrk2(ii,ie) = wrk(l + n * (1 - 1) + nn * (i - 1), ie) & * s(1,j,1,2,ie) & + wrk(l + n * (2 - 1) + nn * (i - 1), ie) & - * s(2,j,1,2,ie) + * s(2,j,1,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv jj = i + nn * (j - 1) e(jj,ie) = wrk2(i + nn * (1 - 1), ie) * s(1, j, 1, 3, ie) & - + wrk2(i + nn * (2 - 1), ie) * s(2, j, 1, 3, ie) + + wrk2(i + nn * (2 - 1), ie) * s(2, j, 1, 3, ie) end do end do end do @@ -1912,17 +1912,17 @@ subroutine fdm_do_fast_sx_nl2(e, r, s, d, nelv) do i = 1, nnn * nelv r(i,1) = d(i,1) * e(i,1) end do - + do j = 1, nn do i = 1, n do ie = 1, nelv ii = i + n * (j - 1) wrk(ii, ie) = s(i,1,1,1,ie) * r(1 + n * (j - 1), ie) & - + s(i,2,1,1,ie) * r(2 + n * (j - 1), ie) + + s(i,2,1,1,ie) * r(2 + n * (j - 1), ie) end do end do end do - + do i = 1, n do j = 1, n do l = 1, n @@ -1931,23 +1931,23 @@ subroutine fdm_do_fast_sx_nl2(e, r, s, d, nelv) wrk2(ii,ie) = wrk(l + n * (1 - 1) + nn * (i - 1), ie) & * s(1,j,2,2,ie) & + wrk(l + n * (2 - 1) + nn * (i - 1), ie) & - * s(2,j,2,2,ie) + * s(2,j,2,2,ie) end do end do end do end do - + do j = 1, n do i = 1, nn do ie = 1, nelv jj = i + nn * (j - 1) e(jj,ie) = wrk2(i + nn * (1 - 1), ie) * s(1, j, 2, 3, ie) & - + wrk2(i + nn * (2 - 1), ie) * s(2, j, 2, 3, ie) + + wrk2(i + nn * (2 - 1), ie) * s(2, j, 2, 3, ie) end do end do end do - + end subroutine fdm_do_fast_sx_nl2 - + end module fdm_sx diff --git a/src/math/bcknd/sx/opr_sx.f90 b/src/math/bcknd/sx/opr_sx.f90 index 075bce1bb84..dc5bd99e2b0 100644 --- a/src/math/bcknd/sx/opr_sx.f90 +++ b/src/math/bcknd/sx/opr_sx.f90 @@ -5,19 +5,19 @@ module opr_sx use sx_conv1 use sx_cdtp use sx_cfl + use sx_lambda2 use gather_scatter use num_types, only : rp use space, only : space_t use coefs, only : coef_t use math - use mesh, only : mesh_t use field, only : field_t use mathops implicit none private public :: opr_sx_dudxyz, opr_sx_opgrad, opr_sx_cdtp, opr_sx_conv1, & - opr_sx_curl, opr_sx_cfl + opr_sx_curl, opr_sx_cfl, opr_sx_lambda2 contains @@ -75,8 +75,8 @@ subroutine opr_sx_dudxyz(du, u, dr, ds, dt, coef) end subroutine opr_sx_dudxyz - subroutine opr_sx_opgrad(ux,uy,uz,u,coef) - type(coef_t), intent(in) :: coef + subroutine opr_sx_opgrad(ux,uy,uz,u,coef) + type(coef_t), intent(in) :: coef real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: ux real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uy real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uz @@ -286,7 +286,7 @@ subroutine opr_sx_cdtp(dtx,x,dr,ds,dt, coef) end subroutine opr_sx_cdtp - subroutine opr_sx_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim) + subroutine opr_sx_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim) type(space_t), intent(inout) :: Xh type(coef_t), intent(inout) :: coef integer, intent(in) :: nelv, gdim @@ -308,7 +308,7 @@ subroutine opr_sx_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim) coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & - coef%jacinv, nelv, gdim) + coef%jacinv, nelv, gdim) case(12) call sx_conv1_lx12(du, u, vx, vy, vz, Xh%dx, Xh%dy, Xh%dz, & coef%drdx, coef%dsdx, coef%dtdx, & @@ -425,9 +425,9 @@ subroutine opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh) !! BC dependent, Needs to change if cyclic call opcolv(w1%x,w2%x,w3%x,c_Xh%B, gdim, n) - call c_Xh%gs_h%op(w1, GS_OP_ADD) - call c_Xh%gs_h%op(w2, GS_OP_ADD) - call c_Xh%gs_h%op(w3, GS_OP_ADD) + call c_Xh%gs_h%op(w1, GS_OP_ADD) + call c_Xh%gs_h%op(w2, GS_OP_ADD) + call c_Xh%gs_h%op(w3, GS_OP_ADD) call opcolv(w1%x,w2%x,w3%x,c_Xh%Binv, gdim, n) end subroutine opr_sx_curl @@ -440,107 +440,245 @@ function opr_sx_cfl(dt, u, v, w, Xh, coef, nelv, gdim) result(cfl) real(kind=rp), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: u, v, w real(kind=rp) :: cfl - select case(Xh%lx) - case (14) - cfl = sx_cfl_lx14(dt, u, v, w, & + select case(Xh%lx) + case (14) + cfl = sx_cfl_lx14(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim) - case (13) - cfl = sx_cfl_lx13(dt, u, v, w, & + case (13) + cfl = sx_cfl_lx13(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim) - case (12) - cfl = sx_cfl_lx12(dt, u, v, w, & + case (12) + cfl = sx_cfl_lx12(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim) - case (11) - cfl = sx_cfl_lx11(dt, u, v, w, & + case (11) + cfl = sx_cfl_lx11(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim) - case (10) - cfl = sx_cfl_lx10(dt, u, v, w, & + case (10) + cfl = sx_cfl_lx10(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim) - case (9) - cfl = sx_cfl_lx9(dt, u, v, w, & + case (9) + cfl = sx_cfl_lx9(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim) - case (8) - cfl = sx_cfl_lx8(dt, u, v, w, & + case (8) + cfl = sx_cfl_lx8(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim) - case (7) - cfl = sx_cfl_lx7(dt, u, v, w, & + case (7) + cfl = sx_cfl_lx7(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim) - case (6) - cfl = sx_cfl_lx6(dt, u, v, w, & + case (6) + cfl = sx_cfl_lx6(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim) - case (5) - cfl = sx_cfl_lx5(dt, u, v, w, & + case (5) + cfl = sx_cfl_lx5(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim) - case (4) - cfl = sx_cfl_lx4(dt, u, v, w, & + case (4) + cfl = sx_cfl_lx4(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim) - case (3) - cfl = sx_cfl_lx3(dt, u, v, w, & + case (3) + cfl = sx_cfl_lx3(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim) - case (2) - cfl = sx_cfl_lx2(dt, u, v, w, & + case (2) + cfl = sx_cfl_lx2(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim) - case default - cfl = sx_cfl_lx(dt, u, v, w, & + case default + cfl = sx_cfl_lx(dt, u, v, w, & coef%drdx, coef%dsdx, coef%dtdx, & coef%drdy, coef%dsdy, coef%dtdy, & coef%drdz, coef%dsdz, coef%dtdz, & Xh%dr_inv, Xh%ds_inv, Xh%dt_inv, & coef%jacinv, nelv, gdim, Xh%lx) - end select + end select end function opr_sx_cfl + subroutine opr_sx_lambda2(lambda2, u, v, w, coef) + type(coef_t), intent(in) :: coef + type(field_t), intent(inout) :: lambda2 + type(field_t), intent(in) :: u, v, w + + associate(Xh => coef%Xh, msh => coef%msh) + select case(Xh%lx) + case (18) + call sx_lambda2_lx18(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (17) + call sx_lambda2_lx17(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (16) + call sx_lambda2_lx16(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (15) + call sx_lambda2_lx15(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (14) + call sx_lambda2_lx14(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (13) + call sx_lambda2_lx13(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (12) + call sx_lambda2_lx12(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (11) + call sx_lambda2_lx11(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (10) + call sx_lambda2_lx10(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (9) + call sx_lambda2_lx9(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (8) + call sx_lambda2_lx8(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (7) + call sx_lambda2_lx7(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (6) + call sx_lambda2_lx6(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (5) + call sx_lambda2_lx5(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (4) + call sx_lambda2_lx4(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (3) + call sx_lambda2_lx3(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case (2) + call sx_lambda2_lx2(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv) + case default + call sx_lambda2_lx(lambda2%x, u%x, v%x, w%x, & + Xh%dx, Xh%dy, Xh%dz, & + coef%drdx, coef%dsdx, coef%dtdx, & + coef%drdy, coef%dsdy, coef%dtdy, & + coef%drdz, coef%dsdz, coef%dtdz, & + Xh%w3, coef%B, msh%nelv, Xh%lx) + end select + end associate + + end subroutine opr_sx_lambda2 + end module opr_sx diff --git a/src/math/bcknd/sx/sx_cdtp.f90 b/src/math/bcknd/sx/sx_cdtp.f90 index b34208829f4..ab0e1d14853 100644 --- a/src/math/bcknd/sx/sx_cdtp.f90 +++ b/src/math/bcknd/sx/sx_cdtp.f90 @@ -48,7 +48,7 @@ subroutine sx_cdtp_lx(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd, lx) integer, intent(in) :: nel, nd, lx real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj @@ -108,7 +108,7 @@ subroutine sx_cdtp_lx14(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) integer, intent(in) :: nel, nd real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj @@ -162,13 +162,13 @@ subroutine sx_cdtp_lx14(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) end do end subroutine sx_cdtp_lx14 - + subroutine sx_cdtp_lx13(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) integer, parameter :: lx = 13 integer, intent(in) :: nel, nd real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj @@ -222,13 +222,13 @@ subroutine sx_cdtp_lx13(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) end do end subroutine sx_cdtp_lx13 - + subroutine sx_cdtp_lx12(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) integer, parameter :: lx = 12 integer, intent(in) :: nel, nd real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj @@ -288,7 +288,7 @@ subroutine sx_cdtp_lx11(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) integer, intent(in) :: nel, nd real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj @@ -348,7 +348,7 @@ subroutine sx_cdtp_lx10(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) integer, intent(in) :: nel, nd real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj @@ -408,7 +408,7 @@ subroutine sx_cdtp_lx9(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) integer, intent(in) :: nel, nd real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj @@ -468,7 +468,7 @@ subroutine sx_cdtp_lx8(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) integer, intent(in) :: nel, nd real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj @@ -528,7 +528,7 @@ subroutine sx_cdtp_lx7(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) integer, intent(in) :: nel, nd real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj @@ -588,7 +588,7 @@ subroutine sx_cdtp_lx6(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) integer, intent(in) :: nel, nd real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj @@ -648,7 +648,7 @@ subroutine sx_cdtp_lx5(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) integer, intent(in) :: nel, nd real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj @@ -708,7 +708,7 @@ subroutine sx_cdtp_lx4(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) integer, intent(in) :: nel, nd real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj @@ -768,7 +768,7 @@ subroutine sx_cdtp_lx3(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) integer, intent(in) :: nel, nd real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj @@ -828,7 +828,7 @@ subroutine sx_cdtp_lx2(dtx, x, dr, ds, dt, dxt, dyt, dzt, B, jac, nel, nd) integer, intent(in) :: nel, nd real(kind=rp), dimension(lx,lx,lx,nel), intent(inout) :: dtx real(kind=rp), dimension(lx,lx,lx,nel), intent(in) :: x, dr, ds, dt, jac, B - real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) + real(kind=rp), intent(in) :: dxt(lx,lx), dyt(lx,lx), dzt(lx,lx) real(kind=rp), dimension(lx,lx,lx,nel) :: wx, ta1 real(kind=rp) :: tmp integer :: e, i, j, k, kk, jj diff --git a/src/math/bcknd/sx/sx_cfl.f90 b/src/math/bcknd/sx/sx_cfl.f90 index 64518fc829b..7014497468e 100644 --- a/src/math/bcknd/sx/sx_cfl.f90 +++ b/src/math/bcknd/sx/sx_cfl.f90 @@ -53,7 +53,7 @@ function sx_cfl_lx(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl @@ -73,11 +73,11 @@ function sx_cfl_lx(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -86,7 +86,7 @@ function sx_cfl_lx(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & end do end function sx_cfl_lx - + function sx_cfl_lx14(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & drdz, dsdz, dtdz, dr_inv, ds_inv, dt_inv, & jacinv,nelv, gdim) result(cfl) @@ -98,13 +98,13 @@ function sx_cfl_lx14(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl integer :: i, j, k, e cfl = 0d0 - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -118,11 +118,11 @@ function sx_cfl_lx14(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -143,13 +143,13 @@ function sx_cfl_lx13(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl integer :: i, j, k, e cfl = 0d0 - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -163,11 +163,11 @@ function sx_cfl_lx13(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -188,13 +188,13 @@ function sx_cfl_lx12(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl integer :: i, j, k, e cfl = 0d0 - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -208,11 +208,11 @@ function sx_cfl_lx12(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -233,13 +233,13 @@ function sx_cfl_lx11(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl integer :: i, j, k, e cfl = 0d0 - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -253,11 +253,11 @@ function sx_cfl_lx11(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -278,13 +278,13 @@ function sx_cfl_lx10(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl integer :: i, j, k, e cfl = 0d0 - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -298,11 +298,11 @@ function sx_cfl_lx10(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -323,13 +323,13 @@ function sx_cfl_lx9(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl integer :: i, j, k, e cfl = 0d0 - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -343,11 +343,11 @@ function sx_cfl_lx9(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -368,13 +368,13 @@ function sx_cfl_lx8(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl integer :: i, j, k, e cfl = 0d0 - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -388,11 +388,11 @@ function sx_cfl_lx8(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -413,13 +413,13 @@ function sx_cfl_lx7(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl integer :: i, j, k, e cfl = 0d0 - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -433,11 +433,11 @@ function sx_cfl_lx7(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -458,13 +458,13 @@ function sx_cfl_lx6(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl integer :: i, j, k, e cfl = 0d0 - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -478,11 +478,11 @@ function sx_cfl_lx6(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -503,13 +503,13 @@ function sx_cfl_lx5(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl integer :: i, j, k, e cfl = 0d0 - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -523,11 +523,11 @@ function sx_cfl_lx5(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -548,13 +548,13 @@ function sx_cfl_lx4(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl integer :: i, j, k, e cfl = 0d0 - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -568,11 +568,11 @@ function sx_cfl_lx4(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -593,13 +593,13 @@ function sx_cfl_lx3(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl integer :: i, j, k, e cfl = 0d0 - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -613,11 +613,11 @@ function sx_cfl_lx3(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do @@ -638,13 +638,13 @@ function sx_cfl_lx2(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx), intent(in) :: dr_inv, ds_inv, dt_inv - real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv + real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv real(kind=rp) :: cflr, cfls, cflt, cflm real(kind=rp) :: ur, us, ut real(kind=rp) :: cfl integer :: i, j, k, e cfl = 0d0 - + do k = 1, lx do j = 1, lx do i = 1, lx @@ -658,11 +658,11 @@ function sx_cfl_lx2(dt, u, v, w, drdx, dsdx, dtdx, drdy, dsdy, dtdy, & ut = ( u(i,j,k,e)*dtdx(i,j,k,e) & + v(i,j,k,e)*dtdy(i,j,k,e) & + w(i,j,k,e)*dtdz(i,j,k,e) ) * jacinv(i,j,k,e) - + cflr = abs(dt*ur*dr_inv(i)) cfls = abs(dt*us*ds_inv(j)) cflt = abs(dt*ut*dt_inv(k)) - + cflm = cflr + cfls + cflt cfl = max(cfl,cflm) end do diff --git a/src/math/bcknd/sx/sx_conv1.f90 b/src/math/bcknd/sx/sx_conv1.f90 index 084cd4ad6a2..95230bfe623 100644 --- a/src/math/bcknd/sx/sx_conv1.f90 +++ b/src/math/bcknd/sx/sx_conv1.f90 @@ -53,15 +53,15 @@ subroutine sx_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -73,7 +73,7 @@ subroutine sx_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -88,7 +88,7 @@ subroutine sx_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx do i = 1, lx do k = 1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -115,9 +115,9 @@ subroutine sx_conv1_lx(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx - + subroutine sx_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv, gdim) @@ -129,15 +129,15 @@ subroutine sx_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -149,7 +149,7 @@ subroutine sx_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -164,7 +164,7 @@ subroutine sx_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx do i = 1, lx do k = 1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -191,7 +191,7 @@ subroutine sx_conv1_lx14(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx14 subroutine sx_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, & @@ -205,15 +205,15 @@ subroutine sx_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -225,7 +225,7 @@ subroutine sx_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -240,7 +240,7 @@ subroutine sx_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx do i = 1, lx do k = 1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -267,9 +267,9 @@ subroutine sx_conv1_lx13(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx13 - + subroutine sx_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv, gdim) @@ -281,15 +281,15 @@ subroutine sx_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -301,7 +301,7 @@ subroutine sx_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -316,7 +316,7 @@ subroutine sx_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx do i = 1, lx do k = 1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -343,7 +343,7 @@ subroutine sx_conv1_lx12(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx12 subroutine sx_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, & @@ -357,15 +357,15 @@ subroutine sx_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -377,7 +377,7 @@ subroutine sx_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -392,7 +392,7 @@ subroutine sx_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx do i = 1, lx do k = 1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -419,9 +419,9 @@ subroutine sx_conv1_lx11(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx11 - + subroutine sx_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv, gdim) @@ -433,15 +433,15 @@ subroutine sx_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -453,7 +453,7 @@ subroutine sx_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -468,7 +468,7 @@ subroutine sx_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx do i = 1, lx do k = 1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -495,7 +495,7 @@ subroutine sx_conv1_lx10(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx10 subroutine sx_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, & @@ -509,15 +509,15 @@ subroutine sx_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -529,7 +529,7 @@ subroutine sx_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -544,7 +544,7 @@ subroutine sx_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx do i = 1, lx do k = 1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -571,9 +571,9 @@ subroutine sx_conv1_lx9(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx9 - + subroutine sx_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv, gdim) @@ -585,15 +585,15 @@ subroutine sx_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -605,7 +605,7 @@ subroutine sx_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -620,7 +620,7 @@ subroutine sx_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx do i = 1, lx do k = 1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -647,7 +647,7 @@ subroutine sx_conv1_lx8(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx8 subroutine sx_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, & @@ -661,15 +661,15 @@ subroutine sx_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -681,7 +681,7 @@ subroutine sx_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -696,7 +696,7 @@ subroutine sx_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx do i = 1, lx do k = 1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -723,9 +723,9 @@ subroutine sx_conv1_lx7(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx7 - + subroutine sx_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv, gdim) @@ -737,15 +737,15 @@ subroutine sx_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -757,7 +757,7 @@ subroutine sx_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -772,7 +772,7 @@ subroutine sx_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx do i = 1, lx do k = 1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -799,9 +799,9 @@ subroutine sx_conv1_lx6(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx6 - + subroutine sx_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, & jacinv, nelv, gdim) @@ -813,15 +813,15 @@ subroutine sx_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -833,7 +833,7 @@ subroutine sx_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -848,7 +848,7 @@ subroutine sx_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx do i = 1, lx do k = 1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -875,7 +875,7 @@ subroutine sx_conv1_lx5(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx5 subroutine sx_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, & @@ -889,15 +889,15 @@ subroutine sx_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -909,7 +909,7 @@ subroutine sx_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -924,7 +924,7 @@ subroutine sx_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx do i = 1, lx do k = 1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -951,7 +951,7 @@ subroutine sx_conv1_lx4(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx4 subroutine sx_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, & @@ -965,15 +965,15 @@ subroutine sx_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -985,7 +985,7 @@ subroutine sx_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -1000,7 +1000,7 @@ subroutine sx_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, & do j=1, lx do i=1, lx do k=1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk=1, lx @@ -1027,7 +1027,7 @@ subroutine sx_conv1_lx3(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx3 subroutine sx_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, & @@ -1041,15 +1041,15 @@ subroutine sx_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, & real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdy, dsdy, dtdy real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: drdz, dsdz, dtdz real(kind=rp), dimension(lx,lx,lx,nelv), intent(in) :: jacinv - real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nelv) :: dudr real(kind=rp), dimension(lx,lx,lx,nelv) :: duds real(kind=rp), dimension(lx,lx,lx,nelv) :: dudt real(kind=rp) :: wr, ws, wt - integer :: e, i, j, k, jj, kk + integer :: e, i, j, k, jj, kk do i = 1, lx - do jj = 1, lx * lx * nelv + do jj = 1, lx * lx * nelv wr = 0d0 do kk = 1, lx wr = wr + dx(i,kk)*u(kk,jj,1,1) @@ -1061,7 +1061,7 @@ subroutine sx_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, & do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nelv + do e = 1, nelv ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -1076,7 +1076,7 @@ subroutine sx_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, & do j = 1, lx do i = 1, lx do k = 1, lx - do e = 1, nelv + do e = 1, nelv wt = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -1103,7 +1103,7 @@ subroutine sx_conv1_lx2(du, u, vx, vy, vz, dx, dy, dz, & + dsdz(i,1,1,1)*duds(i,1,1,1) & + dtdz(i,1,1,1)*dudt(i,1,1,1))) end do - + end subroutine sx_conv1_lx2 - + end module sx_conv1 diff --git a/src/math/bcknd/sx/sx_dudxyz.f90 b/src/math/bcknd/sx/sx_dudxyz.f90 index 787f831adea..ccdcedc868b 100644 --- a/src/math/bcknd/sx/sx_dudxyz.f90 +++ b/src/math/bcknd/sx/sx_dudxyz.f90 @@ -52,7 +52,7 @@ subroutine sx_dudxyz_lx(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd, lx) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -70,7 +70,7 @@ subroutine sx_dudxyz_lx(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd, lx) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -102,7 +102,7 @@ subroutine sx_dudxyz_lx(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd, lx) call addcol3(du, drst, dt, nd) call col2 (du, jacinv, nd) end subroutine sx_dudxyz_lx - + subroutine sx_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) integer, parameter :: lx = 14 integer, intent(in) :: nel, nd @@ -112,7 +112,7 @@ subroutine sx_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -130,7 +130,7 @@ subroutine sx_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -162,7 +162,7 @@ subroutine sx_dudxyz_lx14(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) call addcol3(du, drst, dt, nd) call col2 (du, jacinv, nd) end subroutine sx_dudxyz_lx14 - + subroutine sx_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) integer, parameter :: lx = 13 integer, intent(in) :: nel, nd @@ -172,7 +172,7 @@ subroutine sx_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -190,7 +190,7 @@ subroutine sx_dudxyz_lx13(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -232,7 +232,7 @@ subroutine sx_dudxyz_lx12(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -250,7 +250,7 @@ subroutine sx_dudxyz_lx12(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -292,7 +292,7 @@ subroutine sx_dudxyz_lx11(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -310,7 +310,7 @@ subroutine sx_dudxyz_lx11(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -352,7 +352,7 @@ subroutine sx_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -370,7 +370,7 @@ subroutine sx_dudxyz_lx10(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -412,7 +412,7 @@ subroutine sx_dudxyz_lx9(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -430,7 +430,7 @@ subroutine sx_dudxyz_lx9(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -472,7 +472,7 @@ subroutine sx_dudxyz_lx8(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -490,7 +490,7 @@ subroutine sx_dudxyz_lx8(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -532,7 +532,7 @@ subroutine sx_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -550,7 +550,7 @@ subroutine sx_dudxyz_lx7(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -592,7 +592,7 @@ subroutine sx_dudxyz_lx6(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -610,7 +610,7 @@ subroutine sx_dudxyz_lx6(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -652,7 +652,7 @@ subroutine sx_dudxyz_lx5(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -670,7 +670,7 @@ subroutine sx_dudxyz_lx5(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -712,7 +712,7 @@ subroutine sx_dudxyz_lx4(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -730,7 +730,7 @@ subroutine sx_dudxyz_lx4(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -772,7 +772,7 @@ subroutine sx_dudxyz_lx3(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -790,7 +790,7 @@ subroutine sx_dudxyz_lx3(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -832,7 +832,7 @@ subroutine sx_dudxyz_lx2(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,nel) :: drst integer :: e, k - integer :: i, j, jj, kk + integer :: i, j, jj, kk real(kind=rp) :: wr, ws, wt do i = 1, lx @@ -850,7 +850,7 @@ subroutine sx_dudxyz_lx2(du, u, dr, ds, dt, dx, dy, dz, jacinv, nel, nd) do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, nel + do e = 1, nel ws = 0d0 !NEC$ unroll_completely do kk = 1, lx diff --git a/src/math/bcknd/sx/sx_lambda2.f90 b/src/math/bcknd/sx/sx_lambda2.f90 new file mode 100644 index 00000000000..eb49aa00fec --- /dev/null +++ b/src/math/bcknd/sx/sx_lambda2.f90 @@ -0,0 +1,3282 @@ +! Copyright (c) 2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Lambda2 kernels for SX-Aurora +module sx_lambda2 + use num_types, only : rp + use math, only : pi + implicit none + +contains + + subroutine sx_lambda2_lx(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n, lx) + integer, intent(in) :: n, lx + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx + + subroutine sx_lambda2_lx18(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 18 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx18 + + subroutine sx_lambda2_lx17(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 17 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx17 + + subroutine sx_lambda2_lx16(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 16 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx16 + + subroutine sx_lambda2_lx15(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 15 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx15 + + subroutine sx_lambda2_lx14(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 14 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx14 + + subroutine sx_lambda2_lx13(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 13 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx13 + + subroutine sx_lambda2_lx12(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 12 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx12 + + subroutine sx_lambda2_lx11(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 11 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx11 + + subroutine sx_lambda2_lx10(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 10 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx10 + + subroutine sx_lambda2_lx9(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 9 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx9 + + subroutine sx_lambda2_lx8(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 8 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx8 + + subroutine sx_lambda2_lx7(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 7 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx7 + + subroutine sx_lambda2_lx6(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 6 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx6 + + subroutine sx_lambda2_lx5(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 5 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx5 + + subroutine sx_lambda2_lx4(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 4 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx4 + + subroutine sx_lambda2_lx3(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 3 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx3 + + subroutine sx_lambda2_lx2(lambda2, u, v, w, dx, dy, dz, & + drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, cB, n) + integer, parameter :: lx = 2 + integer, intent(in) :: n + real(kind=rp), dimension(lx,lx,lx,n), intent(inout) :: lambda2 + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: u + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: v + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: w + real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: cB + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 + real(kind=rp) :: grad(lx*lx*lx,3,3) + integer :: temp_indices(9), ind_sort(3) + real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 + real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 + real(kind=rp) :: a11, a22, a33, a12, a13, a23 + real(kind=rp) :: msk1, msk2, msk3 + real(kind=rp) :: ur(lx,lx,lx) + real(kind=rp) :: vr(lx,lx,lx) + real(kind=rp) :: wr(lx,lx,lx) + real(kind=rp) :: us(lx,lx,lx) + real(kind=rp) :: vs(lx,lx,lx) + real(kind=rp) :: ws(lx,lx,lx) + real(kind=rp) :: ut(lx,lx,lx) + real(kind=rp) :: vt(lx,lx,lx) + real(kind=rp) :: wt(lx,lx,lx) + real(kind=rp) :: tmp1, tmp2, tmp3 + integer :: e, i, j, k, l + + do e = 1, n + do j = 1, lx * lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do k = 1, lx + tmp1 = tmp1 + dx(i,k) * u(k,j,1,e) + tmp2 = tmp2 + dx(i,k) * v(k,j,1,e) + tmp3 = tmp3 + dx(i,k) * w(k,j,1,e) + end do + ur(i,j,1) = tmp1 + vr(i,j,1) = tmp2 + wr(i,j,1) = tmp3 + end do + end do + + do k = 1, lx + do j = 1, lx + do i = 1, lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dy(j,l) * u(i,l,k,e) + tmp2 = tmp2 + dy(j,l) * v(i,l,k,e) + tmp3 = tmp3 + dy(j,l) * w(i,l,k,e) + end do + us(i,j,k) = tmp1 + vs(i,j,k) = tmp2 + ws(i,j,k) = tmp3 + end do + end do + end do + + do k = 1, lx + do i = 1, lx*lx + tmp1 = 0.0_rp + tmp2 = 0.0_rp + tmp3 = 0.0_rp + do l = 1, lx + tmp1 = tmp1 + dz(k,l) * u(i,1,l,e) + tmp2 = tmp2 + dz(k,l) * v(i,1,l,e) + tmp3 = tmp3 + dz(k,l) * w(i,1,l,e) + end do + ut(i,1,k) = tmp1 + vt(i,1,k) = tmp2 + wt(i,1,k) = tmp3 + end do + end do + + do i = 1, lx * lx * lx + grad(1,1,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * ur(i,1,1) & + + dsdx(i,1,1,e) * us(i,1,1) & + + dtdx(i,1,1,e) * ut(i,1,1) ) + grad(1,1,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * us(i,1,1) & + + drdy(i,1,1,e) * ur(i,1,1) & + + dtdy(i,1,1,e) * ut(i,1,1) ) + grad(1,1,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * ut(i,1,1) & + + drdz(i,1,1,e) * ur(i,1,1) & + + dsdz(i,1,1,e) * us(i,1,1) ) + + grad(1,2,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * vr(i,1,1) & + + dsdx(i,1,1,e) * vs(i,1,1) & + + dtdx(i,1,1,e) * vt(i,1,1) ) + grad(1,2,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * vs(i,1,1) & + + drdy(i,1,1,e) * vr(i,1,1) & + + dtdy(i,1,1,e) * vt(i,1,1) ) + grad(1,2,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * vt(i,1,1) & + + drdz(i,1,1,e) * vr(i,1,1) & + + dsdz(i,1,1,e) * vs(i,1,1) ) + + grad(1,3,1) = w3(i,1,1) & + * ( drdx(i,1,1,e) * wr(i,1,1) & + + dsdx(i,1,1,e) * ws(i,1,1) & + + dtdx(i,1,1,e) * wt(i,1,1) ) + grad(1,3,2) = w3(i,1,1) & + * ( dsdy(i,1,1,e) * ws(i,1,1) & + + drdy(i,1,1,e) * wr(i,1,1) & + + dtdy(i,1,1,e) * wt(i,1,1) ) + grad(1,3,3) = w3(i,1,1) & + * ( dtdz(i,1,1,e) * wt(i,1,1) & + + drdz(i,1,1,e) * wr(i,1,1) & + + dsdz(i,1,1,e) * ws(i,1,1) ) + end do + + + do i = 1, lx * lx * lx + s11 = grad(i,1,1) + s22 = grad(i,2,2) + s33 = grad(i,3,3) + + + s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) + s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) + s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) + + o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) + o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) + o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) + + a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 + a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 + a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 + + a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 + a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 + a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 + + + B = -(a11 + a22 + a33) + C = -(a12*a12 + a13*a13 + a23*a23 & + - a11 * a22 - a11 * a33 - a22 * a33) + D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & + - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) + + + q = (3.0 * C - B*B) / 9.0 + r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 + theta = acos( r / sqrt(-q*q*q) ) + + eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 + eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 + eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 + + msk1 = merge(1.0_rp, 0.0_rp, eigen(2) .le. eigen(1) & + .and. eigen(1) .le. eigen(3) .or. eigen(3) & + .le. eigen(1) .and. eigen(1) .le. eigen(2) ) + msk2 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(2) & + .and. eigen(2) .le. eigen(3) .or. eigen(3) & + .le. eigen(2) .and. eigen(2) .le. eigen(1)) + msk3 = merge(1.0_rp, 0.0_rp, eigen(1) .le. eigen(3) & + .and. eigen(3) .le. eigen(2) .or. eigen(2) & + .le. eigen(3) .and. eigen(3) .le. eigen(1)) + + l2 = msk1 * eigen(1) + msk2 * eigen(2) + msk3 * eigen(3) + + lambda2(i,1,1,e) = l2/(cB(i,1,1,e)**2) + end do + end do + end subroutine sx_lambda2_lx2 + +end module sx_lambda2 + + diff --git a/src/math/bcknd/sx/sx_opgrad.f90 b/src/math/bcknd/sx/sx_opgrad.f90 index 94a4f17f941..d08e1ce7896 100644 --- a/src/math/bcknd/sx/sx_opgrad.f90 +++ b/src/math/bcknd/sx/sx_opgrad.f90 @@ -45,8 +45,8 @@ subroutine sx_opgrad_lx(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -62,11 +62,11 @@ subroutine sx_opgrad_lx(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1,lx @@ -92,7 +92,7 @@ subroutine sx_opgrad_lx(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -104,7 +104,7 @@ subroutine sx_opgrad_lx(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -112,7 +112,7 @@ subroutine sx_opgrad_lx(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine sx_opgrad_lx - + subroutine sx_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 18 @@ -122,8 +122,8 @@ subroutine sx_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -139,11 +139,11 @@ subroutine sx_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1,lx @@ -169,7 +169,7 @@ subroutine sx_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -181,7 +181,7 @@ subroutine sx_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -189,7 +189,7 @@ subroutine sx_opgrad_lx18(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine sx_opgrad_lx18 - + subroutine sx_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 17 @@ -199,8 +199,8 @@ subroutine sx_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -216,11 +216,11 @@ subroutine sx_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1,lx @@ -246,7 +246,7 @@ subroutine sx_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -258,7 +258,7 @@ subroutine sx_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -266,7 +266,7 @@ subroutine sx_opgrad_lx17(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine sx_opgrad_lx17 - + subroutine sx_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 16 @@ -276,8 +276,8 @@ subroutine sx_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -293,11 +293,11 @@ subroutine sx_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1,lx @@ -323,7 +323,7 @@ subroutine sx_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -335,7 +335,7 @@ subroutine sx_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -343,7 +343,7 @@ subroutine sx_opgrad_lx16(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine sx_opgrad_lx16 - + subroutine sx_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 15 @@ -353,8 +353,8 @@ subroutine sx_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -370,11 +370,11 @@ subroutine sx_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1,lx @@ -400,7 +400,7 @@ subroutine sx_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -412,7 +412,7 @@ subroutine sx_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -420,7 +420,7 @@ subroutine sx_opgrad_lx15(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine sx_opgrad_lx15 - + subroutine sx_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 14 @@ -430,8 +430,8 @@ subroutine sx_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -447,11 +447,11 @@ subroutine sx_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1,lx @@ -477,7 +477,7 @@ subroutine sx_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -489,7 +489,7 @@ subroutine sx_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -497,7 +497,7 @@ subroutine sx_opgrad_lx14(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine sx_opgrad_lx14 - + subroutine sx_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 13 @@ -507,8 +507,8 @@ subroutine sx_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -524,11 +524,11 @@ subroutine sx_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1,lx @@ -554,7 +554,7 @@ subroutine sx_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -566,7 +566,7 @@ subroutine sx_opgrad_lx13(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -584,8 +584,8 @@ subroutine sx_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -601,11 +601,11 @@ subroutine sx_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1,lx @@ -631,7 +631,7 @@ subroutine sx_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -643,7 +643,7 @@ subroutine sx_opgrad_lx12(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -661,8 +661,8 @@ subroutine sx_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -678,11 +678,11 @@ subroutine sx_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -708,7 +708,7 @@ subroutine sx_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -720,7 +720,7 @@ subroutine sx_opgrad_lx11(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -738,8 +738,8 @@ subroutine sx_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -755,11 +755,11 @@ subroutine sx_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -785,7 +785,7 @@ subroutine sx_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -797,7 +797,7 @@ subroutine sx_opgrad_lx10(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -815,8 +815,8 @@ subroutine sx_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx,lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -832,11 +832,11 @@ subroutine sx_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -862,7 +862,7 @@ subroutine sx_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -874,7 +874,7 @@ subroutine sx_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -882,7 +882,7 @@ subroutine sx_opgrad_lx9(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine sx_opgrad_lx9 - + subroutine sx_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 8 @@ -892,8 +892,8 @@ subroutine sx_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -909,11 +909,11 @@ subroutine sx_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -939,7 +939,7 @@ subroutine sx_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -951,7 +951,7 @@ subroutine sx_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -960,7 +960,7 @@ subroutine sx_opgrad_lx8(ux, uy, uz, u, dx, dy, dz, & end do end subroutine sx_opgrad_lx8 - subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, & + subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 7 integer, intent(in) :: n @@ -969,8 +969,8 @@ subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -986,11 +986,11 @@ subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -1016,7 +1016,7 @@ subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -1028,7 +1028,7 @@ subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -1036,7 +1036,7 @@ subroutine sx_opgrad_lx7(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine sx_opgrad_lx7 - + subroutine sx_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 6 @@ -1046,8 +1046,8 @@ subroutine sx_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -1063,11 +1063,11 @@ subroutine sx_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -1093,7 +1093,7 @@ subroutine sx_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -1105,7 +1105,7 @@ subroutine sx_opgrad_lx6(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -1123,8 +1123,8 @@ subroutine sx_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -1140,11 +1140,11 @@ subroutine sx_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -1170,7 +1170,7 @@ subroutine sx_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -1182,7 +1182,7 @@ subroutine sx_opgrad_lx5(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -1200,8 +1200,8 @@ subroutine sx_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -1217,11 +1217,11 @@ subroutine sx_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -1247,7 +1247,7 @@ subroutine sx_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -1259,7 +1259,7 @@ subroutine sx_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -1267,7 +1267,7 @@ subroutine sx_opgrad_lx4(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine sx_opgrad_lx4 - + subroutine sx_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, & drdx, dsdx, dtdx, drdy, dsdy, dtdy, drdz, dsdz, dtdz, w3, n) integer, parameter :: lx = 3 @@ -1277,8 +1277,8 @@ subroutine sx_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -1294,11 +1294,11 @@ subroutine sx_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -1324,7 +1324,7 @@ subroutine sx_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -1336,7 +1336,7 @@ subroutine sx_opgrad_lx3(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -1354,8 +1354,8 @@ subroutine sx_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, & real(kind=rp), dimension(lx, lx), intent(in) :: dx, dy, dz real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdx, dsdx, dtdx real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdy, dsdy, dtdy - real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz - real(kind=rp), dimension(lx, lx), intent(in) :: w3(lx,lx,lx) + real(kind=rp), dimension(lx,lx,lx,n), intent(in) :: drdz, dsdz, dtdz + real(kind=rp), dimension(lx,lx,lx), intent(in) :: w3 real(kind=rp) :: ur(lx,lx,lx, n) real(kind=rp) :: us(lx,lx,lx, n) real(kind=rp) :: ut(lx,lx,lx, n) @@ -1371,11 +1371,11 @@ subroutine sx_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, & ur(i,jj,1,1) = wr end do end do - + do k = 1, lx do i = 1, lx do j = 1, lx - do e = 1, n + do e = 1, n ws = 0d0 !NEC$ unroll_completely do kk = 1, lx @@ -1401,7 +1401,7 @@ subroutine sx_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, & end do end do end do - + do i = 1, lx * lx * lx do e = 1, n ux(i,1,1,e) = w3(i,1,1) * & @@ -1413,7 +1413,7 @@ subroutine sx_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, & ( dsdy(i,1,1,e) * us(i,1,1,e) & + drdy(i,1,1,e) * ur(i,1,1,e) & + dtdy(i,1,1,e) * ut(i,1,1,e) ) - + uz(i,1,1,e) = w3(i,1,1) * & ( dtdz(i,1,1,e) * ut(i,1,1,e) & + drdz(i,1,1,e) * ur(i,1,1,e) & @@ -1421,5 +1421,5 @@ subroutine sx_opgrad_lx2(ux, uy, uz, u, dx, dy, dz, & end do end do end subroutine sx_opgrad_lx2 - + end module sx_opgrad diff --git a/src/math/bcknd/sx/tensor_sx.f90 b/src/math/bcknd/sx/tensor_sx.f90 index 94a005693bd..beca41e214a 100644 --- a/src/math/bcknd/sx/tensor_sx.f90 +++ b/src/math/bcknd/sx/tensor_sx.f90 @@ -17,9 +17,9 @@ subroutine tnsr2d_el_sx(v, nv, u, nu, A, Bt) call mxm(A, nv, u, nu, work, nu) call mxm(work, nv, Bt, nu, v, nv) - + end subroutine tnsr2d_el_sx - + subroutine tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct) integer, intent(in) :: nv, nu real(kind=rp), intent(inout) :: v(nv*nv*nv), u(nu*nu*nu) @@ -29,7 +29,7 @@ subroutine tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct) integer :: i, j, k, l, nunu, nvnu, nvnv integer :: ii, jj nvnu = nv * nu - nunu = nu * nu + nunu = nu * nu nvnv = nv * nv do j = 1, nunu @@ -42,7 +42,7 @@ subroutine tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct) work(ii) = tmp end do end do - + do i = 1, nu do j = 1, nv do l = 1, nv @@ -56,7 +56,7 @@ subroutine tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct) end do end do end do - + do j = 1, nv do i = 1, nvnv jj = i + nvnv * (j - 1) @@ -68,7 +68,7 @@ subroutine tnsr3d_el_sx(v, nv, u, nu, A, Bt, Ct) v(jj) = tmp end do end do - + end subroutine tnsr3d_el_sx subroutine tnsr3d_sx(v, nv, u, nu, A, Bt, Ct, nelv) @@ -95,7 +95,7 @@ subroutine tnsr3d_nvnu_sx(v, nv, u, nu, A, Bt, Ct, nelv) integer :: nunu, nvnu, nvnv nvnu = nv * nu - nunu = nu * nu + nunu = nu * nu nvnv = nv * nv do ie = 1, nelv @@ -109,7 +109,7 @@ subroutine tnsr3d_nvnu_sx(v, nv, u, nu, A, Bt, Ct, nelv) work(ii) = tmp end do end do - + do i = 1, nu do j = 1, nv do l = 1, nv @@ -123,7 +123,7 @@ subroutine tnsr3d_nvnu_sx(v, nv, u, nu, A, Bt, Ct, nelv) end do end do end do - + do j = 1, nv do i = 1, nvnv jj = i + nvnv * (j - 1) @@ -150,18 +150,18 @@ subroutine tnsr3d_nu2nv4_sx(v, u, A, Bt, Ct, nelv) real(kind=rp), intent(inout) :: A(nv,nu), Bt(nu, nv), Ct(nu,nv) real(kind=rp) :: work(nu**2*nv,nelv), work2(nu*nv**2,nelv), tmp integer :: ie, i, j, k, l, ii, jj - + do j = 1, nunu do i = 1, nv do ie = 1, nelv ii = i + nv * (j - 1) work(ii, ie) = A(i,1) * u(1 + nu * (j - 1), ie) & - + A(i,2) * u(2 + nu * (j - 1), ie) + + A(i,2) * u(2 + nu * (j - 1), ie) end do end do end do - + do i = 1, nu do j = 1, nv do l = 1, nv @@ -178,13 +178,13 @@ subroutine tnsr3d_nu2nv4_sx(v, u, A, Bt, Ct, nelv) end do end do end do - + do j = 1, nv do i = 1, nvnv do ie = 1, nelv jj = i + nvnv * (j - 1) v(jj, ie) = work2(i + nvnv * (1 - 1),ie) * Ct(1, j) & - + work2(i + nvnv * (2 - 1),ie) * Ct(2, j) + + work2(i + nvnv * (2 - 1),ie) * Ct(2, j) end do end do end do @@ -203,15 +203,15 @@ subroutine tnsr3d_nu4_sx(v, nv, u, A, Bt, Ct, nelv) nvnu = nv * nu nvnv = nv * nv - + do j = 1, nunu do i = 1, nv - do ie = 1, nelv + do ie = 1, nelv ii = i + nv * (j - 1) work(ii, ie) = A(i,1) * u(1 + nu * (j - 1), ie) & + A(i,2) * u(2 + nu * (j - 1), ie) & + A(i,3) * u(3 + nu * (j - 1), ie) & - + A(i,4) * u(4 + nu * (j - 1), ie) + + A(i,4) * u(4 + nu * (j - 1), ie) end do end do end do @@ -219,7 +219,7 @@ subroutine tnsr3d_nu4_sx(v, nv, u, A, Bt, Ct, nelv) do i = 1, nu do j = 1, nv do l = 1, nv - do ie = 1, nelv + do ie = 1, nelv ii = l + nv * (j - 1) + nvnv * (i - 1) tmp = 0.0_rp !NEC$ unroll_completely @@ -232,15 +232,15 @@ subroutine tnsr3d_nu4_sx(v, nv, u, A, Bt, Ct, nelv) end do end do end do - + do j = 1, nv do i = 1, nvnv - do ie = 1, nelv + do ie = 1, nelv jj = i + nvnv * (j - 1) v(jj, ie) = work2(i + nvnv * (1 - 1),ie) * Ct(1, j) & + work2(i + nvnv * (2 - 1),ie) * Ct(2, j) & + work2(i + nvnv * (3 - 1),ie) * Ct(3, j) & - + work2(i + nvnv * (4 - 1),ie) * Ct(4, j) + + work2(i + nvnv * (4 - 1),ie) * Ct(4, j) end do end do end do @@ -258,7 +258,7 @@ subroutine tnsr1_3d_sx(v, nv, nu, A, Bt, Ct, nelv) else call tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv) end if - + end subroutine tnsr1_3d_sx subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv) @@ -272,26 +272,26 @@ subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv) real(kind=rp) :: tmp nvnu = nv * nu - nunu = nu * nu + nunu = nu * nu nvnv = nv * nv - + e0 = 1 es = 1 ee = nelv - + if (nv.gt.nu) then e0 = nelv es = -1 ee = 1 endif - + nu3 = nu**3 nv3 = nv**3 - + do e = e0,ee,es iu = (e-1)*nu3 iv = (e-1)*nv3 - + do j = 1, nunu do i = 1, nv ii = i + nv * (j - 1) @@ -303,7 +303,7 @@ subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv) work(ii) = tmp end do end do - + do i = 1, nu do j = 1, nv do l = 1, nv @@ -317,7 +317,7 @@ subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv) end do end do end do - + do j = 1, nv do i = 1, nvnv jj = i + nvnv * (j - 1) + iv @@ -330,7 +330,7 @@ subroutine tnsr1_3d_nvnu_sx(v, nv, nu, A, Bt, Ct, nelv) end do end do end do - + end subroutine tnsr1_3d_nvnu_sx subroutine tnsr1_3d_nu4nv2_sx(v, A, Bt, Ct, nelv) @@ -361,14 +361,14 @@ subroutine tnsr1_3d_nu4nv2_sx(v, A, Bt, Ct, nelv) end do end do end do - + do i = 1, nu do j = 1, nv do l = 1, nv do ie = 1, nelv ii = l + nv * (j - 1) + nvnv * (i - 1) tmp = 0.0_rp - !NEC$ unroll_completely + !NEC$ unroll_completely do k = 1, nu jj = l + nv * (k - 1) + nvnu * (i - 1) tmp = tmp + work(jj,ie) * Bt(k,j) @@ -378,7 +378,7 @@ subroutine tnsr1_3d_nu4nv2_sx(v, A, Bt, Ct, nelv) end do end do end do - + do j = 1, nv do i = 1, nvnv do ie = 1, nelv @@ -387,11 +387,11 @@ subroutine tnsr1_3d_nu4nv2_sx(v, A, Bt, Ct, nelv) v(jj) = work2(i + nvnv * (1 - 1),ie) * Ct(1, j) & + work2(i + nvnv * (2 - 1),ie) * Ct(2, j) & + work2(i + nvnv * (3 - 1),ie) * Ct(3, j) & - + work2(i + nvnv * (4 - 1),ie) * Ct(4, j) + + work2(i + nvnv * (4 - 1),ie) * Ct(4, j) end do end do end do - + end subroutine tnsr1_3d_nu4nv2_sx - + end module tensor_sx diff --git a/src/math/bcknd/xsmm/ax_helm_xsmm.F90 b/src/math/bcknd/xsmm/ax_helm_xsmm.F90 index 198ab6e1a47..38b39d2599e 100644 --- a/src/math/bcknd/xsmm/ax_helm_xsmm.F90 +++ b/src/math/bcknd/xsmm/ax_helm_xsmm.F90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! module ax_helm_xsmm @@ -62,7 +62,6 @@ module ax_helm_xsmm use num_types, only : rp use coefs, only : coef_t use space, only : space_t - use field, only : field_t use mesh, only : mesh_t use mxm_wrapper use num_types @@ -71,21 +70,21 @@ module ax_helm_xsmm #endif implicit none private - + type, public, extends(ax_t) :: ax_helm_xsmm_t contains procedure, nopass :: compute => ax_helm_xsmm_compute end type ax_helm_xsmm_t -contains - +contains + subroutine ax_helm_xsmm_compute(w, u, coef, msh, Xh) type(mesh_t), intent(inout) :: msh type(space_t), intent(inout) :: Xh type(coef_t), intent(inout) :: coef real(kind=rp), intent(inout) :: w(Xh%lx, Xh%ly, Xh%lz, msh%nelv) real(kind=rp), intent(inout) :: u(Xh%lx, Xh%ly, Xh%lz, msh%nelv) - + real(kind=rp) :: dudr(Xh%lx,Xh%ly,Xh%lz) real(kind=rp) :: duds(Xh%lx,Xh%ly,Xh%lz) real(kind=rp) :: dudt(Xh%lx,Xh%ly,Xh%lz) @@ -119,56 +118,56 @@ subroutine ax_helm_xsmm_compute(w, u, coef, msh, Xh) ax_helm_xsmm_lx = Xh%lx end if - + do e = 1, msh%nelv if(msh%gdim .eq. 2) then - call mxm(Xh%dx, Xh%lx,u(1,1,1,e), Xh%lx, dudr, lyz) - call mxm(u(1,1,1,e), Xh%lx, Xh%dyt, Xh%ly, duds, Xh%ly) - call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz) - call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz) - if (msh%dfrmd_el(e)) then - call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz) - call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz) - end if - call col2(tmp1, coef%h1(1,1,1,e), lxyz) - call col2(tmp2, coef%h1(1,1,1,e), lxyz) - call mxm(Xh%dxt, Xh%lx, tmp1, Xh%lx, tm1, lyz) - call mxm(tmp2, Xh%lx, Xh%dy, Xh%ly, tm2, Xh%ly) - call add3(w(1,1,1,e), tm1, tm2, lxyz) + call mxm(Xh%dx, Xh%lx,u(1,1,1,e), Xh%lx, dudr, lyz) + call mxm(u(1,1,1,e), Xh%lx, Xh%dyt, Xh%ly, duds, Xh%ly) + call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz) + call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz) + if (msh%dfrmd_el(e)) then + call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz) + call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz) + end if + call col2(tmp1, coef%h1(1,1,1,e), lxyz) + call col2(tmp2, coef%h1(1,1,1,e), lxyz) + call mxm(Xh%dxt, Xh%lx, tmp1, Xh%lx, tm1, lyz) + call mxm(tmp2, Xh%lx, Xh%dy, Xh%ly, tm2, Xh%ly) + call add3(w(1,1,1,e), tm1, tm2, lxyz) - ! 3D evaluation! - else - call libxsmm_mmcall(ax_helm_xmm1, Xh%dx, u(1,1,1,e), dudr) - do k = 1,Xh%lz - call libxsmm_mmcall(ax_helm_xmm2, u(1,1,k,e), Xh%dyt, duds(1,1,k)) - end do - call libxsmm_mmcall(ax_helm_xmm3, u(1,1,1,e), Xh%dzt, dudt) - call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz) - call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz) - call col3(tmp3, dudt, coef%G33(1,1,1,e), lxyz) - if (msh%dfrmd_el(e)) then - call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz) - call addcol3(tmp1, dudt, coef%G13(1,1,1,e), lxyz) - call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz) - call addcol3(tmp2, dudt, coef%G23(1,1,1,e), lxyz) - call addcol3(tmp3, dudr, coef%G13(1,1,1,e), lxyz) - call addcol3(tmp3, duds, coef%G23(1,1,1,e), lxyz) - end if - call col2(tmp1, coef%h1(1,1,1,e), lxyz) - call col2(tmp2, coef%h1(1,1,1,e), lxyz) - call col2(tmp3, coef%h1(1,1,1,e), lxyz) - call libxsmm_mmcall(ax_helm_xmm1, Xh%dxt, tmp1, tm1) - do k = 1,Xh%lz - call libxsmm_mmcall(ax_helm_xmm2, tmp2(1,1,k), Xh%dy, tm2(1,1,k)) - end do - call libxsmm_mmcall(ax_helm_xmm3, tmp3, Xh%dz, tm3) - call add4(w(1,1,1,e), tm1, tm2, tm3, lxyz) - end if + ! 3D evaluation! + else + call libxsmm_mmcall(ax_helm_xmm1, Xh%dx, u(1,1,1,e), dudr) + do k = 1,Xh%lz + call libxsmm_mmcall(ax_helm_xmm2, u(1,1,k,e), Xh%dyt, duds(1,1,k)) + end do + call libxsmm_mmcall(ax_helm_xmm3, u(1,1,1,e), Xh%dzt, dudt) + call col3(tmp1, dudr, coef%G11(1,1,1,e), lxyz) + call col3(tmp2, duds, coef%G22(1,1,1,e), lxyz) + call col3(tmp3, dudt, coef%G33(1,1,1,e), lxyz) + if (msh%dfrmd_el(e)) then + call addcol3(tmp1, duds, coef%G12(1,1,1,e), lxyz) + call addcol3(tmp1, dudt, coef%G13(1,1,1,e), lxyz) + call addcol3(tmp2, dudr, coef%G12(1,1,1,e), lxyz) + call addcol3(tmp2, dudt, coef%G23(1,1,1,e), lxyz) + call addcol3(tmp3, dudr, coef%G13(1,1,1,e), lxyz) + call addcol3(tmp3, duds, coef%G23(1,1,1,e), lxyz) + end if + call col2(tmp1, coef%h1(1,1,1,e), lxyz) + call col2(tmp2, coef%h1(1,1,1,e), lxyz) + call col2(tmp3, coef%h1(1,1,1,e), lxyz) + call libxsmm_mmcall(ax_helm_xmm1, Xh%dxt, tmp1, tm1) + do k = 1,Xh%lz + call libxsmm_mmcall(ax_helm_xmm2, tmp2(1,1,k), Xh%dy, tm2(1,1,k)) + end do + call libxsmm_mmcall(ax_helm_xmm3, tmp3, Xh%dz, tm3) + call add4(w(1,1,1,e), tm1, tm2, tm3, lxyz) + end if end do - + if (coef%ifh2) call addcol4 (w,coef%h2,coef%B,u,coef%dof%n_dofs) #endif end subroutine ax_helm_xsmm_compute - + end module ax_helm_xsmm diff --git a/src/math/bcknd/xsmm/fdm_xsmm.f90 b/src/math/bcknd/xsmm/fdm_xsmm.f90 index dfe6bbe0c04..74a3e36075b 100644 --- a/src/math/bcknd/xsmm/fdm_xsmm.f90 +++ b/src/math/bcknd/xsmm/fdm_xsmm.f90 @@ -5,13 +5,13 @@ module fdm_xsmm implicit none contains - + subroutine fdm_do_fast_xsmm(e, r, s, d, nl, ldim, nelv) integer, intent(in) :: nl, nelv, ldim real(kind=rp), intent(inout) :: e(nl**ldim, nelv) real(kind=rp), intent(inout) :: r(nl**ldim, nelv) real(kind=rp), intent(inout) :: s(nl*nl,2,ldim, nelv) - real(kind=rp), intent(inout) :: d(nl**ldim, nelv) + real(kind=rp), intent(inout) :: d(nl**ldim, nelv) integer :: ie, nn, i nn = nl**ldim @@ -35,5 +35,5 @@ subroutine fdm_do_fast_xsmm(e, r, s, d, nl, ldim, nelv) end do end if end subroutine fdm_do_fast_xsmm - + end module fdm_xsmm diff --git a/src/math/bcknd/xsmm/opr_xsmm.F90 b/src/math/bcknd/xsmm/opr_xsmm.F90 index 4eaefc8d873..c2e00a061ea 100644 --- a/src/math/bcknd/xsmm/opr_xsmm.F90 +++ b/src/math/bcknd/xsmm/opr_xsmm.F90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! !> Operators libxsmm backend @@ -77,11 +77,11 @@ module opr_xsmm public :: opr_xsmm_dudxyz, opr_xsmm_opgrad, opr_xsmm_cdtp, opr_xsmm_conv1, opr_xsmm_curl #ifdef HAVE_LIBXSMM - type(libxsmm_dmmfunction), private :: lgrad_xmm1 - type(libxsmm_dmmfunction), private :: lgrad_xmm2 - type(libxsmm_dmmfunction), private :: lgrad_xmm3 + type(libxsmm_dmmfunction), private :: lgrad_xmm1 + type(libxsmm_dmmfunction), private :: lgrad_xmm2 + type(libxsmm_dmmfunction), private :: lgrad_xmm3 #endif - + contains subroutine opr_xsmm_dudxyz(du, u, dr, ds, dt, coef) @@ -89,7 +89,7 @@ subroutine opr_xsmm_dudxyz(du, u, dr, ds, dt, coef) real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv), intent(inout) :: du real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv), intent(in) :: u, dr, ds, dt real(kind=rp) :: drst(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz) - type(space_t), pointer :: Xh + type(space_t), pointer :: Xh type(mesh_t), pointer :: msh integer :: e, k, lxy, lyz, lxyz #ifdef HAVE_LIBXSMM @@ -97,9 +97,9 @@ subroutine opr_xsmm_dudxyz(du, u, dr, ds, dt, coef) type(libxsmm_dmmfunction), save :: dudxyz_xmm2 type(libxsmm_dmmfunction), save :: dudxyz_xmm3 logical, save :: dudxyz_xsmm_init = .false. - + Xh => coef%Xh - msh => coef%msh + msh => coef%msh lxy = Xh%lx*Xh%ly lyz = Xh%ly*Xh%lz lxyz = Xh%lx*Xh%ly*Xh%lz @@ -133,11 +133,11 @@ subroutine opr_xsmm_dudxyz(du, u, dr, ds, dt, coef) end do call col2(du, coef%jacinv, coef%dof%n_dofs) -#endif +#endif end subroutine opr_xsmm_dudxyz - subroutine opr_xsmm_opgrad(ux, uy, uz, u, coef) - type(coef_t), intent(in) :: coef + subroutine opr_xsmm_opgrad(ux, uy, uz, u, coef) + type(coef_t), intent(in) :: coef real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: ux real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uy real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uz @@ -215,7 +215,7 @@ subroutine local_grad3_xsmm(ur, us, ut, u, n, D, Dt) end subroutine local_grad3_xsmm subroutine local_grad2(ur, us, u, n, D, Dt) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), intent(inout) :: ur(0:n, 0:n) real(kind=rp), intent(inout) :: us(0:n, 0:n) real(kind=rp), intent(in) :: u(0:n, 0:n) @@ -284,7 +284,7 @@ subroutine opr_xsmm_cdtp(dtx,x,dr,ds,dt, coef) #endif end subroutine opr_xsmm_cdtp - subroutine opr_xsmm_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim) + subroutine opr_xsmm_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim) type(space_t), intent(in) :: Xh type(coef_t), intent(in) :: coef integer, intent(in) :: nelv, gdim @@ -301,7 +301,7 @@ subroutine opr_xsmm_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim) type(libxsmm_dmmfunction), save :: conv1_xmm2 type(libxsmm_dmmfunction), save :: conv1_xmm3 logical, save :: conv1_xsmm_init = .false. - + if (.not. conv1_xsmm_init) then call libxsmm_dispatch(conv1_xmm1, Xh%lx, Xh%ly*Xh%lx, Xh%lx, & alpha=1d0, beta=0d0, prefetch=LIBXSMM_PREFETCH_AUTO) @@ -352,7 +352,7 @@ subroutine opr_xsmm_conv1(du,u, vx, vy, vz, Xh, coef, nelv, gdim) end do #endif - + end subroutine opr_xsmm_conv1 subroutine opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh) @@ -395,9 +395,9 @@ subroutine opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh) !! BC dependent, Needs to change if cyclic call opcolv(w1%x,w2%x,w3%x,c_Xh%B, gdim, n) - call c_Xh%gs_h%op(w1, GS_OP_ADD) - call c_Xh%gs_h%op(w2, GS_OP_ADD) - call c_Xh%gs_h%op(w3, GS_OP_ADD) + call c_Xh%gs_h%op(w1, GS_OP_ADD) + call c_Xh%gs_h%op(w2, GS_OP_ADD) + call c_Xh%gs_h%op(w3, GS_OP_ADD) call opcolv(w1%x, w2%x, w3%x, c_Xh%Binv, gdim, n) end subroutine opr_xsmm_curl diff --git a/src/math/bcknd/xsmm/tensor_xsmm.F90 b/src/math/bcknd/xsmm/tensor_xsmm.F90 index 88d3e8341c4..3ac6f0af5c0 100644 --- a/src/math/bcknd/xsmm/tensor_xsmm.F90 +++ b/src/math/bcknd/xsmm/tensor_xsmm.F90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! !> Tensor operations libxsmm backend @@ -65,9 +65,9 @@ module tensor_xsmm private public :: tnsr2d_el_xsmm, tnsr3d_el_xsmm, tnsr3d_xsmm, tnsr1_3d_xsmm - + contains - + subroutine tnsr2d_el_xsmm(v, nv, u, nu, A, Bt) integer, intent(in) :: nv, nu real(kind=rp), intent(inout) :: v(nv*nv), u(nu*nu) @@ -76,7 +76,7 @@ subroutine tnsr2d_el_xsmm(v, nv, u, nu, A, Bt) call mxm(A, nv, u, nu, work, nu) call mxm(work, nv, Bt, nu, v, nv) - + end subroutine tnsr2d_el_xsmm subroutine tnsr3d_el_xsmm(v, nv, u, nu, A, Bt, Ct) @@ -87,17 +87,17 @@ subroutine tnsr3d_el_xsmm(v, nv, u, nu, A, Bt, Ct) integer :: i, nunu, nvnu, nvnv nvnu = nv * nu - nunu = nu * nu + nunu = nu * nu nvnv = nv * nv - + call mxm(A, nv, u(1), nu ,work, nunu) do i = 0,nu-1 call mxm(work(nvnu*i), nv, Bt, nu, work2(nv*nv*i), nv) end do call mxm(work2, nvnv, Ct, nu, v(1), nv) - + end subroutine tnsr3d_el_xsmm - + subroutine tnsr3d_xsmm(v, nv, u, nu, A, Bt, Ct, nelv) integer, intent(inout) :: nv, nu, nelv real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv) @@ -106,9 +106,9 @@ subroutine tnsr3d_xsmm(v, nv, u, nu, A, Bt, Ct, nelv) integer :: ie, i, nunu, nvnu, nvnv nvnu = nv * nu - nunu = nu * nu + nunu = nu * nu nvnv = nv * nv - + do ie = 1,nelv call mxm(A, nv, u(1,ie), nu, work, nunu) do i = 0,nu-1 @@ -116,10 +116,10 @@ subroutine tnsr3d_xsmm(v, nv, u, nu, A, Bt, Ct, nelv) end do call mxm(work2, nvnv, Ct, nu, v(1,ie), nv) end do - + end subroutine tnsr3d_xsmm - subroutine tnsr1_3d_xsmm(v, nv, nu, A, Bt, Ct, nelv) + subroutine tnsr1_3d_xsmm(v, nv, nu, A, Bt, Ct, nelv) integer, intent(in) :: nv, nu, nelv real(kind=rp), intent(inout) :: v(nv*nv*nv*nelv) real(kind=rp), intent(inout) :: A(nv,nu), Bt(nu, nv), Ct(nu,nv) diff --git a/src/math/fast3d.f90 b/src/math/fast3d.f90 index 09c3bdc44d6..45b7fa38e29 100644 --- a/src/math/fast3d.f90 +++ b/src/math/fast3d.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! !> Fast diagonalization methods from NEKTON @@ -78,7 +78,7 @@ module fast3d !! !! Given gridpoints \f$ x_0, x_1, \dots x_n \f$ and some point \f$\xi\f$ !! (not necessarily a grid point!) find weights \f$ c_{j, k} \f$, such that - !! the expansions + !! the expansions !! \f$ \frac{d^k f}{d x^k}|_{x=\xi} \approx \sum_{j=0}^n c_{j,k} f(x_j)\f$, !! \f$k=0, \dots m\f$ are optimal. !! Note that finite-difference stencils are exactly such type of expansions. @@ -93,7 +93,7 @@ module fast3d !! !! @warning The calculation of the wieghts is numerically stable. !! But applying the weights to a function can be ill-conditioned in the case - !! of high-order derivatives. + !! of high-order derivatives. !! !! @param xi Point at which the approximations are to be accurate !! @param x The coordinates for the grid points @@ -121,29 +121,29 @@ subroutine fd_weights_full(xi, x, n, m, c) c(0,0) = 1d0 - do i = 1, n + do i = 1, n mn = min(i,m) - c2 = 1d0 - c5 = c4 + c2 = 1d0 + c5 = c4 c4 = x(i) - xi - do j = 0, i - 1 + do j = 0, i - 1 c3 = x(i) - x(j) - c2 = c2 * c3 - do k = mn, 1, -1 + c2 = c2 * c3 + do k = mn, 1, -1 c(i,k) = c1 * (k * c(i-1,k-1) - c5 * c(i-1,k)) / c2 end do c(i,0) = -c1 * c5 * c(i-1,0) / c2 - do k = mn, 1, -1 + do k = mn, 1, -1 c(j,k) = (c4 * c(j,k) - k * c(j,k-1)) / c3 end do c(j,0) = c4 * c(j,0) / c3 end do c1 = c2 end do - + end subroutine fd_weights_full - - + + !> Generate matrices for single element, 1D operators: !! a = Laplacian !! b = diagonal mass matrix @@ -200,12 +200,12 @@ subroutine semhat(a, b, c, d, z, dgll, jgll, bgl, zgl, dgl, jgl, n, w) end do call rzero(a, np*np) do j = 0,n - do i = 0,n - do k = 0,n - a(i,j) = a(i,j) + d(k,i)*b(k)*d(k,j) + do i = 0,n + do k = 0,n + a(i,j) = a(i,j) + d(k,i)*b(k)*d(k,j) + end do + c(i,j) = b(i)*d(i,j) end do - c(i,j) = b(i)*d(i,j) - end do end do call zwgl(zgl, bgl, nm) do i = 1,n-1 @@ -219,7 +219,7 @@ end subroutine semhat !> Compute interpolation weights for points `z_to` using values at points !! `z_from`. - !! @details + !! @details !! This is essentially a wrapper for calling fd_weights_full() for several !! points. For each point in `z_to`, we get a set of interpolation weights of !! size `n_from`. @@ -227,7 +227,7 @@ end subroutine semhat !! in `z_to` and each column the weight of a point in `z_from`. !! !! This routine is used for interpolating between elements of different - !! polynomial order. In other words, belonging to different + !! polynomial order. In other words, belonging to different !! \ref space::space_t . The points are then GL, GLL, etc., depending on the !! space. !! diff --git a/src/math/fdm.f90 b/src/math/fdm.f90 index 2f70dc4784b..3e7ee9b4e83 100644 --- a/src/math/fdm.f90 +++ b/src/math/fdm.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! !> Type for the Fast Diagonalization connected with the schwarz overlapping solves. @@ -69,7 +69,7 @@ module fdm use gather_scatter use fast3d use tensor - use fdm_sx + use fdm_sx use fdm_xsmm use fdm_cpu use fdm_device @@ -94,7 +94,7 @@ module fdm type(dofmap_t), pointer :: dof type(gs_t), pointer :: gs_h type(mesh_t), pointer :: msh - contains + contains procedure, pass(this) :: init => fdm_init procedure, pass(this) :: free => fdm_free procedure, pass(this) :: compute => fdm_compute @@ -105,7 +105,7 @@ module fdm end interface sygv contains - + subroutine fdm_init(this, Xh, dm, gs_h) class(fdm_t), intent(inout) :: this type(space_t), target, intent(inout) :: Xh @@ -119,7 +119,7 @@ subroutine fdm_init(this, Xh, dm, gs_h) n = Xh%lx -1 !Polynomnial degree nl = Xh%lx + 2 !Schwarz! nelv = dm%msh%nelv - call fdm_free(this) + call fdm_free(this) allocate(this%s(nl*nl,2,dm%msh%gdim, dm%msh%nelv)) allocate(this%d(nl**3,dm%msh%nelv)) allocate(this%swplen(Xh%lx, Xh%lx, Xh%lx,dm%msh%nelv)) @@ -130,11 +130,11 @@ subroutine fdm_init(this, Xh, dm, gs_h) ! Zeroing here enables easier debugging since then ! MPI messages in GS are deterministic call rzero(this%swplen, Xh%lxyz * dm%msh%nelv) - + if (NEKO_BCKND_DEVICE .eq. 1) then - call device_map(this%s, this%s_d,nl*nl*2*dm%msh%gdim*dm%msh%nelv) - call device_map(this%d, this%d_d,nl**dm%msh%gdim*dm%msh%nelv) - call device_map(this%swplen,this%swplen_d, Xh%lxyz*dm%msh%nelv) + call device_map(this%s, this%s_d,nl*nl*2*dm%msh%gdim*dm%msh%nelv) + call device_map(this%d, this%d_d,nl**dm%msh%gdim*dm%msh%nelv) + call device_map(this%swplen,this%swplen_d, Xh%lxyz*dm%msh%nelv) end if call semhat(ah, bh, ch, dh, zh, dph, jph, bgl, zglhat, dgl, jgl, n, wh) @@ -149,11 +149,11 @@ subroutine fdm_init(this, Xh, dm, gs_h) if (NEKO_BCKND_DEVICE .eq. 1) then call device_memcpy(this%s, this%s_d, & - nl*nl*2*dm%msh%gdim*dm%msh%nelv, HOST_TO_DEVICE) + nl*nl*2*dm%msh%gdim*dm%msh%nelv, HOST_TO_DEVICE, sync=.false.) call device_memcpy(this%d, this%d_d, & - nl**dm%msh%gdim*dm%msh%nelv, HOST_TO_DEVICE) + nl**dm%msh%gdim*dm%msh%nelv, HOST_TO_DEVICE, sync=.false.) call device_memcpy(this%swplen, this%swplen_d, & - Xh%lxyz*dm%msh%nelv, HOST_TO_DEVICE) + Xh%lxyz*dm%msh%nelv, HOST_TO_DEVICE, sync=.false.) end if end subroutine fdm_init @@ -193,9 +193,11 @@ subroutine swap_lengths(this, x, y, z, nelv, gdim) end do end do if (NEKO_BCKND_DEVICE .eq. 1) then - call device_memcpy(l, this%swplen_d, this%dof%size(), HOST_TO_DEVICE) + call device_memcpy(l, this%swplen_d, this%dof%size(), & + HOST_TO_DEVICE, sync=.false.) call this%gs_h%op(l, this%dof%size(), GS_OP_ADD) - call device_memcpy(l, this%swplen_d, this%dof%size(), DEVICE_TO_HOST) + call device_memcpy(l, this%swplen_d, this%dof%size(), & + DEVICE_TO_HOST, sync=.false.) else call this%gs_h%op(l, this%dof%size(), GS_OP_ADD) end if @@ -219,9 +221,11 @@ subroutine swap_lengths(this, x, y, z, nelv, gdim) end do if (NEKO_BCKND_DEVICE .eq. 1) then - call device_memcpy(l, this%swplen_d, this%dof%size(),HOST_TO_DEVICE) + call device_memcpy(l, this%swplen_d, this%dof%size(), & + HOST_TO_DEVICE, sync=.false.) call this%gs_h%op(l, this%dof%size(), GS_OP_ADD) - call device_memcpy(l, this%swplen_d, this%dof%size(),DEVICE_TO_HOST, sync=.true.) + call device_memcpy(l, this%swplen_d, this%dof%size(), & + DEVICE_TO_HOST, sync=.true.) else call this%gs_h%op(l, this%dof%size(), GS_OP_ADD) end if @@ -338,7 +342,7 @@ subroutine fdm_setup_fast(this, ah, bh, nl, n) integer :: ie, il, nr, ns, nt integer :: lbr, rbr, lbs, rbs, lbt, rbt real(kind=rp) :: eps, diag - + associate(s => this%s, d => this%d, & llr => this%len_lr, lls => this%len_ls, llt => this%len_lt, & lmr => this%len_mr, lms => this%len_ms, lmt => this%len_mt, & @@ -350,7 +354,7 @@ subroutine fdm_setup_fast(this, ah, bh, nl, n) rbs = this%dof%msh%facet_type(4, ie) lbt = this%dof%msh%facet_type(5, ie) rbt = this%dof%msh%facet_type(6, ie) - + nr = nl ns = nl nt = nl @@ -398,7 +402,7 @@ subroutine fdm_setup_fast(this, ah, bh, nl, n) end associate end subroutine fdm_setup_fast - + subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n) integer, intent(in) :: nl, lbc, rbc, n real(kind=rp), intent(inout) :: s(nl, nl, 2), lam(nl), ll, lm, lr @@ -408,7 +412,7 @@ subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n) lx1 = n + 1 lxm = lx1 + 2 - + call fdm_setup_fast1d_a(s, lbc, rbc, ll, lm, lr, ah, n) call fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n) call generalev(s, b, lam, nl, lx1) @@ -416,9 +420,9 @@ subroutine fdm_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n) if(lbc .eq. 1) call row_zero(s, nl, nl, 2) if(rbc .gt. 0) call row_zero(s, nl, nl, nl) if(rbc .eq. 1) call row_zero(s, nl, nl, nl-1) - + call trsp(s(1,1,2), nl, s, nl) - + end subroutine fdm_setup_fast1d !> Solve the generalized eigenvalue problem /$ A x = lam B x/$ @@ -433,7 +437,7 @@ subroutine generalev(a, b, lam, n, lx) lbw = 4*(lx+2)**3 lw = n*n call sygv(a, b, lam, n, lx, bw, lbw) - + end subroutine generalev subroutine sp_sygv(a, b, lam, n, lx, bw, lbw) @@ -484,7 +488,7 @@ subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n) if(lbc .eq. 1) i0 = 1 i1 = n if(rbc .eq. 1) i1 = n - 1 - + call rzero(a, (n+3) * (n+3)) fac = 2.0_rp / lm @@ -496,7 +500,7 @@ subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n) a(i+1,j+1) = fac * ah(i,j) enddo enddo - + if(lbc .eq. 0) then fac = 2.0_rp / ll a(0,0) = fac * ah(n-1,n-1) @@ -506,7 +510,7 @@ subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n) else a(0,0) = 1.0_rp endif - + if(rbc .eq. 0) then fac = 2.0_rp / lr a(n+1,n+1) = a(n+1,n+1) + fac*ah(0,0) @@ -516,21 +520,21 @@ subroutine fdm_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n) else a(n+2,n+2) = 1.0_rp endif - + end subroutine fdm_setup_fast1d_a subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n) integer, intent(in) :: lbc, rbc, n real(kind=rp), intent(inout) :: b(0:n+2, 0:n+2), ll, lm, lr - real(kind=rp), intent(inout) :: bh(0:n) + real(kind=rp), intent(inout) :: bh(0:n) real(kind=rp) :: fac integer :: i, i0, i1 - + i0 = 0 if(lbc .eq. 1) i0 = 1 i1 = n if(rbc .eq. 1) i1 = n - 1 - + call rzero(b, (n + 3) * (n + 3)) fac = 0.5_rp * lm @@ -556,60 +560,60 @@ subroutine fdm_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n) else b(n+2,n+2) = 1.0_rp end if - + end subroutine fdm_setup_fast1d_b subroutine fdm_free(this) class(fdm_t), intent(inout) :: this - + if(allocated(this%s)) then deallocate(this%s) end if - + if(allocated(this%d)) then deallocate(this%d) end if - + if(allocated(this%len_lr)) then deallocate(this%len_lr) end if - + if(allocated(this%len_ls)) then deallocate(this%len_ls) end if - + if(allocated(this%len_lt)) then deallocate(this%len_lt) end if - + if(allocated(this%len_mr)) then deallocate(this%len_mr) end if - + if(allocated(this%len_ms)) then deallocate(this%len_ms) end if - + if(allocated(this%len_mt)) then deallocate(this%len_mt) end if - + if(allocated(this%len_rr)) then deallocate(this%len_rr) end if - + if(allocated(this%len_rs)) then deallocate(this%len_rs) end if - + if(allocated(this%len_rt)) then deallocate(this%len_rt) end if - + if(allocated(this%swplen)) then deallocate(this%swplen) end if - + nullify(this%Xh) nullify(this%dof) nullify(this%gs_h) @@ -644,6 +648,6 @@ subroutine fdm_compute(this, e, r, stream) end if end subroutine fdm_compute - + end module fdm diff --git a/src/math/math.f90 b/src/math/math.f90 index 3290f48645d..75dec60c61b 100644 --- a/src/math/math.f90 +++ b/src/math/math.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! module math @@ -76,11 +76,16 @@ module math module procedure sabscmp, dabscmp, qabscmp end interface abscmp + interface relcmp + module procedure srelcmp, drelcmp, qrelcmp + end interface relcmp + public :: abscmp, rzero, izero, row_zero, rone, copy, cmult, cadd, cfill, & - glsum, glmax, glmin, chsign, vlmax, invcol1, invcol3, invers2, vcross, & + glsum, glmax, glmin, chsign, vlmax, vlmin, invcol1, invcol3, invers2, vcross, & vdot2, vdot3, vlsc3, vlsc2, add2, add3, add4, sub2, sub3, add2s1, add2s2, & addsqr2s2, cmult2, invcol2, col2, col3, subcol3, add3s2, subcol4, addcol3,& - addcol4, ascol5, p_update, x_update, glsc2, glsc3, glsc4, sort + addcol4, ascol5, p_update, x_update, glsc2, glsc3, glsc4, sort, & + masked_copy, relcmp, glimax, glimin contains @@ -88,7 +93,7 @@ module math pure function sabscmp(x, y) real(kind=sp), intent(in) :: x real(kind=sp), intent(in) :: y - logical :: sabscmp + logical :: sabscmp sabscmp = abs(x - y) .lt. NEKO_EPS @@ -98,28 +103,71 @@ end function sabscmp pure function dabscmp(x, y) real(kind=dp), intent(in) :: x real(kind=dp), intent(in) :: y - logical :: dabscmp + logical :: dabscmp dabscmp = abs(x - y) .lt. NEKO_EPS - + end function dabscmp !> Return double precision absolute comparison \f$ | x - y | < \epsilon \f$ pure function qabscmp(x, y) real(kind=qp), intent(in) :: x real(kind=qp), intent(in) :: y - logical :: qabscmp + logical :: qabscmp qabscmp = abs(x - y) .lt. NEKO_EPS end function qabscmp + !> Return single precision relative comparison \f$ | x - y |<= \epsilon*|y| \f$ + pure function srelcmp(x, y, eps) + real(kind=sp), intent(in) :: x + real(kind=sp), intent(in) :: y + real(kind=sp), intent(in), optional :: eps + logical :: srelcmp + if (present(eps)) then + srelcmp = abs(x - y) .le. eps*abs(y) + else + srelcmp = abs(x - y) .le. NEKO_EPS*abs(y) + end if + + end function srelcmp + + !> Return double precision relative comparison \f$ | x - y |/|y| < \epsilon \f$ + pure function drelcmp(x, y, eps) + real(kind=dp), intent(in) :: x + real(kind=dp), intent(in) :: y + real(kind=dp), intent(in), optional :: eps + logical :: drelcmp + if (present(eps)) then + drelcmp = abs(x - y) .le. eps*abs(y) + else + drelcmp = abs(x - y) .le. NEKO_EPS*abs(y) + end if + + end function drelcmp + + + !> Return quad precision relative comparison \f$ | x - y |/|y| < \epsilon \f$ + pure function qrelcmp(x, y, eps) + real(kind=qp), intent(in) :: x + real(kind=qp), intent(in) :: y + real(kind=qp), intent(in), optional :: eps + logical :: qrelcmp + if (present(eps)) then + qrelcmp = abs(x - y)/abs(y) .lt. eps + else + qrelcmp = abs(x - y)/abs(y) .lt. NEKO_EPS + end if + + end function qrelcmp + !> Zero a real vector subroutine rzero(a, n) integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a integer :: i - + do i = 1, n a(i) = 0.0_rp end do @@ -130,7 +178,7 @@ subroutine izero(a, n) integer, intent(in) :: n integer, dimension(n), intent(inout) :: a integer :: i - + do i = 1, n a(i) = 0 end do @@ -144,7 +192,7 @@ subroutine row_zero(a, m, n, e) do j = 1,n a(e,j) = 0.0_rp - end do + end do end subroutine row_zero !> Set all elements to one @@ -152,7 +200,7 @@ subroutine rone(a, n) integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a integer :: i - + do i = 1, n a(i) = 1.0_rp end do @@ -170,7 +218,29 @@ subroutine copy(a, b, n) end do end subroutine copy + + !> Copy a masked vector \f$ a(mask) = b(mask) \f$. + !! @param a Destination array of size `n`. + !! @param b Source array of size `n`. + !! @param mask Mask array of length m+1, where `mask(0)=m` + !! the length of the mask array. + !! @param n Size of the arrays `a` and `b`. + !! @param m Size of the mask array `mask`. + subroutine masked_copy(a, b, mask, n, m) + integer, intent(in) :: n, m + real(kind=rp), dimension(n), intent(in) :: b + real(kind=rp), dimension(n), intent(inout) :: a + integer, dimension(0:m) :: mask + integer :: i, j + + do i = 1, m + j = mask(i) + a(j) = b(j) + end do + + end subroutine masked_copy + !> Multiplication by constant c \f$ a = c \cdot a \f$ subroutine cmult(a, c, n) integer, intent(in) :: n @@ -182,14 +252,14 @@ subroutine cmult(a, c, n) a(i) = c * a(i) end do end subroutine cmult - + !> Add a scalar to vector \f$ a = \sum a_i + s \f$ subroutine cadd(a, s, n) integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), intent(in) :: s integer :: i - + do i = 1, n a(i) = a(i) + s end do @@ -207,8 +277,8 @@ subroutine cfill(a, c, n) end do end subroutine cfill - !>Sum a vector of length n - function glsum(a, n) + !>Sum a vector of length n + function glsum(a, n) integer, intent(in) :: n real(kind=rp), dimension(n) :: a real(kind=rp) :: tmp, glsum @@ -219,11 +289,11 @@ function glsum(a, n) end do call MPI_Allreduce(tmp, glsum, 1, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) - + end function glsum - - !>Max of a vector of length n - function glmax(a, n) + + !>Max of a vector of length n + function glmax(a, n) integer, intent(in) :: n real(kind=rp), dimension(n) :: a real(kind=rp) :: tmp, glmax @@ -235,9 +305,23 @@ function glmax(a, n) call MPI_Allreduce(tmp, glmax, 1, & MPI_REAL_PRECISION, MPI_MAX, NEKO_COMM, ierr) end function glmax + + !>Max of an integer vector of length n + function glimax(a, n) + integer, intent(in) :: n + integer, dimension(n) :: a + integer :: tmp, glimax + integer :: i, ierr + tmp = a(1) + do i = 2, n + tmp = max(tmp,a(i)) + end do + call MPI_Allreduce(tmp, glimax, 1, & + MPI_INTEGER, MPI_MAX, NEKO_COMM, ierr) + end function glimax - !>Min of a vector of length n - function glmin(a, n) + !>Min of a vector of length n + function glmin(a, n) integer, intent(in) :: n real(kind=rp), dimension(n) :: a real(kind=rp) :: tmp, glmin @@ -250,6 +334,21 @@ function glmin(a, n) MPI_REAL_PRECISION, MPI_MIN, NEKO_COMM, ierr) end function glmin + !>Min of an integer vector of length n + function glimin(a, n) + integer, intent(in) :: n + integer, dimension(n) :: a + integer :: tmp, glimin + integer :: i, ierr + tmp = a(1) + do i = 2, n + tmp = min(tmp,a(i)) + end do + call MPI_Allreduce(tmp, glimin, 1, & + MPI_INTEGER, MPI_MIN, NEKO_COMM, ierr) + end function glimin + + !> Change sign of vector \f$ a = -a \f$ @@ -261,10 +360,10 @@ subroutine chsign(a, n) do i = 1, n a(i) = -a(i) end do - + end subroutine chsign - - !> Maximum value of a vector of length @a n + + !> maximum value of a vector of length @a n function vlmax(vec,n) result(tmax) integer :: n, i real(kind=rp), intent(in) :: vec(n) @@ -275,6 +374,18 @@ function vlmax(vec,n) result(tmax) end do end function vlmax + !> minimun value of a vector of length @a n + function vlmin(vec,n) result(tmin) + integer, intent(in) :: n + real(kind=rp), intent(in) :: vec(n) + real(kind=rp) :: tmin + integer :: i + tmin = real(99.0e20, rp) + do i=1,n + tmin = min(tmin,vec(i)) + end do + end function vlmin + !> Invert a vector \f$ a = 1 / a \f$ subroutine invcol1(a, n) integer, intent(in) :: n @@ -284,9 +395,9 @@ subroutine invcol1(a, n) do i = 1, n a(i) = 1.0_rp / a(i) end do - + end subroutine invcol1 - + !> Invert a vector \f$ a = b / c \f$ subroutine invcol3(a, b, c, n) integer, intent(in) :: n @@ -297,9 +408,9 @@ subroutine invcol3(a, b, c, n) do i = 1, n a(i) = b(i) / c(i) end do - + end subroutine invcol3 - + !> Compute inverted vector \f$ a = 1 / b \f$ subroutine invers2(a, b, n) integer, intent(in) :: n @@ -310,13 +421,13 @@ subroutine invers2(a, b, n) do i = 1, n a(i) = 1.0_rp / b(i) end do - + end subroutine invers2 !> Compute a cross product \f$ u = v \times w \f$ !! assuming vector components \f$ u = (u_1, u_2, u_3) \f$ etc. subroutine vcross(u1, u2, u3, v1, v2, v3, w1, w2, w3, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(in) :: v1, v2, v3 real(kind=rp), dimension(n), intent(in) :: w1, w2, w3 real(kind=rp), dimension(n), intent(out) :: u1, u2, u3 @@ -330,7 +441,7 @@ subroutine vcross(u1, u2, u3, v1, v2, v3, w1, w2, w3, n) end subroutine vcross - !> Compute a dot product \f$ dot = u \cdot v \f$ (2-d version) + !> Compute a dot product \f$ dot = u \cdot v \f$ (2-d version) !! assuming vector components \f$ u = (u_1, u_2, u_3) \f$ etc. subroutine vdot2(dot, u1, u2, v1, v2, n) integer, intent(in) :: n @@ -338,51 +449,51 @@ subroutine vdot2(dot, u1, u2, v1, v2, n) real(kind=rp), dimension(n), intent(in) :: v1, v2 real(kind=rp), dimension(n), intent(out) :: dot integer :: i - do i = 1, n + do i = 1, n dot(i) = u1(i)*v1(i) + u2(i)*v2(i) end do end subroutine vdot2 - !> Compute a dot product \f$ dot = u \cdot v \f$ (3-d version) + !> Compute a dot product \f$ dot = u \cdot v \f$ (3-d version) !! assuming vector components \f$ u = (u_1, u_2, u_3) \f$ etc. subroutine vdot3(dot, u1, u2, u3, v1, v2, v3, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(in) :: u1, u2, u3 real(kind=rp), dimension(n), intent(in) :: v1, v2, v3 real(kind=rp), dimension(n), intent(out) :: dot integer :: i - do i = 1, n + do i = 1, n dot(i) = u1(i)*v1(i) + u2(i)*v2(i) + u3(i)*v3(i) end do end subroutine vdot3 - !> Compute multiplication sum \f$ dot = u \cdot v \cdot w \f$ + !> Compute multiplication sum \f$ dot = u \cdot v \cdot w \f$ function vlsc3(u, v, w, n) result(s) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(in) :: u, v, w real(kind=rp) :: s integer :: i s = 0.0_rp - do i = 1, n - s = s + u(i)*v(i)*w(i) + do i = 1, n + s = s + u(i)*v(i)*w(i) end do end function vlsc3 - - !> Compute multiplication sum \f$ dot = u \cdot v \cdot w \f$ + + !> Compute multiplication sum \f$ dot = u \cdot v \cdot w \f$ function vlsc2(u, v, n) result(s) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(in) :: u, v real(kind=rp) :: s integer :: i s = 0.0_rp - do i = 1, n - s = s + u(i)*v(i) + do i = 1, n + s = s + u(i)*v(i) end do end function vlsc2 @@ -416,7 +527,7 @@ end subroutine add3 !> Vector addition \f$ a = b + c + d\f$ subroutine add4(a, b, c, d, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: d real(kind=rp), dimension(n), intent(inout) :: c real(kind=rp), dimension(n), intent(inout) :: b @@ -439,7 +550,7 @@ subroutine sub2(a, b, n) do i = 1, n a(i) = a(i) - b(i) end do - + end subroutine sub2 !> Vector subtraction \f$ a = b - c \f$ @@ -469,13 +580,13 @@ subroutine add2s1(a, b, c1, n) do i = 1, n a(i) = c1 * a(i) + b(i) end do - + end subroutine add2s1 !> Vector addition with scalar multiplication \f$ a = a + c_1 b \f$ !! (multiplication on second argument) subroutine add2s2(a, b, c1, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(inout) :: b real(kind=rp), intent(in) :: c1 @@ -484,12 +595,12 @@ subroutine add2s2(a, b, c1, n) do i = 1, n a(i) = a(i) + c1 * b(i) end do - + end subroutine add2s2 !> Returns \f$ a = a + c1 * (b * b )\f$ subroutine addsqr2s2(a, b, c1, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(in) :: b real(kind=rp), intent(in) :: c1 @@ -500,10 +611,10 @@ subroutine addsqr2s2(a, b, c1, n) end do end subroutine addsqr2s2 - + !> Multiplication by constant c \f$ a = c \cdot b \f$ subroutine cmult2(a, b, c, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(in) :: b real(kind=rp), intent(in) :: c @@ -512,12 +623,12 @@ subroutine cmult2(a, b, c, n) do i = 1, n a(i) = c * b(i) end do - + end subroutine cmult2 !> Vector division \f$ a = a / b \f$ subroutine invcol2(a, b, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(in) :: b integer :: i @@ -525,13 +636,13 @@ subroutine invcol2(a, b, n) do i = 1, n a(i) = a(i) /b(i) end do - + end subroutine invcol2 !> Vector multiplication \f$ a = a \cdot b \f$ subroutine col2(a, b, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(in) :: b integer :: i @@ -539,12 +650,12 @@ subroutine col2(a, b, n) do i = 1, n a(i) = a(i) * b(i) end do - + end subroutine col2 !> Vector multiplication with 3 vectors \f$ a = b \cdot c \f$ subroutine col3(a, b, c, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(in) :: b real(kind=rp), dimension(n), intent(in) :: c @@ -553,26 +664,26 @@ subroutine col3(a, b, c, n) do i = 1, n a(i) = b(i) * c(i) end do - + end subroutine col3 !> Returns \f$ a = a - b*c \f$ subroutine subcol3(a, b, c, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(in) :: b real(kind=rp), dimension(n), intent(in) :: c integer :: i do i = 1,n - a(i) = a(i) - b(i) * c(i) + a(i) = a(i) - b(i) * c(i) end do end subroutine subcol3 !> Returns \f$ a = c1 * b + c2 * c \f$ subroutine add3s2(a, b, c, c1, c2 ,n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(in) :: b real(kind=rp), dimension(n), intent(in) :: c @@ -580,7 +691,7 @@ subroutine add3s2(a, b, c, c1, c2 ,n) integer :: i do i = 1,n - a(i) = c1 * b(i) + c2 * c(i) + a(i) = c1 * b(i) + c2 * c(i) end do end subroutine add3s2 @@ -588,7 +699,7 @@ end subroutine add3s2 !> Returns \f$ a = a - b*c*d \f$ subroutine subcol4(a, b, c, d, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(in) :: b real(kind=rp), dimension(n), intent(in) :: c @@ -600,24 +711,24 @@ subroutine subcol4(a, b, c, d, n) end do end subroutine subcol4 - + !> Returns \f$ a = a + b*c \f$ subroutine addcol3(a, b, c, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(in) :: b real(kind=rp), dimension(n), intent(in) :: c integer :: i do i = 1,n - a(i) = a(i) + b(i) * c(i) + a(i) = a(i) + b(i) * c(i) end do end subroutine addcol3 !> Returns \f$ a = a + b*c*d \f$ subroutine addcol4(a, b, c, d, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(in) :: b real(kind=rp), dimension(n), intent(in) :: c @@ -632,7 +743,7 @@ end subroutine addcol4 !> Returns \f$ a = b \dot c - d \cdot e \f$ subroutine ascol5(a, b, c, d, e, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(in) :: b real(kind=rp), dimension(n), intent(in) :: c @@ -648,7 +759,7 @@ end subroutine ascol5 !> Returns \f$ a = b \dot c1 ( a - c2 \cdot c )\f$ subroutine p_update(a, b, c, c1, c2, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(in) :: b real(kind=rp), dimension(n), intent(in) :: c @@ -663,7 +774,7 @@ end subroutine p_update !> Returns \f$ a = b \dot c1 ( a - c2 \cdot c )\f$ subroutine x_update(a, b, c, c1, c2, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), dimension(n), intent(inout) :: a real(kind=rp), dimension(n), intent(in) :: b real(kind=rp), dimension(n), intent(in) :: c @@ -686,14 +797,14 @@ function glsc2(a, b, n) tmp = 0.0_rp do i = 1, n - tmp = tmp + a(i) * b(i) + tmp = tmp + a(i) * b(i) end do - + call MPI_Allreduce(tmp, glsc2, 1, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) end function glsc2 - + !> Weighted inner product \f$ a^T b c \f$ function glsc3(a, b, c, n) integer, intent(in) :: n @@ -707,7 +818,7 @@ function glsc3(a, b, c, n) do i = 1, n tmp = tmp + a(i) * b(i) * c(i) end do - + call MPI_Allreduce(tmp, glsc3, 1, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) @@ -725,7 +836,7 @@ function glsc4(a, b, c, d, n) do i = 1, n tmp = tmp + a(i) * b(i) * c(i) * d(i) end do - + call MPI_Allreduce(tmp, glsc4, 1, & MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) @@ -742,34 +853,34 @@ subroutine sort(a,ind,n) end do if (n.le.1) return - + l=n/2+1 ir=n - do while (.true.) + do while (.true.) if (l.gt.1) then l=l-1 aa = a (l) ii = ind(l) else - aa = a(ir) - ii = ind(ir) - a(ir) = a( 1) + aa = a(ir) + ii = ind(ir) + a(ir) = a( 1) ind(ir) = ind( 1) ir=ir-1 if (ir.eq.1) then - a(1) = aa + a(1) = aa ind(1) = ii return endif endif i=l j=l+l - do while (j .le. ir) + do while (j .le. ir) if (j.lt.ir) then if ( a(j).lt.a(j+1) ) j=j+1 endif if (aa.lt.a(j)) then - a(i) = a(j) + a(i) = a(j) ind(i) = ind(j) i=j j=j+j @@ -781,5 +892,5 @@ subroutine sort(a,ind,n) ind(i) = ii end do end subroutine sort - + end module math diff --git a/src/math/mathops.f90 b/src/math/mathops.f90 index 81348f2a932..f381c70486b 100644 --- a/src/math/mathops.f90 +++ b/src/math/mathops.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,46 +21,46 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! - !> Collection of vector field operations operating on \f$ a_i \f$ and \f$b_i\f$. - !! Note that in general the indices \f$i=1 \ldots gdim\f$ and \f$j=1 \ldots n\f$. - !! \f$gdim\f$ is assumed to be either 2 or 3 only. + !> Collection of vector field operations operating on \f$ a_i \f$ and \f$b_i\f$. + !! Note that in general the indices \f$i=1 \ldots gdim\f$ and \f$j=1 \ldots n\f$. + !! \f$gdim\f$ is assumed to be either 2 or 3 only. module mathops use num_types, only : rp @@ -88,7 +88,7 @@ subroutine opchsign(a1, a2, a3, gdim, n) end if end subroutine opchsign - + !> \f$ a_i(j) = a_i(j) * c(j) \f$ subroutine opcolv(a1, a2, a3, c, gdim, n) integer, intent(in) :: n, gdim @@ -111,7 +111,7 @@ subroutine opcolv(a1, a2, a3, c, gdim, n) end subroutine opcolv - !> \f$ a_i(j) = b_i(j) * c(j) * d \f$ + !> \f$ a_i(j) = b_i(j) * c(j) * d \f$ subroutine opcolv3c(a1, a2, a3, b1, b2, b3, c, d, n, gdim) integer, intent(in) :: n, gdim real(kind=rp), dimension(n), intent(inout) :: a1, a2, a3 @@ -134,7 +134,7 @@ subroutine opcolv3c(a1, a2, a3, b1, b2, b3, c, d, n, gdim) end subroutine opcolv3c - !> \f$ a_i(j) = a_i(j) + b_i(j) * c \f$ + !> \f$ a_i(j) = a_i(j) + b_i(j) * c \f$ subroutine opadd2cm(a1, a2, a3, b1, b2, b3, c, n, gdim) integer, intent(in) :: n, gdim real(kind=rp), dimension(n), intent(inout) :: a1, a2, a3 @@ -164,7 +164,7 @@ subroutine opadd2col(a1, a2, a3, b1, b2, b3, c, n, gdim) real(kind=rp), dimension(n), intent(in) :: b1, b2, b3 real(kind=rp), intent(in) :: c(n) integer :: i - + if (gdim .eq. 3) then do i = 1, n a1(i) = a1(i) + b1(i)*c(i) @@ -177,7 +177,7 @@ subroutine opadd2col(a1, a2, a3, b1, b2, b3, c, n, gdim) a2(i) = a2(i) + b2(i)*c(i) end do endif - + end subroutine opadd2col - + end module mathops diff --git a/src/math/matrix.f90 b/src/math/matrix.f90 index 5e5de61c507..fff18f79317 100644 --- a/src/math/matrix.f90 +++ b/src/math/matrix.f90 @@ -61,7 +61,7 @@ module matrix generic :: assignment(=) => matrix_assign_matrix, & matrix_assign_scalar end type matrix_t - + contains !> Initialise a matrix of size `nrows*ncols`. diff --git a/src/math/mxm_wrapper.F90 b/src/math/mxm_wrapper.F90 index 2e46c3e2ce2..ef2dbc46ee9 100644 --- a/src/math/mxm_wrapper.F90 +++ b/src/math/mxm_wrapper.F90 @@ -13,7 +13,7 @@ module mxm_wrapper interface mxm_blas module procedure mxm_blas_sp, mxm_blas_dp, mxm_blas_qp end interface mxm_blas - + interface mxm_libxsmm module procedure mxm_libxsmm_sp, mxm_libxsmm_dp, mxm_libxsmm_qp end interface mxm_libxsmm @@ -48,7 +48,7 @@ subroutine mxm_blas_sp(a,n1,b,n2,c,n3) call sgemm('N','N',n1,n3,n2,1.0,a,n1,b,n2,0.0,c,n1) end subroutine mxm_blas_sp - + subroutine mxm_blas_dp(a,n1,b,n2,c,n3) integer, intent(in) :: n1, n2, n3 real(kind=dp), intent(in) :: a(n1, n2) @@ -83,9 +83,9 @@ subroutine mxm_libxsmm_sp(a,n1,b,n2,c,n3) call libxsmm_smmcall_abc(xmm, a, b, c) return end if -#endif +#endif end subroutine mxm_libxsmm_sp - + subroutine mxm_libxsmm_dp(a,n1,b,n2,c,n3) integer, intent(in) :: n1, n2, n3 real(kind=dp), intent(in) :: a(n1, n2) @@ -112,5 +112,5 @@ subroutine mxm_libxsmm_qp(a,n1,b,n2,c,n3) call neko_error('Not implemented yet!') end subroutine mxm_libxsmm_qp - + end module mxm_wrapper diff --git a/src/math/operators.f90 b/src/math/operators.f90 index d8836ba6fb5..524f1d0b754 100644 --- a/src/math/operators.f90 +++ b/src/math/operators.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2020-2023, The Neko Authors +! Copyright (c) 2020-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -32,35 +32,48 @@ ! !> Operators module operators - use neko_config + use neko_config, only : NEKO_BCKND_SX, NEKO_BCKND_DEVICE, NEKO_BCKND_XSMM,& + NEKO_DEVICE_MPI use num_types, only : rp - use opr_cpu - use opr_sx - use opr_xsmm - use opr_device + use opr_cpu, only : opr_cpu_cfl, opr_cpu_curl, opr_cpu_opgrad, opr_cpu_conv1,& + opr_cpu_cdtp, opr_cpu_dudxyz, opr_cpu_lambda2 + use opr_sx, only : opr_sx_cfl, opr_sx_curl, opr_sx_dudxyz, opr_sx_opgrad, & + opr_sx_cdtp, opr_sx_conv1, opr_sx_lambda2 + use opr_xsmm, only : opr_xsmm_cdtp, opr_xsmm_conv1, opr_xsmm_curl, & + opr_xsmm_dudxyz, opr_xsmm_opgrad + use opr_device, only : opr_device_cdtp, opr_device_cfl, opr_device_curl, & + opr_device_conv1, opr_device_dudxyz, & + opr_device_lambda2, opr_device_opgrad use space, only : space_t use coefs, only : coef_t - use field, only: field_t - use math + use field, only : field_t + use math, only : glsum, cmult, add2, cadd + use device, only : c_ptr, device_get_ptr + use device_math, only : device_add2, device_cmult use comm - use device - use device_math implicit none private - public :: dudxyz, opgrad, ortho, cdtp, conv1, curl, cfl, lambda2op + public :: dudxyz, opgrad, ortho, cdtp, conv1, curl, cfl,& + lambda2op, strain_rate contains - - !> Compute dU/dx or dU/dy or dU/dz - subroutine dudxyz (du,u,dr,ds,dt,coef) + + !> Compute derivative of a scalar field along a single direction. + !! @param du Holds the resulting derivative values. + !! @param u The values of the field. + !! @param dr The derivative of r with respect to the chosen direction. + !! @param ds The derivative of s with respect to the chosen direction. + !! @param dt The derivative of t with respect to the chosen direction. + !! @param coef The SEM coefficients. + subroutine dudxyz (du, u, dr, ds, dt, coef) type(coef_t), intent(in), target :: coef real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv), & intent(inout) :: du real(kind=rp), dimension(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz,coef%msh%nelv), & intent(in) :: u, dr, ds, dt - if (NEKO_BCKND_SX .eq. 1) then + if (NEKO_BCKND_SX .eq. 1) then call opr_sx_dudxyz(du, u, dr, ds, dt, coef) else if (NEKO_BCKND_XSMM .eq. 1) then call opr_xsmm_dudxyz(du, u, dr, ds, dt, coef) @@ -69,17 +82,27 @@ subroutine dudxyz (du,u,dr,ds,dt,coef) else call opr_cpu_dudxyz(du, u, dr, ds, dt, coef) end if - + end subroutine dudxyz - !> Equals wgradm1 in nek5000. Gradient of velocity vectors. - subroutine opgrad(ux,uy,uz,u,coef, es, ee) ! weak form of grad - type(coef_t), intent(in) :: coef + !> Compute the gradient of a scalar field. + !! @details By providing `es` and `ee`, it is possible to compute only for a + !! range of element indices. + !! @param ux Will store the x component of the gradient. + !! @param uy Will store the y component of the gradient. + !! @param uz Will store the z component of the gradient. + !! @param u The values of the field. + !! @param coef The SEM coefficients. + !! @param es Starting element index, optional, defaults to 1. + !! @param ee Ending element index, optional, defaults to `nelv`. + !! @note Equals wgradm1 in Nek5000, the weak form of the gradient. + subroutine opgrad(ux, uy, uz, u, coef, es, ee) + type(coef_t), intent(in) :: coef real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: ux real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uy real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: uz real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: u - integer, optional :: es, ee + integer, optional :: es, ee integer :: eblk_start, eblk_end if (present(es)) then @@ -94,7 +117,7 @@ subroutine opgrad(ux,uy,uz,u,coef, es, ee) ! weak form of grad eblk_end = coef%msh%nelv end if - if (NEKO_BCKND_SX .eq. 1) then + if (NEKO_BCKND_SX .eq. 1) then call opr_sx_opgrad(ux, uy, uz, u, coef) else if (NEKO_BCKND_XSMM .eq. 1) then call opr_xsmm_opgrad(ux, uy, uz, u, coef) @@ -103,24 +126,34 @@ subroutine opgrad(ux,uy,uz,u,coef, es, ee) ! weak form of grad else call opr_cpu_opgrad(ux, uy, uz, u, coef, eblk_start, eblk_end) end if - + end subroutine opgrad - + !> Othogonalize with regard to vector (1,1,1,1,1,1...,1)^T. - subroutine ortho(x,n ,glb_n) + !! @param x The vector to orthogonolize. + !! @param n The size of `x`. + !! @param glb_n The global number of elements of `x` across all MPI ranks. Be careful with overflow! + subroutine ortho(x, n, glb_n) integer, intent(in) :: n integer, intent(in) :: glb_n real(kind=rp), dimension(n), intent(inout) :: x real(kind=rp) :: rlam - rlam = glsum(x,n)/glb_n - call cadd(x,-rlam,n) + rlam = glsum(x, n)/glb_n + call cadd(x, -rlam, n) end subroutine ortho - - !> Compute DT*X (entire field) - !> This needs to be revised... the loop over n1,n2 is probably unesccssary - subroutine cdtp (dtx,x,dr,ds,dt, coef) + + !> Apply D^T to a scalar field, where D is the derivative matrix. + !! @param dtx Will store the result. + !! @param x The values of the field. + !! @param dr The derivative of r with respect to the chosen direction. + !! @param ds The derivative of s with respect to the chosen direction. + !! @param dt The derivative of t with respect to the chosen direction. + !! @param coef The SEM coefficients. + !> @note This needs to be revised... the loop over n1,n2 is probably + !! unesccssary + subroutine cdtp (dtx, x, dr, ds, dt, coef) type(coef_t), intent(in) :: coef real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: dtx real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(inout) :: x @@ -128,7 +161,7 @@ subroutine cdtp (dtx,x,dr,ds,dt, coef) real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: ds real(kind=rp), dimension(coef%Xh%lxyz,coef%msh%nelv), intent(in) :: dt - if (NEKO_BCKND_SX .eq. 1) then + if (NEKO_BCKND_SX .eq. 1) then call opr_sx_cdtp(dtx, x, dr, ds, dt, coef) else if (NEKO_BCKND_XSMM .eq. 1) then call opr_xsmm_cdtp(dtx, x, dr, ds, dt, coef) @@ -137,9 +170,9 @@ subroutine cdtp (dtx,x,dr,ds,dt, coef) else call opr_cpu_cdtp(dtx, x, dr, ds, dt, coef) end if - + end subroutine cdtp - + !> Compute the advection term. !! @param du Holds the result. !! @param u The advected field. @@ -147,7 +180,7 @@ end subroutine cdtp !! @param vy The y component of the advecting velocity. !! @param vz The z component of the advecting velocity. !! @param Xh The function space for the fields involved. - !! @param coeff The coeffients of the (Xh, mesh) pair. + !! @param coef The SEM coefficients. !! @param es Starting element index, defaults to 1. !! @param ee Last element index, defaults to mesh size. subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee) @@ -167,14 +200,14 @@ subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee) else eblk_start = 1 end if - + if (present(ee)) then eblk_end = ee else eblk_end = coef%msh%nelv end if - - if (NEKO_BCKND_SX .eq. 1) then + + if (NEKO_BCKND_SX .eq. 1) then call opr_sx_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim) else if (NEKO_BCKND_XSMM .eq. 1) then call opr_xsmm_conv1(du, u, vx, vy, vz, Xh, coef, nelv, gdim) @@ -187,7 +220,17 @@ subroutine conv1(du, u, vx, vy, vz, Xh, coef, es, ee) end subroutine conv1 - subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh) + !! Compute the curl fo a vector field. + !! @param w1 Will store the x component of the curl. + !! @param w2 Will store the y component of the curl. + !! @param w3 Will store the z component of the curl. + !! @param u1 The x component of the vector field. + !! @param u2 The y component of the vector field. + !! @param u3 The z component of the vector field. + !! @param work1 A temporary array for computations. + !! @param work2 A temporary array for computations. + !! @param coef The SEM coefficients. + subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, coef) type(field_t), intent(inout) :: w1 type(field_t), intent(inout) :: w2 type(field_t), intent(inout) :: w3 @@ -196,26 +239,35 @@ subroutine curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh) type(field_t), intent(inout) :: u3 type(field_t), intent(inout) :: work1 type(field_t), intent(inout) :: work2 - type(coef_t), intent(in) :: c_Xh + type(coef_t), intent(in) :: coef if (NEKO_BCKND_SX .eq. 1) then - call opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh) + call opr_sx_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef) else if (NEKO_BCKND_XSMM .eq. 1) then - call opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh) + call opr_xsmm_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef) else if (NEKO_BCKND_DEVICE .eq. 1) then - call opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh) + call opr_device_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef) else - call opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, c_Xh) + call opr_cpu_curl(w1, w2, w3, u1, u2, u3, work1, work2, coef) end if end subroutine curl + !! Compute the CFL number + !! @param dt The timestep. + !! @param u The x component of velocity. + !! @param v The y component of velocity. + !! @param w The z component of velocity. + !! @param Xh The SEM function space. + !! @param coef The SEM coefficients. + !! @param nelv The total number of elements. + !! @param gdim Number of geometric dimensions. function cfl(dt, u, v, w, Xh, coef, nelv, gdim) - type(space_t) :: Xh - type(coef_t) :: coef - integer :: nelv, gdim - real(kind=rp) :: dt - real(kind=rp), dimension(Xh%lx,Xh%ly,Xh%lz,nelv) :: u, v, w + type(space_t), intent(in) :: Xh + type(coef_t), intent(in) :: coef + integer, intent(in) :: nelv, gdim + real(kind=rp), intent(in) :: dt + real(kind=rp), dimension(Xh%lx,Xh%ly,Xh%lz,nelv), intent(in) :: u, v, w real(kind=rp) :: cfl integer :: ierr @@ -231,13 +283,22 @@ function cfl(dt, u, v, w, Xh, coef, nelv, gdim) call MPI_Allreduce(MPI_IN_PLACE, cfl, 1, & MPI_REAL_PRECISION, MPI_MAX, NEKO_COMM, ierr) end if - + end function cfl - + !> Compute double the strain rate tensor, i.e du_i/dx_j + du_j/dx_i - !! Similar to comp_sij in Nek5000. - subroutine strain_rate(s11, s22, s33, s12, s13, s23, & - u, v, w, coef) + !! @param s11 Will hold the 1,1 component of the strain rate tensor. + !! @param s22 Will hold the 2,2 component of the strain rate tensor. + !! @param s33 Will hold the 3,3 component of the strain rate tensor. + !! @param s12 Will hold the 1,2 component of the strain rate tensor. + !! @param s13 Will hold the 1,3 component of the strain rate tensor. + !! @param s23 Will hold the 2,3 component of the strain rate tensor. + !! @param u The x component of velocity. + !! @param v The y component of velocity. + !! @param w The z component of velocity. + !! @param coef The SEM coefficients. + !! @note Similar to comp_sij in Nek5000. + subroutine strain_rate(s11, s22, s33, s12, s13, s23, u, v, w, coef) type(field_t), intent(in) :: u, v, w !< velocity components type(coef_t), intent(in) :: coef real(kind=rp), intent(inout) :: s11(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv) @@ -246,9 +307,9 @@ subroutine strain_rate(s11, s22, s33, s12, s13, s23, & real(kind=rp), intent(inout) :: s12(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv) real(kind=rp), intent(inout) :: s23(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv) real(kind=rp), intent(inout) :: s13(u%Xh%lx, u%Xh%ly, u%Xh%lz, u%msh%nelv) - + type(c_ptr) :: s11_d, s22_d, s33_d, s12_d, s23_d, s13_d - + integer :: nelv, lxyz if (NEKO_BCKND_DEVICE .eq. 1) then @@ -259,10 +320,10 @@ subroutine strain_rate(s11, s22, s33, s12, s13, s23, & s23_d = device_get_ptr(s23) s13_d = device_get_ptr(s13) endif - + nelv = u%msh%nelv lxyz = u%Xh%lxyz - + ! we use s11 as a work array here call dudxyz (s12, u%x, coef%drdy, coef%dsdy, coef%dtdy, coef) call dudxyz (s11, v%x, coef%drdx, coef%dsdx, coef%dtdx, coef) @@ -301,94 +362,28 @@ subroutine strain_rate(s11, s22, s33, s12, s13, s23, & call cmult(s22, 2.0_rp, nelv*lxyz) call cmult(s33, 2.0_rp, nelv*lxyz) endif - + end subroutine strain_rate - !> Lambda2 calcuation. - !! @param lambda2 holds the second eigen values. - !! @param u, the x-velocity - !! @param v, the y-velocity - !! @param w, the z-velocity - !! @param coef, the field coefficients - subroutine lambda2op(lambda2, u, v, w,coef) - type(coef_t), intent(in) :: coef - type(field_t), intent(inout) :: lambda2 + !> Compute the Lambda2 field for a given velocity field. + !! @param lambda2 Holds the computed Lambda2 field. + !! @param u The x-velocity. + !! @param v The y-velocity. + !! @param w the z-velocity. + !! @param coef The SEM coefficients. + subroutine lambda2op(lambda2, u, v, w, coef) + type(coef_t), intent(in) :: coef + type(field_t), intent(inout) :: lambda2 type(field_t), intent(in) :: u, v, w - real(kind=rp) :: grad(coef%Xh%lxyz,3,3) - integer :: temp_indices(9), e, i, ind_sort(3) - real(kind=rp) :: eigen(3), B, C, D, q, r, theta, l2 - real(kind=rp) :: s11, s22, s33, s12, s13, s23, o12, o13, o23 - real(kind=rp) :: a11, a22, a33, a12, a13, a23 - if (NEKO_BCKND_DEVICE .eq. 1) then + if (NEKO_BCKND_SX .eq. 1) then + call opr_sx_lambda2(lambda2, u, v, w, coef) + else if (NEKO_BCKND_DEVICE .eq. 1) then call opr_device_lambda2(lambda2, u, v, w, coef) else - do e = 1, coef%msh%nelv - call opgrad(grad(1,1,1), grad(1,1,2), grad(1,1,3), & - u%x(1,1,1,e),coef,e,e) - call opgrad(grad(1,2,1), grad(1,2,2), grad(1,2,3), & - v%x(1,1,1,e),coef,e,e) - call opgrad(grad(1,3,1), grad(1,3,2), grad(1,3,3), & - w%x(1,1,1,e),coef,e,e) - - do i = 1, coef%Xh%lxyz - s11 = grad(i,1,1) - s22 = grad(i,2,2) - s33 = grad(i,3,3) - - - s12 = 0.5*(grad(i,1,2) + grad(i,2,1)) - s13 = 0.5*(grad(i,1,3) + grad(i,3,1)) - s23 = 0.5*(grad(i,2,3) + grad(i,3,2)) - - o12 = 0.5*(grad(i,1,2) - grad(i,2,1)) - o13 = 0.5*(grad(i,1,3) - grad(i,3,1)) - o23 = 0.5*(grad(i,2,3) - grad(i,3,2)) - - a11 = s11*s11 + s12*s12 + s13*s13 - o12*o12 - o13*o13 - a12 = s11 * s12 + s12 * s22 + s13 * s23 - o13 * o23 - a13 = s11 * s13 + s12 * s23 + s13 * s33 + o12 * o23 - - a22 = s12*s12 + s22*s22 + s23*s23 - o12*o12 - o23*o23 - a23 = s12 * s13 + s22 * s23 + s23 * s33 - o12 * o13 - a33 = s13*s13 + s23*s23 + s33*s33 - o13*o13 - o23*o23 - - - B = -(a11 + a22 + a33) - C = -(a12*a12 + a13*a13 + a23*a23 & - - a11 * a22 - a11 * a33 - a22 * a33) - D = -(2.0 * a12 * a13 * a23 - a11 * a23*a23 & - - a22 * a13*a13 - a33 * a12*a12 + a11 * a22 * a33) - - - q = (3.0 * C - B*B) / 9.0 - r = (9.0 * C * B - 27.0 * D - 2.0 * B*B*B) / 54.0 - theta = acos( r / sqrt(-q*q*q) ) - - eigen(1) = 2.0 * sqrt(-q) * cos(theta / 3.0) - B / 3.0 - eigen(2) = 2.0 * sqrt(-q) * cos((theta + 2.0 * pi) / 3.0) - B / 3.0 - eigen(3) = 2.0 * sqrt(-q) * cos((theta + 4.0 * pi) / 3.0) - B / 3.0 - - if (eigen(1) .le. eigen(2) .and. eigen(2) .le. eigen(3)) then - l2 = eigen(2) - else if (eigen(3) .le. eigen(2) .and. eigen(2) .le. eigen(1)) then - l2 = eigen(2) - else if (eigen(1) .le. eigen(3) .and. eigen(3) .le. eigen(2)) then - l2 = eigen(3) - else if (eigen(2) .le. eigen(3) .and. eigen(3) .le. eigen(1)) then - l2 = eigen(3) - else if (eigen(2) .le. eigen(1) .and. eigen(1) .le. eigen(3)) then - l2 = eigen(1) - else if (eigen(3) .le. eigen(1) .and. eigen(1) .le. eigen(2)) then - l2 = eigen(1) - else - l2 = 0.0 - end if - lambda2%x(i,1,1,e) = l2/(coef%B(i,1,1,e)**2) - end do - end do + call opr_cpu_lambda2(lambda2, u, v, w, coef) end if - + end subroutine lambda2op - + end module operators diff --git a/src/math/schwarz.f90 b/src/math/schwarz.f90 index ed5417fd3c0..a946d0ba04f 100644 --- a/src/math/schwarz.f90 +++ b/src/math/schwarz.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! !> Overlapping schwarz solves @@ -65,7 +65,6 @@ module schwarz use space use dofmap, only : dofmap_t use bc - use dirichlet, only : dirichlet_t use gather_scatter use device_schwarz use device_math @@ -75,32 +74,32 @@ module schwarz use, intrinsic :: iso_c_binding implicit none private - + type, public :: schwarz_t - real(kind=rp), allocatable :: work1(:) - real(kind=rp), allocatable :: work2(:) - real(kind=rp), allocatable :: wt(:,:,:,:,:) - type(c_ptr) :: work1_d = C_NULL_PTR - type(c_ptr) :: work2_d = C_NULL_PTR - type(c_ptr) :: wt_d = C_NULL_PTR - type(space_t) :: Xh_schwarz !< needed to init gs - type(gs_t) :: gs_schwarz !< We are only interested in the gather-scatter! - type(dofmap_t) :: dm_schwarz !< needed to init gs - type(fdm_t) :: fdm - type(space_t), pointer :: Xh - type(bc_list_t), pointer :: bclst - type(dofmap_t), pointer :: dm - type(gs_t), pointer :: gs_h - type(mesh_t), pointer :: msh - type(c_ptr) :: event - contains - procedure, pass(this) :: init => schwarz_init - procedure, pass(this) :: free => schwarz_free - procedure, pass(this) :: compute => schwarz_compute + real(kind=rp), allocatable :: work1(:) + real(kind=rp), allocatable :: work2(:) + real(kind=rp), allocatable :: wt(:,:,:,:,:) + type(c_ptr) :: work1_d = C_NULL_PTR + type(c_ptr) :: work2_d = C_NULL_PTR + type(c_ptr) :: wt_d = C_NULL_PTR + type(space_t) :: Xh_schwarz !< needed to init gs + type(gs_t) :: gs_schwarz !< We are only interested in the gather-scatter! + type(dofmap_t) :: dm_schwarz !< needed to init gs + type(fdm_t) :: fdm + type(space_t), pointer :: Xh + type(bc_list_t), pointer :: bclst + type(dofmap_t), pointer :: dm + type(gs_t), pointer :: gs_h + type(mesh_t), pointer :: msh + type(c_ptr) :: event + contains + procedure, pass(this) :: init => schwarz_init + procedure, pass(this) :: free => schwarz_free + procedure, pass(this) :: compute => schwarz_compute end type schwarz_t - + contains - + subroutine schwarz_init(this, Xh, dm, gs_h, bclst, msh) class(schwarz_t), target, intent(inout) :: this type(space_t), target, intent(inout) :: Xh @@ -110,15 +109,15 @@ subroutine schwarz_init(this, Xh, dm, gs_h, bclst, msh) type(bc_list_t), target, intent(inout):: bclst call this%free() - + call this%Xh_schwarz%init(GLL, Xh%lx+2, Xh%lx+2, Xh%lx+2) - this%dm_schwarz = dofmap_t(msh, this%Xh_schwarz) + this%dm_schwarz = dofmap_t(msh, this%Xh_schwarz) call this%gs_schwarz%init(this%dm_schwarz) allocate(this%work1(this%dm_schwarz%size())) allocate(this%work2(this%dm_schwarz%size())) allocate(this%wt(Xh%lx, Xh%lx, 4, msh%gdim, msh%nelv)) - + call this%fdm%init(Xh, dm, gs_h) @@ -128,28 +127,29 @@ subroutine schwarz_init(this, Xh, dm, gs_h, bclst, msh) this%dm => dm this%gs_h => gs_h if (NEKO_BCKND_DEVICE .eq. 1) then - call device_map(this%work1, this%work1_d,this%dm_schwarz%size()) - call device_map(this%work2, this%work2_d,this%dm_schwarz%size()) + call device_map(this%work1, this%work1_d,this%dm_schwarz%size()) + call device_map(this%work2, this%work2_d,this%dm_schwarz%size()) end if call schwarz_setup_wt(this) if (NEKO_BCKND_DEVICE .eq. 1) then - call device_alloc(this%wt_d,int(this%dm%size()*rp, i8)) + call device_alloc(this%wt_d,int(this%dm%size()*rp, i8)) call rone(this%work1, this%dm%size()) call schwarz_wt3d(this%work1, this%wt, Xh%lx, msh%nelv) - call device_memcpy(this%work1, this%wt_d, this%dm%size(), HOST_TO_DEVICE) + call device_memcpy(this%work1, this%wt_d, this%dm%size(), & + HOST_TO_DEVICE, sync=.false.) call device_event_create(this%event, 2) end if end subroutine schwarz_init - + subroutine schwarz_free(this) class(schwarz_t), intent(inout) :: this - + if(allocated(this%work1)) deallocate(this%work1) if(allocated(this%work2)) deallocate(this%work2) if(allocated(this%wt)) deallocate(this%wt) - + call this%Xh_schwarz%free() call this%gs_schwarz%free() !why cant I do this? @@ -162,7 +162,7 @@ subroutine schwarz_free(this) nullify(this%gs_h) nullify(this%msh) end subroutine schwarz_free - !> setup weights + !> setup weights subroutine schwarz_setup_wt(this) class(schwarz_t), intent(inout) :: this integer :: enx,eny,enz, n, ie, k, ns @@ -178,19 +178,21 @@ subroutine schwarz_setup_wt(this) enz = Xh_schwarz%lz if(.not. msh%gdim .eq. 3) enz=1 ns = enx*eny*enz*msh%nelv - + call rone(work2, ns) call rzero(work1, ns) - + ! Sum overlap region (border excluded) ! Cred to PFF for this, very clever call schwarz_extrude(work1, 0, zero, work2, 0, one , enx, eny, enz, msh%nelv) if (NEKO_BCKND_DEVICE .eq. 1) then - call device_memcpy(work2, this%work2_d, ns, HOST_TO_DEVICE) - call this%gs_schwarz%op(work2, ns, GS_OP_ADD) - call device_memcpy(work2, this%work2_d, ns, DEVICE_TO_HOST) + call device_memcpy(work2, this%work2_d, ns, & + HOST_TO_DEVICE, sync=.false.) + call this%gs_schwarz%op(work2, ns, GS_OP_ADD) + call device_memcpy(work2, this%work2_d, ns, & + DEVICE_TO_HOST, sync=.false.) else - call this%gs_schwarz%op(work2, ns, GS_OP_ADD) + call this%gs_schwarz%op(work2, ns, GS_OP_ADD) end if call schwarz_extrude(work2, 0, one, work1, 0, -one, enx, eny, enz, msh%nelv) call schwarz_extrude(work2, 2, one, work2, 0, one, enx, eny, enz, msh%nelv) @@ -200,13 +202,15 @@ subroutine schwarz_setup_wt(this) ! else call schwarz_toreg3d(work1, work2, Xh%lx, msh%nelv) ! endif - + if (NEKO_BCKND_DEVICE .eq. 1) then - call device_memcpy(work1, this%work1_d, n, HOST_TO_DEVICE) - call this%gs_h%op(work1, n, GS_OP_ADD) - call device_memcpy(work1, this%work1_d, n, DEVICE_TO_HOST, sync=.true.) + call device_memcpy(work1, this%work1_d, n, & + HOST_TO_DEVICE, sync=.false.) + call this%gs_h%op(work1, n, GS_OP_ADD) + call device_memcpy(work1, this%work1_d, n, & + DEVICE_TO_HOST, sync=.true.) else - call this%gs_h%op(work1, n, GS_OP_ADD) + call this%gs_h%op(work1, n, GS_OP_ADD) end if k = 1 @@ -248,9 +252,9 @@ end subroutine schwarz_setup_schwarz_wt2d_2 subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv) integer, intent(in) ::n, nelv, ie real(kind=rp), intent(inout) :: wt(n,n,4,3,nelv) - real(kind=rp), intent(inout) :: work(n,n,n) + real(kind=rp), intent(inout) :: work(n,n,n) integer :: i,j,k - + do k = 1, n do j = 1, n wt(j,k,1,1,ie) = 1.0_rp / work(1,j,k) @@ -259,7 +263,7 @@ subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv) wt(j,k,4,1,ie) = 1.0_rp / work(n,j,k) end do end do - + do k = 1, n do i = 1, n wt(i,k,1,2,ie) = 1.0_rp / work(i,1,k) @@ -268,7 +272,7 @@ subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv) wt(i,k,4,2,ie) = 1.0_rp / work(i,n,k) end do end do - + do j = 1, n do i = 1, n wt(i,j,1,3,ie) = 1.0_rp / work(i,j,1) @@ -277,7 +281,7 @@ subroutine schwarz_setup_schwarz_wt3d_2(wt, ie, n, work, nelv) wt(i,j,4,3,ie) = 1.0_rp / work(i,j,n) end do end do - + end subroutine schwarz_setup_schwarz_wt3d_2 !> convert array a from extended size to regular @@ -315,7 +319,7 @@ subroutine schwarz_toext3d(a, b, n, nelv) end do end subroutine schwarz_toext3d - !> Sum values along rows l1, l2 with weights f1, f2 and store along row l1. + !> Sum values along rows l1, l2 with weights f1, f2 and store along row l1. !! Helps us avoid complicated communcation to get neighbor values. !! Simply copy interesting values to the boundary and then do gs_op on extended array. subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv) @@ -325,7 +329,7 @@ subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv) integer :: i, j, k, ie, i0, i1 i0=2 i1=nx-1 - + if(nz .eq. 1) then do ie = 1, nelv do j = i0, i1 @@ -370,7 +374,7 @@ subroutine schwarz_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz, nelv) end do endif end subroutine schwarz_extrude - + subroutine schwarz_compute(this, e, r) class(schwarz_t), intent(inout) :: this real(kind=rp), dimension(this%dm%size()), intent(inout) :: e, r @@ -381,80 +385,80 @@ subroutine schwarz_compute(this, e, r) associate(work1 => this%work1, work1_d => this%work1_d,& work2 => this%work2, work2_d => this%work2_d) - n = this%dm%size() - enx=this%Xh_schwarz%lx - eny=this%Xh_schwarz%ly - enz=this%Xh_schwarz%lz - if(.not. this%msh%gdim .eq. 3) enz=1 - ns = enx*eny*enz*this%msh%nelv - if (NEKO_BCKND_DEVICE .eq. 1) then - r_d = device_get_ptr(r) - e_d = device_get_ptr(e) - call device_event_record(this%event, glb_cmd_queue) - call device_stream_wait_event(aux_cmd_queue, this%event, 0) - call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, & + n = this%dm%size() + enx=this%Xh_schwarz%lx + eny=this%Xh_schwarz%ly + enz=this%Xh_schwarz%lz + if(.not. this%msh%gdim .eq. 3) enz=1 + ns = enx*eny*enz*this%msh%nelv + if (NEKO_BCKND_DEVICE .eq. 1) then + r_d = device_get_ptr(r) + e_d = device_get_ptr(e) + call device_event_record(this%event, glb_cmd_queue) + call device_stream_wait_event(aux_cmd_queue, this%event, 0) + call device_schwarz_toext3d(work1_d, r_d, this%Xh%lx, & this%msh%nelv, aux_cmd_queue) - call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, & + call device_schwarz_extrude(work1_d, 0, zero, work1_d, 2, one, & enx,eny,enz, this%msh%nelv,aux_cmd_queue) - this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue - call this%gs_schwarz%op(work1, ns, GS_OP_ADD,this%event) - call device_event_sync(this%event) - call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, & + this%gs_schwarz%bcknd%gs_stream = aux_cmd_queue + call this%gs_schwarz%op(work1, ns, GS_OP_ADD,this%event) + call device_event_sync(this%event) + call device_schwarz_extrude(work1_d, 0, one, work1_d, 2, -one, & enx, eny, enz, this%msh%nelv, aux_cmd_queue) - - call this%fdm%compute(work2, work1,aux_cmd_queue) ! do local solves - call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, & + call this%fdm%compute(work2, work1,aux_cmd_queue) ! do local solves + + call device_schwarz_extrude(work1_d, 0, zero, work2_d, 0, one, & enx, eny, enz, this%msh%nelv, aux_cmd_queue) - call this%gs_schwarz%op(work2, ns, GS_OP_ADD,this%event) - call device_event_sync(this%event) + call this%gs_schwarz%op(work2, ns, GS_OP_ADD,this%event) + call device_event_sync(this%event) - call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, & + call device_schwarz_extrude(work2_d, 0, one, work1_d, 0, -one, & enx, eny, enz, this%msh%nelv, aux_cmd_queue) - call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, & + call device_schwarz_extrude(work2_d, 2, one, work2_d, 0, one, & enx, eny, enz, this%msh%nelv, aux_cmd_queue) - call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, & + call device_schwarz_toreg3d(e_d, work2_d, this%Xh%lx, & this%msh%nelv, aux_cmd_queue) - call device_event_record(this%event,aux_cmd_queue) - call device_event_sync(this%event) + call device_event_record(this%event,aux_cmd_queue) + call device_event_sync(this%event) - call this%gs_h%op(e, n, GS_OP_ADD, this%event) - call bc_list_apply_scalar(this%bclst, e, n) - call device_col2(e_d,this%wt_d, n) - call device_stream_wait_event(aux_cmd_queue, this%event, 0) - else - call bc_list_apply_scalar(this%bclst, r, n) - call schwarz_toext3d(work1, r, this%Xh%lx, this%msh%nelv) + call this%gs_h%op(e, n, GS_OP_ADD, this%event) + call bc_list_apply_scalar(this%bclst, e, n) + call device_col2(e_d,this%wt_d, n) + call device_stream_wait_event(aux_cmd_queue, this%event, 0) + else + call bc_list_apply_scalar(this%bclst, r, n) + call schwarz_toext3d(work1, r, this%Xh%lx, this%msh%nelv) - ! exchange interior nodes - call schwarz_extrude(work1, 0, zero, work1, 2, one, & + ! exchange interior nodes + call schwarz_extrude(work1, 0, zero, work1, 2, one, & enx, eny, enz, this%msh%nelv) - call this%gs_schwarz%op(work1, ns, GS_OP_ADD) - call schwarz_extrude(work1, 0, one, work1, 2, -one, & + call this%gs_schwarz%op(work1, ns, GS_OP_ADD) + call schwarz_extrude(work1, 0, one, work1, 2, -one, & enx, eny, enz, this%msh%nelv) - - call this%fdm%compute(work2, work1) ! do local solves - ! Sum overlap region (border excluded) - call schwarz_extrude(work1, 0, zero, work2, 0, one, & + call this%fdm%compute(work2, work1) ! do local solves + + ! Sum overlap region (border excluded) + call schwarz_extrude(work1, 0, zero, work2, 0, one, & enx, eny, enz, this%msh%nelv) - call this%gs_schwarz%op(work2, ns, GS_OP_ADD) - call schwarz_extrude(work2, 0, one, work1, 0, -one, & + call this%gs_schwarz%op(work2, ns, GS_OP_ADD) + call schwarz_extrude(work2, 0, one, work1, 0, -one, & enx, eny, enz, this%msh%nelv) - call schwarz_extrude(work2, 2, one, work2, 0, one, & + call schwarz_extrude(work2, 2, one, work2, 0, one, & enx, eny, enz, this%msh%nelv) - call schwarz_toreg3d(e, work2, this%Xh%lx, this%msh%nelv) + call schwarz_toreg3d(e, work2, this%Xh%lx, this%msh%nelv) - ! sum border nodes - call this%gs_h%op(e, n, GS_OP_ADD) - call bc_list_apply_scalar(this%bclst, e, n) + ! sum border nodes + call this%gs_h%op(e, n, GS_OP_ADD) + call bc_list_apply_scalar(this%bclst, e, n) - call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv) - end if - end associate + call schwarz_wt3d(e, this%wt, this%Xh%lx, this%msh%nelv) + end if + end associate end subroutine schwarz_compute !Apply schwarz weights along the boundary of each element. diff --git a/src/math/signed_distance.f90 b/src/math/signed_distance.f90 new file mode 100644 index 00000000000..f9c18f9dabf --- /dev/null +++ b/src/math/signed_distance.f90 @@ -0,0 +1,431 @@ +! Copyright (c) 2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> @brief Module containing Signed Distance Functions. +module signed_distance + use num_types, only: dp, rp + use field, only: field_t + use tri, only: tri_t + use tri_mesh, only: tri_mesh_t + use aabb_tree, only: aabb_tree_t + + implicit none + +contains + + !> @brief Signed distance field + !! @details This routine computes the signed distance field for a given + !! object. + !! + !! Currently supported objects are: + !! - Triangular mesh (tri_mesh_t) + !! + !! @param[in,out] field_data Field data + !! @param[in] object Object + !! @param[in,optional] max_distance Maximum distance outside the mesh + subroutine signed_distance_field(field_data, object, max_distance) + use utils, only: neko_error + implicit none + + type(field_t), intent(inout) :: field_data + class(*), intent(in) :: object + real(kind=dp), intent(in), optional :: max_distance + + real(kind=dp) :: max_dist + + if (present(max_distance)) then + max_dist = max_distance + else + max_dist = huge(0.0_dp) + end if + + select type(object) + type is (tri_mesh_t) + call signed_distance_field_tri_mesh(field_data, object, max_dist) + + class default + call neko_error("signed_distance_field: Object type not supported.") + end select + + end subroutine signed_distance_field + + !> @brief Signed distance field for a triangular mesh + !! @details This routine computes the signed distance field for a given + !! triangular mesh. The algorithm utilizes an AABB tree to accelerate the + !! earch for potential elements. The signed distance is computed using the + !! brute force approach, where we compute the signed distance to each element + !! found through the AABB tree, and return the minimum distance. + !! + !! @param[in,out] field_data Field data + !! @param[in] mesh Triangular mesh + !! @param[in] max_distance Maximum distance outside the mesh + subroutine signed_distance_field_tri_mesh(field_data, mesh, max_distance) + use utils, only: neko_error + implicit none + + type(field_t), intent(inout) :: field_data + type(tri_mesh_t), intent(in) :: mesh + real(kind=dp), intent(in) :: max_distance + + integer :: total_size + integer :: id + type(aabb_tree_t) :: search_tree + real(kind=dp), dimension(3) :: p + real(kind=dp) :: distance + + ! Zero the field + field_data%x = 0.0_dp + total_size = field_data%dof%size() + + call search_tree%init(mesh%nelv) + call search_tree%build(mesh%el) + + if (search_tree%get_size() .ne. mesh%nelv) then + call neko_error("signed_distance_field_tri_mesh: & + & Error building the search tree.") + end if + + do id = 1, total_size + p(1) = field_data%dof%x(id, 1, 1, 1) + p(2) = field_data%dof%y(id, 1, 1, 1) + p(3) = field_data%dof%z(id, 1, 1, 1) + + distance = tri_mesh_aabb_tree(search_tree, mesh%el, p, max_distance) + + field_data%x(id, 1, 1, 1) = real(distance, kind=rp) + end do + + end subroutine signed_distance_field_tri_mesh + + !> @brief Signed distance function + !! @deprecated This routine is deprecated and will be removed in the future. + !! @details This routine computes the signed distance function for the + !! boundary mesh, to a given point (x, y, z). The algorithm is a + !! brute force approach, where we compute the signed distance to each + !! element in the mesh, and return the minimum distance. + !! + !! @param p Point + !! @param mesh Boundary mesh + !! @return Signed distance value + function tri_mesh_brute_force(mesh, p, max_distance) result(distance) + use tri, only: tri_t + use point, only: point_t + use num_types, only: dp + + implicit none + + type(tri_mesh_t), intent(in) :: mesh + real(kind=dp), intent(in) :: p(3) + real(kind=dp), intent(in) :: max_distance + + integer :: id + real(kind=dp) :: distance, weighted_sign + real(kind=dp) :: cd, cs + real(kind=dp) :: tol = 1e-6_dp + + distance = 1e10_dp + weighted_sign = 0.0_dp + + do id = 1, mesh%nelv + call element_distance(mesh%el(id), p, cd, cs) + + ! Update the weighted sign, if the relative difference is small + if (abs(cd - distance) / distance .lt. tol) then + weighted_sign = weighted_sign + cs + else if (cd .lt. distance) then + weighted_sign = cs + end if + + distance = min(cd, distance) + end do + + distance = sign(min(distance, max_distance), weighted_sign) + + end function tri_mesh_brute_force + + !> @brief Signed distance function using an AABB tree + !! @details This routine computes the signed distance function for the + !! boundary mesh, to a given point (x, y, z). The algorithm utilizes an + !! AABB tree to accelerate the search for potential elements. The signed + !! distance is computed using the brute force approach, where we compute the + !! signed distance to each element found through the AABB tree, and return + !! the minimum distance. + !! + !! @param tree AABB tree + !! @param object_list List of objects + !! @param p Point + !! @param max_distance Maximum distance outside the mesh + !! @return Signed distance value + function tri_mesh_aabb_tree(tree, object_list, p, max_distance) result(distance) + use aabb, only: aabb_t + use aabb_tree, only: aabb_node_t, AABB_NULL_NODE + use stack, only: stack_i4_t + implicit none + + class(aabb_tree_t), intent(in) :: tree + class(tri_t), dimension(:), intent(in) :: object_list + real(kind=dp), dimension(3), intent(in) :: p + real(kind=dp), intent(in) :: max_distance + + real(kind=dp) :: distance + real(kind=dp) :: weighted_sign + + real(kind=dp), parameter :: tol = 1.0e-6_dp + + type(stack_i4_t) :: simple_stack + integer :: current_index + + type(aabb_node_t) :: current_node + type(aabb_t) :: current_aabb + integer :: current_object_index + real(kind=dp) :: current_distance + real(kind=dp) :: current_sign + + type(aabb_node_t) :: left_node + type(aabb_node_t) :: right_node + + type(aabb_t) :: root_box + type(aabb_t) :: search_box + + integer :: root_index, left_index, right_index + real(kind=dp) :: random_value + + ! Initialize the stack and the search box + call simple_stack%init(size(object_list) * 2) + call search_box%init(p - max_distance, p + max_distance) + + ! Check if the root node overlaps the search box, if it does, push it to + ! the stack and update the search box to a randomly selected object. + root_index = tree%get_root_index() + root_box = tree%get_aabb(root_index) + + if (.not. root_box%overlaps(search_box)) then + distance = max_distance + weighted_sign = 1.0_dp + return + end if + + ! Grab a random object and compute the distance to it + call random_number(random_value) + current_object_index = floor(random_value * size(object_list) + 1) + call element_distance(object_list(current_object_index), p, distance, weighted_sign) + distance = distance + object_list(current_object_index)%diameter() + + ! Update the search box to the new distance and push the root node + call search_box%init(p - distance, p + distance) + call simple_stack%push(root_index) + + ! Traverse the tree and compute the signed distance to the elements + do while (.not. simple_stack%is_empty()) + current_index = simple_stack%pop() + if (current_index .eq. AABB_NULL_NODE) cycle + + current_node = tree%get_node(current_index) + current_aabb = current_node%get_aabb() + + if (current_node%is_leaf()) then + if (distance .lt. current_node%min_distance(p)) then + cycle + end if + + current_object_index = current_node%get_object_index() + call element_distance(object_list(current_object_index), p, current_distance, current_sign) + + ! Update the weighted sign, if the relative difference is small + if (abs(current_distance - distance) / distance .lt. tol) then + weighted_sign = weighted_sign + current_sign + else if (current_distance .lt. distance) then + weighted_sign = current_sign + end if + + distance = min(distance, current_distance) + + ! Update the search box to the new distance + if (distance .gt. current_aabb%get_diameter()) then + call search_box%init(p - distance, p + distance) + end if + else + + left_index = tree%get_left_index(current_index) + if (left_index .ne. AABB_NULL_NODE) then + left_node = tree%get_left_node(current_index) + if (left_node%aabb%overlaps(search_box)) then + call simple_stack%push(left_index) + end if + end if + + right_index = tree%get_right_index(current_index) + if (right_index .ne. AABB_NULL_NODE) then + right_node = tree%get_right_node(current_index) + if (right_node%aabb%overlaps(search_box)) then + call simple_stack%push(right_index) + end if + end if + end if + end do + + if (distance .gt. max_distance) then + distance = max_distance + end if + distance = sign(distance, weighted_sign) + + end function tri_mesh_aabb_tree + + ! ========================================================================== ! + ! Element distance functions + ! ========================================================================== ! + + !> @brief Main interface for the signed distance function for an element. + subroutine element_distance(element, p, distance, weighted_sign) + class(*), intent(in) :: element + real(kind=dp), dimension(3), intent(in) :: p + real(kind=dp), intent(out) :: distance + real(kind=dp), intent(out), optional :: weighted_sign + + select type(element) + type is (tri_t) + call element_distance_triangle(element, p, distance, weighted_sign) + + class default + print *, "Error: Element type not supported." + stop + end select + end subroutine element_distance + + ! -------------------------------------------------------------------------- ! + ! Specific element distance functions + + !> @brief Signed distance function for a triangle + !! @details This routine computes the signed distance function for the current + !! triangle. We compute the barycentric coordinate of the point projected onto + !! the triangle. If the projection is inside the triangle, the distance is the + !! distance to the plane. If the projection is outside the triangle, the + !! distance is the distance to the nearest edge or vertex. + !! + !! In order to improve precision of the sign estimation, we also compute the + !! weighted sign, which is the perpendicular distance to the plane divided by + !! the distance to the nearest point. + !! + !! @Note The returned distance is signed if the weighted_sign is not present. + !! + !! @param this Triangle + !! @param p Point + !! @return Distance value + !! @return[optional] Weighted sign + subroutine element_distance_triangle(triangle, p, distance, weighted_sign) + type(tri_t), intent(in) :: triangle + real(kind=dp), dimension(3), intent(in) :: p + + real(kind=dp), intent(out) :: distance + real(kind=dp), intent(out), optional :: weighted_sign + + real(kind=dp), dimension(3) :: v1, v2, v3 + real(kind=dp), dimension(3) :: normal + real(kind=dp) :: normal_length + real(kind=dp) :: b1, b2, b3 + + real(kind=dp), dimension(3) :: projection + real(kind=dp), dimension(3) :: edge + real(kind=dp) :: tol = 1e-10_dp + + real(kind=dp) :: face_distance + real(kind=dp) :: t + + ! Get vertices and the normal vector + v1 = triangle%pts(1)%p%x + v2 = triangle%pts(2)%p%x + v3 = triangle%pts(3)%p%x + + normal = cross(v2 - v1, v3 - v1) + normal_length = norm2(normal) + + if (normal_length .lt. tol) then + distance = huge(1.0_dp) + weighted_sign = 0.0_dp + return + end if + normal = normal / normal_length + + ! Compute Barycentric coordinates to determine if the point is inside the + ! triangular prism, of along an edge or by a face. + face_distance = dot_product(p - v1, normal) + + projection = p - normal * face_distance + b1 = dot_product(normal, cross(v2 - v1, projection - v1)) / normal_length + b2 = dot_product(normal, cross(v3 - v2, projection - v2)) / normal_length + b3 = dot_product(normal, cross(v1 - v3, projection - v3)) / normal_length + + if (b1 .le. tol) then + edge = v2 - v1 + t = dot_product(p - v1, edge) / norm2(edge) + t = max(0.0_dp, min(1.0_dp, t)) + + projection = v1 + t * edge + else if (b2 .le. tol) then + edge = v3 - v2 + t = dot_product(p - v2, edge) / norm2(edge) + t = max(0.0_dp, min(1.0_dp, t)) + + projection = v2 + t * edge + else if (b3 .le. tol) then + edge = v1 - v3 + t = dot_product(p - v3, edge) / norm2(edge) + t = max(0.0_dp, min(1.0_dp, t)) + + projection = v3 + t * edge + end if + + distance = norm2(projection - p) + if (present(weighted_sign)) then + weighted_sign = face_distance / distance + else + distance = sign(distance, face_distance) + end if + + end subroutine element_distance_triangle + + !> Compute cross product of two vectors + !> @param[in] a First vector + !> @param[in] b Second vector + !> @return Cross product \f$ a \times b \f$ + pure function cross(a, b) result(c) + real(kind=dp), dimension(3), intent(in) :: a + real(kind=dp), dimension(3), intent(in) :: b + real(kind=dp), dimension(3) :: c + + c(1) = a(2) * b(3) - a(3) * b(2) + c(2) = a(3) * b(1) - a(1) * b(3) + c(3) = a(1) * b(2) - a(2) * b(1) + + end function cross + +end module signed_distance diff --git a/src/math/tensor.f90 b/src/math/tensor.f90 index 5d8ad6292ce..79b0e01417d 100644 --- a/src/math/tensor.f90 +++ b/src/math/tensor.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! !> Tensor operations. @@ -79,7 +79,7 @@ module tensor module procedure triple_tensor_product_scalar, triple_tensor_product_vector end interface triple_tensor_product -public :: tensr3, transpose, trsp, trsp1, & + public :: tensr3, transpose, trsp, trsp1, & tnsr2d_el, tnsr3d_el, tnsr3d, tnsr1_3d, addtnsr, & triple_tensor_product, tnsr3d_el_list @@ -97,7 +97,7 @@ subroutine tensr3(v, nv, u, nu, A, Bt, Ct, w) real(kind=rp), intent(inout) :: Bt(nu, nv) real(kind=rp), intent(inout) :: Ct(nu, nv) integer :: j, k, l, nunu, nvnv, nunv - + nunu = nu**2 nvnv = nv**2 nunv = nu*nv @@ -113,7 +113,7 @@ subroutine tensr3(v, nv, u, nu, A, Bt, Ct, w) l = l + nvnv end do call mxm(w, nvnv, Ct, nu, v, nv) - + end subroutine tensr3 !> Transpose of a rectangular tensor \f$ A = B^T \f$. @@ -129,7 +129,7 @@ subroutine trsp(a, lda, b, ldb) a(i, j) = b(j, i) end do end do - + end subroutine trsp !> In-place transpose of a square tensor. @@ -146,7 +146,7 @@ subroutine trsp1(a, n) a(j, i) = tmp end do end do - + end subroutine trsp1 !> Computes \f$ v = A u B^T \f$. @@ -162,7 +162,7 @@ subroutine tnsr2d_el(v, nv, u, nu, A, Bt) else call tnsr2d_el_cpu(v, nv, u, nu, A, Bt) end if - + end subroutine tnsr2d_el !> Tensor product \f$ v =(C \otimes B \otimes A) u \f$ @@ -179,7 +179,7 @@ subroutine tnsr3d_el(v, nv, u, nu, A, Bt, Ct) else call tnsr3d_el_cpu(v, nv, u, nu, A, Bt, Ct) end if - + end subroutine tnsr3d_el !> Tensor product \f$ v =(C \otimes B \otimes A) u \f$ @@ -213,7 +213,7 @@ subroutine tnsr3d_el_list(v, nv, u, nu, A, Bt, Ct, el_list, n_pt) call tnsr3d_el_cpu(v(1,i), nv, u(1,el_list(i)+1), nu, A(1,1,i), Bt(1,1,i), Ct(1,1,i)) end do end if - + end subroutine tnsr3d_el_list @@ -224,7 +224,7 @@ subroutine tnsr3d(v, nv, u, nu, A, Bt, Ct, nelv) real(kind=rp), intent(inout) :: v(nv*nv*nv,nelv), u(nu*nu*nu,nelv) real(kind=rp), intent(inout) :: A(nv,nu), Bt(nu, nv), Ct(nu,nv) type(c_ptr) :: v_d, u_d, A_d, Bt_d, Ct_d - + if (NEKO_BCKND_SX .eq. 1) then call tnsr3d_sx(v, nv, u, nu, A, Bt, Ct, nelv) @@ -240,7 +240,7 @@ subroutine tnsr3d(v, nv, u, nu, A, Bt, Ct, nelv) else call tnsr3d_cpu(v, nv, u, nu, A, Bt, Ct, nelv) end if - + end subroutine tnsr3d !> In place tensor product \f$ v =(C \otimes B \otimes A) v \f$. @@ -264,11 +264,11 @@ end subroutine tnsr1_3d subroutine addtnsr(s, h1, h2, h3, nx, ny, nz) integer, intent(in) :: nx, ny, nz - real(kind=rp), intent(in) :: h1(nx), h2(ny), h3(nz) + real(kind=rp), intent(in) :: h1(nx), h2(ny), h3(nz) real(kind=rp), intent(inout) :: s(nx, ny, nz) real(kind=rp) :: hh integer :: ix, iy, iz - + do iz = 1,nz do iy = 1,ny hh = h2(iy)*h3(iz) @@ -277,7 +277,7 @@ subroutine addtnsr(s, h1, h2, h3, nx, ny, nz) end do end do end do - + end subroutine addtnsr !> Computes the tensor product \f$ v =(H_t \otimes H_s \otimes H_r) u \f$. diff --git a/src/math/vector.f90 b/src/math/vector.f90 index a927c2f637d..c02acaed82a 100644 --- a/src/math/vector.f90 +++ b/src/math/vector.f90 @@ -40,7 +40,7 @@ module vector use, intrinsic :: iso_c_binding implicit none private - + type, public :: vector_t real(kind=rp), allocatable :: x(:) !< Vector entries. type(c_ptr) :: x_d = C_NULL_PTR !< Device pointer. @@ -57,7 +57,7 @@ module vector type, public :: vector_ptr_t type(vector_t), pointer :: v - end type + end type vector_ptr_t contains @@ -70,14 +70,14 @@ subroutine vector_init(v, n) allocate(v%x(n)) v%x = 0.0_rp - + if (NEKO_BCKND_DEVICE .eq. 1) then call device_map(v%x, v%x_d, n) call device_cfill(v%x_d, 0.0_rp, n) end if v%n = n - + end subroutine vector_init !> Deallocate a vector. @@ -93,14 +93,14 @@ subroutine vector_free(v) end if v%n = 0 - + end subroutine vector_free !> Return the number of entries in the vector. function vector_size(v) result(s) class(vector_t), intent(inout) :: v integer :: s - s = v%n + s = v%n end function vector_size !> Assignment \f$ v = w \f$. @@ -116,11 +116,11 @@ subroutine vector_assign_vector(v, w) v%n = w%n allocate(v%x(v%n)) - + if (NEKO_BCKND_DEVICE .eq. 1) then call device_map(v%x, v%x_d, v%n) end if - + end if if (NEKO_BCKND_DEVICE .eq. 1) then @@ -148,5 +148,5 @@ subroutine vector_assign_scalar(v, s) end subroutine vector_assign_scalar - + end module vector diff --git a/src/mesh/aabb_tree.f90 b/src/mesh/aabb_tree.f90 new file mode 100644 index 00000000000..f7a12c0e6c4 --- /dev/null +++ b/src/mesh/aabb_tree.f90 @@ -0,0 +1,928 @@ +! Copyright (c) 2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! ============================================================================ ! +! Original C++ Implementation from: +! https://github.com/JamesRandall/SimpleVoxelEngine/blob/master/voxelEngine/src/AABBTree.h +! +! Translated to Fortran by: +! @author Tim Felle Olsen +! @date 9 Feb 2024 +! +! C++ Code License: +! The MIT License (MIT) +! +! Copyright (c) 2017 James Randall +! +! Permission is hereby granted, free of charge, to any person obtaining a copy of +! this software and associated documentation files (the "Software"), to deal in +! the Software without restriction, including without limitation the rights to +! use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +! the Software, and to permit persons to whom the Software is furnished to do so, +! subject to the following conditions: +! +! The above copyright notice and this permission notice shall be included in all +! copies or substantial portions of the Software. +! +! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +! FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +! COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +! IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +! CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +! ============================================================================ ! + +!> @brief Axis Aligned Bounding Box (aabb) Tree data structure. +!! @details +!! This is a Fortran implementation of an Axis Aligned Bounding Box Tree +!! data structure. +!! The purpose of this is to accelerate a Signed Distance Function and other +!! spatial computations. +module aabb_tree + use aabb + use tri, only: tri_t + use num_types, only: rp, dp + + implicit none + private + + integer, parameter, public :: AABB_NULL_NODE = -1 + + ! ========================================================================== ! + ! Type definitions + ! ========================================================================== ! + + !> @brief Node type for the Axis Aligned Bounding Box (aabb) Tree + type, public :: aabb_node_t + private + type(aabb_t), public :: aabb + integer :: object_index = -1 + + ! tree links + integer :: parent_node_index = AABB_NULL_NODE + integer :: left_node_index = AABB_NULL_NODE + integer :: right_node_index = AABB_NULL_NODE + + ! node linked list link + integer :: next_node_index = AABB_NULL_NODE + + contains + procedure, pass(this), public :: init => aabb_node_init + + ! Getters + procedure, pass(this), public :: get_aabb => aabb_node_get_aabb + procedure, pass(this), public :: get_object_index => aabb_node_get_object_index + procedure, pass(this), public :: get_parent_index => aabb_node_get_parent_index + procedure, pass(this), public :: get_left_index => aabb_node_get_left_index + procedure, pass(this), public :: get_right_index => aabb_node_get_right_index + + ! Unary operations + procedure, pass(this), public :: min_distance => aabb_node_min_distance + + ! Boolean operators + procedure, pass(this), public :: is_leaf => aabb_node_is_leaf + procedure, pass(this), public :: is_valid => aabb_node_is_valid + + ! Comparison operators + generic :: operator(.lt.) => less + generic :: operator(.gt.) => greater + + procedure, pass(this) :: less => aabb_node_less + procedure, pass(this) :: greater => aabb_node_greater + + end type aabb_node_t + + !> @brief Axis Aligned Bounding Box (aabb) Tree + type, public :: aabb_tree_t + private + type(aabb_node_t), allocatable :: nodes(:) + integer :: root_node_index = AABB_NULL_NODE + integer :: allocated_node_count = 0 + integer :: next_free_node_index = AABB_NULL_NODE + integer :: node_capacity = 0 + integer :: growth_size = 1 + + contains + + ! Initializers + procedure, pass(this), public :: init => aabb_tree_init + procedure, pass(this), public :: build => aabb_tree_build_tree + procedure, pass(this), public :: insert_object => aabb_tree_insert_object + + ! Getters + procedure, pass(this), public :: get_size => aabb_tree_get_size + + procedure, pass(this), public :: get_root_index => aabb_tree_get_root_index + procedure, pass(this), public :: get_parent_index => aabb_tree_get_parent_index + procedure, pass(this), public :: get_left_index => aabb_tree_get_left_index + procedure, pass(this), public :: get_right_index => aabb_tree_get_right_index + + procedure, pass(this), public :: get_node => aabb_tree_get_node + procedure, pass(this), public :: get_root_node => aabb_tree_get_root_node + procedure, pass(this), public :: get_parent_node => aabb_tree_get_parent_node + procedure, pass(this), public :: get_left_node => aabb_tree_get_left_node + procedure, pass(this), public :: get_right_node => aabb_tree_get_right_node + + procedure, pass(this), public :: get_aabb => aabb_tree_get_aabb + + procedure, pass(this), public :: query_overlaps => aabb_tree_query_overlaps + + procedure, pass(this), public :: print => aabb_tree_print + + ! ----------------------------------------------------------------------- ! + ! Internal methods + + procedure, pass(this) :: allocate_node => aabb_tree_allocate_node + procedure, pass(this) :: deallocate_node => aabb_tree_deallocate_node + procedure, pass(this) :: resize_node_pool => aabb_tree_resize_node_pool + procedure, pass(this) :: insert_leaf => aabb_tree_insert_leaf + + procedure, pass(this) :: fix_upwards_tree => aabb_tree_fix_upwards_tree + + procedure, pass(this) :: valid_tree => aabb_tree_valid_tree + + end type aabb_tree_t + +contains + + ! ========================================================================== ! + ! Definitions of node methods + ! ========================================================================== ! + + !> @brief Initializes the AABB node. + subroutine aabb_node_init(this) + class(aabb_node_t), intent(inout) :: this + + this%object_index = -1 + this%parent_node_index = AABB_NULL_NODE + this%left_node_index = AABB_NULL_NODE + this%right_node_index = AABB_NULL_NODE + this%next_node_index = AABB_NULL_NODE + end subroutine aabb_node_init + + ! -------------------------------------------------------------------------- ! + ! Getters + + !> @brief Returns the Axis Aligned Bounding Box (aabb) of the node. + pure function aabb_node_get_aabb(this) result(res) + class(aabb_node_t), intent(in) :: this + type(aabb_t) :: res + + res = this%aabb + end function aabb_node_get_aabb + + !> @brief Returns the object index of the node. + pure function aabb_node_get_object_index(this) result(object_index) + class(aabb_node_t), intent(in) :: this + integer :: object_index + + object_index = this%object_index + end function aabb_node_get_object_index + + !> @brief Returns the parent index of the node. + pure function aabb_node_get_parent_index(this) result(parent_index) + class(aabb_node_t), intent(in) :: this + integer :: parent_index + + parent_index = this%parent_node_index + end function aabb_node_get_parent_index + + !> @brief Returns the left index of the node. + pure function aabb_node_get_left_index(this) result(left_index) + class(aabb_node_t), intent(in) :: this + integer :: left_index + + left_index = this%left_node_index + end function aabb_node_get_left_index + + !> @brief Returns the right index of the node. + pure function aabb_node_get_right_index(this) result(right_index) + class(aabb_node_t), intent(in) :: this + integer :: right_index + + right_index = this%right_node_index + end function aabb_node_get_right_index + + !> @brief Get the minimum possible distance from the aabb to a point. + function aabb_node_min_distance(this, p) result(distance) + class(aabb_node_t), intent(in) :: this + real(kind=dp), dimension(3), intent(in) :: p + real(kind=dp) :: distance + + distance = 0.5_rp * this%aabb%get_diameter() & + - norm2(this%aabb%get_center() - p) + end function aabb_node_min_distance + + ! -------------------------------------------------------------------------- ! + ! Boolean operators + + !> @brief Returns true if the node is a leaf node. + pure function aabb_node_is_leaf(this) result(res) + class(aabb_node_t), intent(in) :: this + logical :: res + + res = this%left_node_index == AABB_NULL_NODE .and. & + this%right_node_index == AABB_NULL_NODE + end function aabb_node_is_leaf + + !> @brief Returns true if the node is a valid node. + pure function aabb_node_is_valid(this) result(valid) + class(aabb_node_t), intent(in) :: this + logical :: valid + + if (this%is_leaf()) then + valid = & + & this%left_node_index .eq. AABB_NULL_NODE .and. & + & this%right_node_index .eq. AABB_NULL_NODE .and. & + & this%object_index .gt. 0 + else + valid = & + & this%left_node_index .ne. AABB_NULL_NODE .and. & + & this%right_node_index .ne. AABB_NULL_NODE .and. & + & this%object_index .eq. -1 + end if + + end function aabb_node_is_valid + + ! -------------------------------------------------------------------------- ! + ! Comparison operators + + !> @brief Returns true if the node is less than the other node. + pure function aabb_node_less(this, other) result(res) + class(aabb_node_t), intent(in) :: this + class(aabb_node_t), intent(in) :: other + logical :: res + + res = this%aabb .lt. other%aabb + + end function aabb_node_less + + !> @brief Returns true if the node is greater than the other node. + pure function aabb_node_greater(this, other) result(res) + class(aabb_node_t), intent(in) :: this + class(aabb_node_t), intent(in) :: other + logical :: res + + res = this%aabb .gt. other%aabb + + end function aabb_node_greater + + ! ========================================================================== ! + ! Definitions of tree methods + ! ========================================================================== ! + + !> @brief Initializes the AABB tree. + subroutine aabb_tree_init(this, initial_capacity) + class(aabb_tree_t), intent(inout) :: this + integer, intent(in) :: initial_capacity + + integer :: i + + this%root_node_index = AABB_NULL_NODE + this%allocated_node_count = 0 + this%next_free_node_index = 1 + this%node_capacity = initial_capacity + this%growth_size = initial_capacity + + if (allocated(this%nodes)) deallocate(this%nodes) + allocate(this%nodes(initial_capacity)) + + do i = 1, initial_capacity + this%nodes(i)%next_node_index = i + 1 + end do + this%nodes(initial_capacity)%next_node_index = AABB_NULL_NODE + end subroutine aabb_tree_init + + !> @brief Builds the tree. + subroutine aabb_tree_build_tree(this, objects) + use utils, only: neko_error + implicit none + + class(aabb_tree_t), intent(inout) :: this + class(*), dimension(:), intent(in) :: objects + + integer :: i_obj, i_node, i + logical :: done + + integer :: start_layer, end_layer + + type(aabb_t), dimension(:), allocatable :: box_list + integer, dimension(:), allocatable :: sorted_indices + + call this%init(size(objects) * 2) + + ! ------------------------------------------------------------------------ ! + ! Start by sorting the list of objects, then build a balanced binary tree + ! from the sorted list + + allocate(box_list(size(objects))) + + do i_obj = 1, size(objects) + box_list(i_obj) = get_aabb(objects(i_obj)) + end do + sorted_indices = sort(box_list) + + do i = 1, size(sorted_indices) + i_obj = sorted_indices(i) + i_node = this%allocate_node() + this%nodes(i_node)%aabb = get_aabb(objects(i_obj)) + this%nodes(i_node)%object_index = i_obj + end do + + + start_layer = 1 + end_layer = size(objects) + done = .false. + do while (.not. done) + + ! build the next layer + do i = start_layer, end_layer - 1, 2 + i_node = this%allocate_node() + + this%nodes(i_node)%aabb = merge(this%nodes(i)%aabb, this%nodes(i + 1)%aabb) + + this%nodes(i_node)%left_node_index = i + this%nodes(i_node)%right_node_index = i + 1 + + this%nodes(i)%parent_node_index = i_node + this%nodes(i + 1)%parent_node_index = i_node + end do + + ! if the number of nodes is odd, we need to create a new node to hold the + ! last node + if (mod(end_layer - start_layer, 2) .eq. 0) then + i_node = this%allocate_node() + this%nodes(i_node)%aabb = this%nodes(end_layer)%aabb + this%nodes(i_node)%left_node_index = end_layer + this%nodes(i_node)%right_node_index = AABB_NULL_NODE + + this%nodes(end_layer)%parent_node_index = i_node + end if + + ! move to the next layer + start_layer = end_layer + 1 + end_layer = this%allocated_node_count + + ! If there is only one node left, we are done + done = start_layer .eq. end_layer + end do + + ! The last node allocated is the root node + this%root_node_index = this%allocated_node_count + + if (this%get_size() .ne. size(objects)) then + print *, "this%get_size() = ", this%get_size() + print *, "size(objects) = ", size(objects) + call neko_error("Invalid tree size") + end if + + end subroutine aabb_tree_build_tree + + function sort(array) result(indices) + type(aabb_t), dimension(:), intent(in) :: array + integer, dimension(:), allocatable :: indices + logical, dimension(:), allocatable :: visited + + integer :: i, imin + integer :: minidx + + allocate(indices(size(array))) + allocate(visited(size(array))) + + visited = .false. + indices = 0 + do i = 1, size(array) + minidx = -1 + do imin = 1, size(array) + if (.not. visited(imin) .and. minidx .eq. -1) minidx = imin + + if (visited(imin) .and. array(imin) .lt. array(minidx)) minidx = imin + end do + + indices(i) = minidx + visited(minidx) = .true. + end do + + end function sort + + ! -------------------------------------------------------------------------- ! + ! Getters + + !> @brief Returns the size of the tree, in number of leaves. + function aabb_tree_get_size(this) result(size) + use stack, only: stack_i4_t + use utils, only: neko_error + class(aabb_tree_t), intent(in) :: this + integer :: size + + type(stack_i4_t) :: simple_stack + integer :: idx, tmp + + call simple_stack%init(this%allocated_node_count) + size = 0 + tmp = this%get_root_index() + if (tmp .ne. AABB_NULL_NODE) then + call simple_stack%push(tmp) + end if + + do while (.not. simple_stack%is_empty()) + idx = simple_stack%pop() + if (idx .eq. AABB_NULL_NODE) cycle + + if (this%nodes(idx)%is_leaf()) then + size = size + 1 + else + tmp = this%get_left_index(idx) + call simple_stack%push(tmp) + tmp = this%get_right_index(idx) + call simple_stack%push(tmp) + end if + end do + + end function aabb_tree_get_size + + ! -------------------------------------------------------------------------- ! + ! Get index of nodes + + !> @brief Returns the index of the root node. + pure function aabb_tree_get_root_index(this) result(root_index) + class(aabb_tree_t), intent(in) :: this + integer :: root_index + + root_index = this%root_node_index + end function aabb_tree_get_root_index + + !> @brief Returns the index of the parent node of the node at the given index. + pure function aabb_tree_get_parent_index(this, node_index) result(parent_index) + class(aabb_tree_t), intent(in) :: this + integer, intent(in) :: node_index + integer :: parent_index + + parent_index = this%nodes(node_index)%parent_node_index + end function aabb_tree_get_parent_index + + !> @brief Returns the index of the left node of the node at the given index. + pure function aabb_tree_get_left_index(this, node_index) result(left_index) + class(aabb_tree_t), intent(in) :: this + integer, intent(in) :: node_index + integer :: left_index + + left_index = this%nodes(node_index)%left_node_index + end function aabb_tree_get_left_index + + !> @brief Returns the index of the right node of the node at the given index. + pure function aabb_tree_get_right_index(this, node_index) result(right_index) + class(aabb_tree_t), intent(in) :: this + integer, intent(in) :: node_index + integer :: right_index + + right_index = this%nodes(node_index)%right_node_index + end function aabb_tree_get_right_index + + ! -------------------------------------------------------------------------- ! + ! Get nodes + + !> @brief Returns the node at the given index. + pure function aabb_tree_get_node(this, node_index) result(node) + class(aabb_tree_t), intent(in) :: this + integer, intent(in) :: node_index + type(aabb_node_t) :: node + + node = this%nodes(node_index) + end function aabb_tree_get_node + + !> @brief Returns the root node of the tree. + pure function aabb_tree_get_root_node(this) result(root_node) + class(aabb_tree_t), intent(in) :: this + type(aabb_node_t) :: root_node + + root_node = this%nodes(this%root_node_index) + end function aabb_tree_get_root_node + + !> @brief Returns the parent node of the node at the given index. + pure function aabb_tree_get_parent_node(this, node_index) result(parent_node) + class(aabb_tree_t), intent(in) :: this + integer, intent(in) :: node_index + type(aabb_node_t) :: parent_node + + parent_node = this%nodes(this%nodes(node_index)%parent_node_index) + end function aabb_tree_get_parent_node + + !> @brief Returns the left node of the node at the given index. + pure function aabb_tree_get_left_node(this, node_index) result(left_node) + class(aabb_tree_t), intent(in) :: this + integer, intent(in) :: node_index + type(aabb_node_t) :: left_node + + left_node = this%nodes(this%nodes(node_index)%left_node_index) + end function aabb_tree_get_left_node + + !> @brief Returns the right node of the node at the given index. + pure function aabb_tree_get_right_node(this, node_index) result(right_node) + class(aabb_tree_t), intent(in) :: this + integer, intent(in) :: node_index + type(aabb_node_t) :: right_node + + right_node = this%nodes(this%nodes(node_index)%right_node_index) + end function aabb_tree_get_right_node + + pure function aabb_tree_get_aabb(this, node_index) result(out_box) + class(aabb_tree_t), intent(in) :: this + integer, intent(in) :: node_index + type(aabb_t) :: out_box + + out_box = this%nodes(node_index)%aabb + end function aabb_tree_get_aabb + + ! -------------------------------------------------------------------------- ! + + !> @brief Inserts an object into the tree. + subroutine aabb_tree_insert_object(this, object, object_index) + class(aabb_tree_t), intent(inout) :: this + class(*), intent(in) :: object + integer, intent(in) :: object_index + + integer :: node_index + + node_index = this%allocate_node() + this%nodes(node_index)%aabb = get_aabb(object) + this%nodes(node_index)%object_index = object_index + + call this%insert_leaf(node_index) + end subroutine aabb_tree_insert_object + + !> @brief Queries the tree for overlapping objects. + subroutine aabb_tree_query_overlaps(this, object, object_index, overlaps) + use stack, only: stack_i4_t + implicit none + + class(aabb_tree_t), intent(in) :: this + class(*), intent(in) :: object + integer, intent(in) :: object_index + integer, intent(out) :: overlaps(:) + + type(stack_i4_t) :: simple_stack + type(aabb_t) :: object_box + + integer :: root_index, left_index, right_index + + integer :: node_index + + object_box = get_aabb(object) + root_index = this%get_root_index() + + call simple_stack%push(root_index) + + do while (.not. simple_stack%is_empty()) + node_index = simple_stack%pop() + + if (node_index == AABB_NULL_NODE) cycle + + if (this%nodes(node_index)%aabb%overlaps(object_box)) then + if (this%nodes(node_index)%is_leaf()) then + if (.not. this%nodes(node_index)%object_index == object_index) then + overlaps = [this%nodes(node_index)%object_index, overlaps] + end if + else + left_index = this%get_left_index(node_index) + call simple_stack%push(left_index) + right_index = this%get_right_index(node_index) + call simple_stack%push(right_index) + end if + end if + end do + end subroutine aabb_tree_query_overlaps + + ! -------------------------------------------------------------------------- ! + ! Internal methods + + !> @brief Allocates a new node in the tree. + function aabb_tree_allocate_node(this) result(node_index) + class(aabb_tree_t), intent(inout) :: this + integer :: node_index + + if (this%next_free_node_index == AABB_NULL_NODE) then + call this%resize_node_pool(this%node_capacity + this%growth_size) + end if + + node_index = this%next_free_node_index + + associate(new_node => this%nodes(node_index)) + this%next_free_node_index = new_node%next_node_index + + new_node%parent_node_index = AABB_NULL_NODE + new_node%left_node_index = AABB_NULL_NODE + new_node%right_node_index = AABB_NULL_NODE + + this%next_free_node_index = new_node%next_node_index + this%allocated_node_count = this%allocated_node_count + 1 + + end associate + end function aabb_tree_allocate_node + + !> @brief Deallocates a node in the tree. + subroutine aabb_tree_deallocate_node(this, node_index) + class(aabb_tree_t), intent(inout) :: this + integer, intent(in) :: node_index + + this%nodes(node_index)%next_node_index = this%next_free_node_index + this%next_free_node_index = node_index + this%allocated_node_count = this%allocated_node_count - 1 + end subroutine aabb_tree_deallocate_node + + !> @brief Inserts a leaf into the tree. + subroutine aabb_tree_insert_leaf(this, leaf_node_index) + class(aabb_tree_t), intent(inout) :: this + integer, intent(in) :: leaf_node_index + + integer :: tree_node_index + + real(kind=rp) :: cost_left + real(kind=rp) :: cost_right + + type(aabb_node_t) :: leaf_node + type(aabb_node_t) :: tree_node + type(aabb_node_t) :: left_node + type(aabb_node_t) :: right_node + + type(aabb_t) :: combined_aabb + real(kind=rp) :: new_parent_node_cost + real(kind=rp) :: minimum_push_down_cost + type(aabb_t) :: new_left_aabb + type(aabb_t) :: new_right_aabb + integer :: leaf_sibling_index + type(aabb_node_t) :: leaf_sibling + integer :: old_parent_index + integer :: new_parent_index + type(aabb_node_t) :: new_parent + type(aabb_node_t) :: old_parent + + ! make sure were inserting a new leaf + leaf_node = this%nodes(leaf_node_index) + + ! if the tree is empty then we make the root the leaf + if (this%root_node_index .eq. AABB_NULL_NODE) then + this%root_node_index = leaf_node_index + leaf_node%parent_node_index = AABB_NULL_NODE + leaf_node%left_node_index = AABB_NULL_NODE + leaf_node%right_node_index = AABB_NULL_NODE + + return + end if + + ! search for the best place to put the new leaf in the tree + ! we use surface area and depth as search heuristics + tree_node_index = this%root_node_index + tree_node = this%get_node(tree_node_index) + do while (.not. tree_node%is_leaf()) + + ! because of the test in the while loop above we know we are never a + ! leaf inside it + left_node = this%get_left_node(tree_node_index) + right_node = this%get_right_node(tree_node_index) + + ! ------------------------------------------------------------------- ! + + combined_aabb = merge(tree_node%aabb, leaf_node%get_aabb()) + + new_parent_node_cost = 2.0_rp * combined_aabb%get_surface_area() + minimum_push_down_cost = 2.0_rp * ( & + & combined_aabb%get_surface_area() & + & - tree_node%aabb%get_surface_area()& + & ) + + ! use the costs to figure out whether to create a new parent here or + ! descend + if (left_node%is_leaf()) then + new_left_aabb = merge(leaf_node%aabb, left_node%get_aabb()) + cost_left = new_left_aabb%get_surface_area() + minimum_push_down_cost + else + new_left_aabb = merge(leaf_node%aabb, left_node%get_aabb()) + cost_left = ( & + & new_left_aabb%get_surface_area() & + & - left_node%aabb%get_surface_area()& + & ) + minimum_push_down_cost + end if + + if (right_node%is_leaf()) then + new_right_aabb = merge(leaf_node%aabb, right_node%aabb) + cost_right = new_right_aabb%get_surface_area() + minimum_push_down_cost + else + new_right_aabb = merge(leaf_node%aabb, right_node%aabb) + cost_right = ( & + & new_right_aabb%get_surface_area() & + & - right_node%aabb%get_surface_area() & + & ) + minimum_push_down_cost + end if + + ! if the cost of creating a new parent node here is less than descending + ! in either direction then we know we need to create a new parent node, + ! errrr, here and attach the leaf to that + if (new_parent_node_cost < cost_left .and. new_parent_node_cost < cost_right) then + exit + end if + + ! otherwise descend in the cheapest direction + if (cost_left .lt. cost_right) then + tree_node_index = tree_node%get_left_index() + else + tree_node_index = tree_node%get_right_index() + end if + + ! ------------------------------------------------------------------- ! + ! Update the node and continue the loop + tree_node = this%get_node(tree_node_index) + end do + + ! the leafs sibling is going to be the node we found above and we are + ! going to create a new parent node and attach the leaf and this item + leaf_sibling_index = tree_node_index + leaf_sibling = this%nodes(leaf_sibling_index) + old_parent_index = this%get_parent_index(leaf_sibling_index) + new_parent_index = this%allocate_node() + new_parent = this%nodes(new_parent_index) + new_parent%parent_node_index = old_parent_index + new_parent%aabb = merge(leaf_node%aabb, leaf_sibling%aabb) + + if (leaf_node .lt. leaf_sibling) then + new_parent%left_node_index = leaf_node_index + new_parent%right_node_index = leaf_sibling_index + else + new_parent%left_node_index = leaf_sibling_index + new_parent%right_node_index = leaf_node_index + end if + + leaf_node%parent_node_index = new_parent_index + leaf_sibling%parent_node_index = new_parent_index + + if (old_parent_index .eq. AABB_NULL_NODE) then + ! the old parent was the root and so this is now the root + this%root_node_index = new_parent_index + else + ! the old parent was not the root and so we need to patch the left or + ! right index to point to the new node + old_parent = this%nodes(old_parent_index) + if (old_parent%left_node_index .eq. leaf_sibling_index) then + old_parent%left_node_index = new_parent_index + else + old_parent%right_node_index = new_parent_index + end if + this%nodes(old_parent_index) = old_parent + end if + + this%nodes(leaf_node_index) = leaf_node + this%nodes(leaf_sibling_index) = leaf_sibling + this%nodes(new_parent_index) = new_parent + + ! finally we need to walk back up the tree fixing heights and areas + tree_node_index = leaf_node%parent_node_index + + call this%fix_upwards_tree(tree_node_index) + + end subroutine aabb_tree_insert_leaf + + !> @brief Validates the tree. + function aabb_tree_valid_tree(this) result(valid) + use stack, only: stack_i4_t + implicit none + + class(aabb_tree_t), intent(in) :: this + logical :: valid + + type(stack_i4_t) :: simple_stack + integer :: current_index + integer :: root_index, left_index, right_index + + valid = .true. + if (this%root_node_index .eq. AABB_NULL_NODE) then + valid = .false. + end if + + root_index = this%get_root_index() + + call simple_stack%init(this%node_capacity) + call simple_stack%push(root_index) + + do while (.not. simple_stack%is_empty()) + current_index = simple_stack%pop() + if (current_index == AABB_NULL_NODE) cycle + + valid = valid .and. this%nodes(current_index)%is_valid() + + if (.not. this%nodes(current_index)%is_leaf()) then + left_index = this%get_left_index(current_index) + right_index = this%get_right_index(current_index) + + call simple_stack%push(left_index) + call simple_stack%push(right_index) + end if + end do + end function aabb_tree_valid_tree + + !> @brief Fixes the tree upwards. + !! @details This method is used to fix the tree upwards after an insertion. + !! It is used to expand the nodes of the tree to fit the new leaf node. + subroutine aabb_tree_fix_upwards_tree(this, tree_start_index) + class(aabb_tree_t), intent(inout) :: this + integer, intent(in) :: tree_start_index + + type(aabb_node_t) :: left_node + type(aabb_node_t) :: right_node + integer :: tree_node_index + + tree_node_index = tree_start_index + do while (tree_node_index .ne. AABB_NULL_NODE) + left_node = this%get_left_node(tree_node_index) + right_node = this%get_right_node(tree_node_index) + + this%nodes(tree_node_index)%aabb = merge(left_node%aabb, right_node%aabb) + + tree_node_index = this%get_parent_index(tree_node_index) + end do + end subroutine aabb_tree_fix_upwards_tree + + !> @brief Prints the tree. + subroutine aabb_tree_print(this) + use stack, only: stack_i4_t + class(aabb_tree_t), intent(inout) :: this + type(stack_i4_t) :: simple_stack + + integer :: current_index + integer :: root_index, left_index, right_index + + root_index = this%get_root_index() + call simple_stack%init(this%node_capacity) + call simple_stack%push(root_index) + + do while (.not. simple_stack%is_empty()) + current_index = simple_stack%pop() + if (current_index .eq. AABB_NULL_NODE) cycle + + left_index = this%get_left_index(current_index) + right_index = this%get_right_index(current_index) + + call simple_stack%push(this%nodes(current_index)%left_node_index) + call simple_stack%push(this%nodes(current_index)%right_node_index) + + write(*, *) "i = ", current_index + write(*, *) " Parent : ", this%get_parent_index(current_index) + write(*, *) " Children: ", this%get_left_index(current_index), this%get_right_index(current_index) + + write(*, *) " object_index = ", this%nodes(current_index)%object_index + end do + + end subroutine aabb_tree_print + + !> @brief Resizes the node pool. + subroutine aabb_tree_resize_node_pool(this, new_capacity) + class(aabb_tree_t), intent(inout) :: this + integer, intent(in) :: new_capacity + + type(aabb_node_t), dimension(:), allocatable :: temp + integer :: i + + allocate(temp(new_capacity)) + temp(:this%node_capacity) = this%nodes(:this%node_capacity) + + do i = this%allocated_node_count, new_capacity + temp(i)%next_node_index = i + 1 + end do + temp(new_capacity)%next_node_index = AABB_NULL_NODE + + call move_alloc(temp, this%nodes) + + this%node_capacity = new_capacity + this%next_free_node_index = this%allocated_node_count + 1 + + end subroutine aabb_tree_resize_node_pool + +end module aabb_tree diff --git a/src/mesh/curve.f90 b/src/mesh/curve.f90 index 6d013350468..b76f034ec23 100644 --- a/src/mesh/curve.f90 +++ b/src/mesh/curve.f90 @@ -21,7 +21,7 @@ module curve procedure, pass(z) :: add_element => curve_element_add ! procedure, pass(z) :: apply_xyz => curve_apply_xyz end type curve_t - + contains !> Initialize a curved domain @@ -36,7 +36,7 @@ subroutine curve_element_init(z, size) else call z%scratch%init() end if - + end subroutine curve_element_init !> Deallocate a domain @@ -50,24 +50,23 @@ subroutine curve_element_free(z) z%size = 0 call z%scratch%free() - + end subroutine curve_element_free !> Finalize a domain list !! @details Create a static list of (facet,el) tuples subroutine curve_element_finalize(z) class(curve_t), target, intent(inout) :: z - class(struct_curve_t), pointer :: tp(:) integer :: i - + if (.not. z%finalized) then allocate(z%curve_el(z%scratch%size())) select type (tp=>z%scratch%data) - type is (struct_curve_t) - do i = 1, z%scratch%size() - z%curve_el(i) = tp(i) - end do + type is (struct_curve_t) + do i = 1, z%scratch%size() + z%curve_el(i) = tp(i) + end do end select z%size = z%scratch%size() @@ -75,9 +74,9 @@ subroutine curve_element_finalize(z) call z%scratch%clear() z%finalized = .true. - + end if - + end subroutine curve_element_finalize !> Add a (facet, el) tuple to an unfinalized domain @@ -108,14 +107,14 @@ end subroutine curve_element_add ! el_idx = this%curve_el(i)%el_idx ! do j = 1, 12 ! if (this%curve_el(i)%curve_type(j) .eq. 1) then -! !call sphere_surface(j, this%curve_el(i)%curve_data(:,j),x(1,el_idx), y(1,el_idx), z(1, el_idx)) +! !call sphere_surface(j, this%curve_el(i)%curve_data(:,j),x(1,el_idx), y(1,el_idx), z(1, el_idx)) ! else if (this%curve_el(i)%curve_type(j) .eq. 2) then -! !call generate_surface(j, this%curve_el(i)%curve_data(:,j), x(1,1,1,el_idx), y(1,1,1,el_idx), z(1,1,1, el_idx)) +! !call generate_surface(j, this%curve_el(i)%curve_data(:,j), x(1,1,1,el_idx), y(1,1,1,el_idx), z(1,1,1, el_idx)) ! end if ! end do ! do j = 1, 12 ! if (this%curve_el(i)%curve_type(j) .eq. 3) then -! call arc_surface(j, this%curve_el(i)%curve_data(:,j),x(1,1,1,el_idx), y(1,1,1,el_idx), z(1,1,1, el_idx)) +! call arc_surface(j, this%curve_el(i)%curve_data(:,j),x(1,1,1,el_idx), y(1,1,1,el_idx), z(1,1,1, el_idx)) ! end if ! end do ! @@ -123,15 +122,15 @@ end subroutine curve_element_add ! end subroutine curve_apply_xyz -! subroutine sphere_surface(face, curve_data, x, y, z, Xh) +! subroutine sphere_surface(face, curve_data, x, y, z, Xh) !! Major overhaul. !! Martin Karp 1 March 2021 15:30:15 !! -!! 5 Aug 1988 19:29:52 +!! 5 Aug 1988 19:29:52 !! !! Program to generate spherical shell elements for NEKTON !! input. Paul F. Fischer -! +! ! NXM1 = NX-1 ! NYM1 = NY-1 ! NXY = NX*NZ @@ -146,8 +145,8 @@ end subroutine curve_element_add ! ! CALL CRN3D(XCV,XC(1,IE),YC(1,IE),ZC(1,IE),CURVE(1,IFCE,IE),IFACE) ! -! Generate edge vectors on the sphere RR=1.0, -! for (r,s) = (-1,*),(1,*),(*,-1),(*,1) +! Generate edge vectors on the sphere RR=1.0, +! for (r,s) = (-1,*),(1,*),(*,-1),(*,1) ! ! CALL EDG3D(XYSRF,XCV(1,1,1),XCV(1,1,2), 1, 1, 1,NY,NX,NY) ! CALL EDG3D(XYSRF,XCV(1,2,1),XCV(1,2,2),NX,NX, 1,NY,NX,NY) diff --git a/src/mesh/element.f90 b/src/mesh/element.f90 index a353b9ea3c4..165ff7413d1 100644 --- a/src/mesh/element.f90 +++ b/src/mesh/element.f90 @@ -36,7 +36,7 @@ module element use tuple, only : tuple_t use point, only : point_ptr, point_t implicit none - private + private !> Base type for an element !! @details An element is a collection of @a npts_ points forming an @@ -44,13 +44,13 @@ module element type, public, extends(entity_t), abstract :: element_t integer, private :: gdim_ !< Geometric dimension integer, private :: npts_ !< number of points - type(point_ptr), allocatable :: pts(:) !< Points of an element + type(point_ptr), allocatable :: pts(:) !< Points of an element contains procedure, pass(this) :: element => element_init procedure, pass(this) :: free => element_free procedure, pass(this) :: gdim => element_gdim procedure, pass(this) :: npts => element_npts - procedure, pass(this) :: p => element_point + procedure, pass(this) :: p => element_point procedure, pass(this) :: n_points => element_npts procedure, pass(this), non_overridable :: element_point procedure(element_equal), pass(this), deferred :: equal @@ -88,7 +88,7 @@ end function element_equal end interface abstract interface - subroutine element_facet_id(this, t, side) + subroutine element_facet_id(this, t, side) import :: element_t import :: tuple_t class(element_t), intent(in) :: this @@ -98,7 +98,7 @@ end subroutine element_facet_id end interface abstract interface - subroutine element_facet_order(this, t, side) + subroutine element_facet_order(this, t, side) import :: element_t import :: tuple_t class(element_t), intent(in) :: this @@ -118,7 +118,7 @@ subroutine element_init(this, id, gdim, npts) call this%free() call this%set_id(id) - + this%gdim_ = gdim this%npts_ = npts @@ -133,9 +133,9 @@ subroutine element_free(this) if (allocated(this%pts)) then deallocate(this%pts) end if - + end subroutine element_free - + !> Get the geometric dimension of an element pure function element_gdim(this) result(gdim) class(element_t), intent(in) :: this diff --git a/src/mesh/entity.f90 b/src/mesh/entity.f90 index 8d003497b59..c9340a77161 100644 --- a/src/mesh/entity.f90 +++ b/src/mesh/entity.f90 @@ -33,13 +33,13 @@ module entity implicit none private - + !> Base type for an entity - type, public, abstract :: entity_t + type, public, abstract :: entity_t integer, private :: id_ = -1 !< Entity index contains procedure, pass(this) :: id => entity_id - procedure, pass(this) :: set_id => entity_set_id + procedure, pass(this) :: set_id => entity_set_id end type entity_t contains diff --git a/src/mesh/facet_zone.f90 b/src/mesh/facet_zone.f90 index f7812f53294..9c6c44b5777 100644 --- a/src/mesh/facet_zone.f90 +++ b/src/mesh/facet_zone.f90 @@ -37,7 +37,7 @@ module facet_zone use utils, only : neko_error implicit none private - + type, public :: facet_zone_t type(tuple_i4_t), allocatable :: facet_el(:) integer :: size @@ -63,7 +63,7 @@ module facet_zone procedure, pass(z) :: finalize => facet_zone_periodic_finalize procedure, pass(z) :: add_periodic_facet => facet_zone_periodic_add_facet end type facet_zone_periodic_t - + contains !> Initialize a facet zone @@ -78,7 +78,7 @@ subroutine facet_zone_init(z, size) else call z%scratch%init() end if - + end subroutine facet_zone_init !> Deallocate a facet zone @@ -92,7 +92,7 @@ subroutine facet_zone_free(z) z%size = 0 call z%scratch%free() - + end subroutine facet_zone_free !> Finalize a zone list @@ -101,11 +101,11 @@ subroutine facet_zone_finalize(z) class(facet_zone_t), intent(inout) :: z type(tuple_i4_t), pointer :: tp(:) integer :: i - + if (.not. z%finalized) then allocate(z%facet_el(z%scratch%size())) - + tp => z%scratch%array() do i = 1, z%scratch%size() z%facet_el(i) = tp(i) @@ -116,9 +116,9 @@ subroutine facet_zone_finalize(z) call z%scratch%clear() z%finalized = .true. - + end if - + end subroutine facet_zone_finalize !> Add a (facet, el) tuple to an unfinalized zone @@ -134,10 +134,10 @@ subroutine facet_zone_add_facet(z, facet, el) t%x = (/ facet, el /) call z%scratch%push(t) - + end subroutine facet_zone_add_facet - !> Initialize a periodic zone + !> Initialize a periodic zone subroutine facet_zone_periodic_init(z, size) class(facet_zone_periodic_t), intent(inout) :: z integer, optional :: size @@ -155,7 +155,7 @@ subroutine facet_zone_periodic_init(z, size) call z%p_id_scratch%init() call z%org_id_scratch%init() end if - + end subroutine facet_zone_periodic_init !> Deallocate a zone @@ -178,7 +178,7 @@ subroutine facet_zone_periodic_free(z) call z%p_scratch%free() call z%p_id_scratch%free() call z%org_id_scratch%free() - + end subroutine facet_zone_periodic_free !> Finalize a periodic zone list @@ -189,7 +189,7 @@ subroutine facet_zone_periodic_finalize(z) type(tuple4_i4_t), pointer :: tp2(:) type(tuple4_i4_t), pointer :: tp3(:) integer :: i - + if (.not. z%finalized) then call facet_zone_finalize(z) @@ -201,7 +201,7 @@ subroutine facet_zone_periodic_finalize(z) allocate(z%p_facet_el(z%size)) allocate(z%p_ids(z%size)) allocate(z%org_ids(z%size)) - + tp => z%p_scratch%array() do i = 1, z%size z%p_facet_el(i) = tp(i) @@ -220,7 +220,7 @@ subroutine facet_zone_periodic_finalize(z) call z%org_id_scratch%clear() end if - + end subroutine facet_zone_periodic_finalize !> Add a (facet, el) tuple to an unfinalized zone @@ -248,7 +248,7 @@ subroutine facet_zone_periodic_add_facet(z, facet, el, p_facet, p_el, pids, org_ call z%p_id_scratch%push(t2) t3%x = org_ids call z%org_id_scratch%push(t3) - + end subroutine facet_zone_periodic_add_facet end module facet_zone diff --git a/src/mesh/hex.f90 b/src/mesh/hex.f90 index b849c0863ac..a71d6ddd519 100644 --- a/src/mesh/hex.f90 +++ b/src/mesh/hex.f90 @@ -51,12 +51,12 @@ module hex !! @verbatim !! Node numbering (NEKTON symmetric notation) !! - !! 3+-----+4 ^ s - !! / /| | - !! / / | | - !! 7+-----+8 +2 +----> r - !! | | / / - !! | |/ / + !! 3+-----+4 ^ s + !! / /| | + !! / / | | + !! 7+-----+8 +2 +----> r + !! | | / / + !! | |/ / !! 5+-----+6 t !! !! @endverbatim @@ -67,7 +67,7 @@ module hex procedure, pass(this) :: facet_order => hex_facet_order procedure, pass(this) :: diameter => hex_diameter procedure, pass(this) :: centroid => hex_centroid - procedure, pass(this) :: edge_id => hex_edge_id + procedure, pass(this) :: edge_id => hex_edge_id procedure, pass(this) :: equal => hex_equal generic :: operator(.eq.) => equal end type hex_t @@ -76,7 +76,7 @@ module hex !! @details !! @verbatim !! Face numbering (NEKTON symmetric notation) - !! + !! !! +--------+ ^ S !! / /| | !! / 4 / | | @@ -97,13 +97,13 @@ module hex 1,2,4,3,& 5,6,8,7/),& (/4,6/)) - + !> Edge node ids !! @details !! @verbatim !! Edge numbering (similar to NEKTON symmetric notation) !! - !! 2 + !! 2 !! +--------+ ^ S !! / /| | !! 11--> / 12-->/ | <--6 | @@ -129,9 +129,9 @@ module hex 3,7,& 4,8/),& (/2,12/)) - + contains - + !> Create a hexahedron element based upon eight points subroutine hex_init(this, id, p1, p2, p3, p4, p5, p6, p7, p8) class(hex_t), intent(inout) :: this @@ -139,7 +139,7 @@ subroutine hex_init(this, id, p1, p2, p3, p4, p5, p6, p7, p8) type(point_t), target, intent(in) :: p1, p2, p3, p4, p5, p6, p7, p8 call this%element(id, NEKO_HEX_GDIM, NEKO_HEX_NPTS) - + this%pts(1)%p => p1 this%pts(2)%p => p2 this%pts(3)%p => p3 @@ -152,7 +152,7 @@ subroutine hex_init(this, id, p1, p2, p3, p4, p5, p6, p7, p8) end subroutine hex_init !> Return the facet id for face @a i as a 4-tuple @a t - subroutine hex_facet_id(this, t, side) + subroutine hex_facet_id(this, t, side) class(hex_t), intent(in) :: this class(tuple_t), intent(inout) :: t integer, intent(in) :: side @@ -167,7 +167,7 @@ subroutine hex_facet_id(this, t, side) select type(t) type is(tuple4_i4_t) t%x = (/ p1%id(), p2%id(), p3%id(), p4%id() /) - do i = 1, 3 + do i = 1, 3 do j = i+1,4 if(t%x(j) .lt. t%x(i)) then temp = t%x(i) @@ -181,7 +181,7 @@ subroutine hex_facet_id(this, t, side) end subroutine hex_facet_id !> Return the ordered points for face @a i as a 4-tuple @a t - subroutine hex_facet_order(this, t, side) + subroutine hex_facet_order(this, t, side) class(hex_t), intent(in) :: this class(tuple_t), intent(inout) :: t integer, intent(in) :: side @@ -201,7 +201,7 @@ end subroutine hex_facet_order !> Return the edge id for an edge @a i as a 2-tuple @a t - subroutine hex_edge_id(this, t, side) + subroutine hex_edge_id(this, t, side) class(hex_t), intent(in) :: this class(tuple_t), intent(inout) :: t integer, intent(in) :: side @@ -214,14 +214,14 @@ subroutine hex_edge_id(this, t, side) type is(tuple_i4_t) if (p1%id() .lt. p2%id()) then t%x = (/ p1%id(), p2%id() /) - else + else t%x = (/ p2%id(), p1%id() /) - endif + endif end select end subroutine hex_edge_id - + !> Compute the diameter of a hexahedron element function hex_diameter(this) result(res) class(hex_t), intent(in) :: this @@ -275,7 +275,7 @@ function hex_centroid(this) result(res) res%x(i) = 0.125 * (p1%x(i) + p2%x(i) + p3%x(i) + p4%x(i) + & p5%x(i) + p6%x(i) + p7%x(i) + p8%x(i)) end do - + end function hex_centroid !> Check if two hex elements are equal @@ -301,5 +301,5 @@ pure function hex_equal(this, other) result(res) end select end function hex_equal - + end module hex diff --git a/src/mesh/mesh.f90 b/src/mesh/mesh.f90 index 15ade4b52fc..4f60b823fb2 100644 --- a/src/mesh/mesh.f90 +++ b/src/mesh/mesh.f90 @@ -43,15 +43,18 @@ module mesh use htable use datadist use distdata - use comm + use comm use facet_zone, only : facet_zone_t, facet_zone_periodic_t use math use uset, only : uset_i8_t use curve, only : curve_t implicit none private - - integer, public, parameter :: NEKO_MSH_MAX_ZLBLS = 20 !< Max num. zone labels + + !> Max num. zone labels + integer, public, parameter :: NEKO_MSH_MAX_ZLBLS = 20 + !> Max length of a zone label + integer, public, parameter :: NEKO_MSH_MAX_ZLBL_LEN = 40 type, private :: mesh_element_t class(element_t), allocatable :: e @@ -69,14 +72,14 @@ module mesh integer :: glb_mpts !< Global number of unique points integer :: glb_mfcs !< Global number of unique faces integer :: glb_meds !< Global number of unique edges - + integer :: offset_el !< Element offset integer :: max_pts_id !< Max local point id - + type(point_t), allocatable :: points(:) !< list of points type(mesh_element_t), allocatable :: elements(:) !< List of elements logical, allocatable :: dfrmd_el(:) !< List of elements - + type(htable_i4_t) :: htp !< Table of unique points (global->local) type(htable_i4t4_t) :: htf !< Table of unique faces (facet->local id) type(htable_i4t2_t) :: hte !< Table of unique edges (edge->local id) @@ -86,15 +89,15 @@ module mesh !> Facet to element's id tuple and the mapping of the !! points between lower id element and higher !! \f$ t=(low_id element, element with higher global id) \f$ - class(htable_t), allocatable :: facet_map + class(htable_t), allocatable :: facet_map type(stack_i4_t), allocatable :: point_neigh(:) !< Point to neigh. table type(distdata_t) :: ddata !< Mesh distributed data logical, allocatable :: neigh(:) !< Neighbouring ranks integer, allocatable :: neigh_order(:) !< Neighbour order - integer(2), allocatable :: facet_type(:,:) !< Facet type - + integer(2), allocatable :: facet_type(:,:) !< Facet type + type(facet_zone_t) :: wall !< Zone of wall facets type(facet_zone_t) :: inlet !< Zone of inlet facets type(facet_zone_t) :: outlet !< Zone of outlet facets @@ -162,7 +165,7 @@ module mesh abstract interface subroutine mesh_deform(msh, x, y, z, lx, ly, lz) - import mesh_t + import mesh_t import rp class(mesh_t) :: msh integer, intent(in) :: lx, ly, lz @@ -173,8 +176,8 @@ end subroutine mesh_deform end interface public :: mesh_deform - -contains + +contains !> Initialise a mesh @a this with @a nelv elements subroutine mesh_init_nelv(this, gdim, nelv) @@ -182,7 +185,7 @@ subroutine mesh_init_nelv(this, gdim, nelv) integer, intent(in) :: gdim !< Geometric dimension integer, intent(in) :: nelv !< Local number of elements integer :: ierr - + call this%free() this%nelv = nelv @@ -196,7 +199,7 @@ subroutine mesh_init_nelv(this, gdim, nelv) MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr) call mesh_init_common(this) - + end subroutine mesh_init_nelv !> Initialise a mesh @a this based on a distribution @a dist @@ -206,14 +209,14 @@ subroutine mesh_init_dist(this, gdim, dist) type(linear_dist_t), intent(in) :: dist !< Data distribution call this%free() - + this%nelv = dist%num_local() this%glb_nelv = dist%num_global() this%offset_el = dist%start_idx() this%gdim = gdim call mesh_init_common(this) - + end subroutine mesh_init_dist subroutine mesh_init_common(this) @@ -222,7 +225,7 @@ subroutine mesh_init_common(this) type(tuple_i4_t) :: facet_data this%max_pts_id = 0 - + allocate(this%elements(this%nelv)) allocate(this%dfrmd_el(this%nelv)) if (this%gdim .eq. 3) then @@ -249,7 +252,7 @@ subroutine mesh_init_common(this) end do this%npts = NEKO_QUAD_NPTS if (this%lgenc) then - allocate(htable_i4t2_t::this%facet_map) + allocate(htable_i4t2_t::this%facet_map) select type (fmp => this%facet_map) type is(htable_i4t2_t) call fmp%init(this%nelv, facet_data) @@ -277,7 +280,7 @@ subroutine mesh_init_common(this) allocate(this%facet_type(2 * this%gdim, this%nelv)) this%facet_type = 0 - + call this%htp%init(this%npts*this%nelv, i) call this%wall%init(this%nelv) @@ -293,31 +296,29 @@ subroutine mesh_init_common(this) end do call this%curve%init(this%nelv) - + call distdata_init(this%ddata) allocate(this%neigh(0:pe_size-1)) this%neigh = .false. - + this%mpts = 0 this%mfcs = 0 this%meds = 0 end subroutine mesh_init_common - + !> Deallocate a mesh @a this subroutine mesh_free(this) class(mesh_t), intent(inout) :: this integer :: i - + call this%htp%free() call this%htf%free() call this%hte%free() call distdata_free(this%ddata) - - if (allocated(this%points)) then - deallocate(this%points) - end if + + if (allocated(this%dfrmd_el)) then deallocate(this%dfrmd_el) end if @@ -369,20 +370,27 @@ subroutine mesh_free(this) deallocate(this%neigh_order) end if + if (allocated(this%points)) then + deallocate(this%points) + end if + call this%wall%free() call this%inlet%free() call this%outlet%free() call this%outlet_normal%free() call this%sympln%free() call this%periodic%free() - + this%lconn = .false. + this%lnumr = .false. + this%ldist = .false. + this%lgenc = .true. + end subroutine mesh_free subroutine mesh_finalize(this) class(mesh_t), target, intent(inout) :: this integer :: i - - + call mesh_generate_flags(this) call mesh_generate_conn(this) @@ -436,7 +444,7 @@ subroutine mesh_all_deformed(this) class(mesh_t), intent(inout) :: this this%dfrmd_el = .true. end subroutine mesh_all_deformed - + !> Generate element-to-element connectivity subroutine mesh_generate_conn(this) class(mesh_t), target, intent(inout) :: this @@ -444,12 +452,56 @@ subroutine mesh_generate_conn(this) type(tuple4_i4_t) :: face, face_comp type(tuple_i4_t) :: facet_data type(stack_i4_t) :: neigh_order - + class(element_t), pointer :: ep + type(tuple_i4_t) :: e + type(tuple4_i4_t) :: f + integer :: p_local_idx, res + integer :: el, id integer :: i, j, k, ierr, el_glb_idx, n_sides, n_nodes, src, dst if (this%lconn) return if (.not. this%lgenc) return + + !If we generate connectivity, we do that here. + do el = 1, this%nelv + ep => this%elements(el)%e + select type(ep) + type is (hex_t) + do i = 1, NEKO_HEX_NPTS + !Only for getting the id + call this%add_point(ep%pts(i)%p, id) + p_local_idx = this%get_local(this%points(id)) + !should stack have inout on what we push? would be neat with in + id = ep%id() + call this%point_neigh(p_local_idx)%push(id) + end do + do i = 1, NEKO_HEX_NFCS + call ep%facet_id(f, i) + call this%add_face(f) + end do + + do i = 1, NEKO_HEX_NEDS + call ep%edge_id(e, i) + call this%add_edge(e) + end do + type is (quad_t) + do i = 1, NEKO_QUAD_NPTS + !Only for getting the id + call this%add_point(ep%pts(i)%p, id) + p_local_idx = this%get_local(this%points(id)) + !should stack have inout on what we push? would be neat with in + id = ep%id() + call this%point_neigh(p_local_idx)%push(id) + end do + + do i = 1, NEKO_QUAD_NEDS + call ep%facet_id(e, i) + call this%add_edge(e) + end do + end select + end do + if (this%gdim .eq. 2) then n_sides = 4 @@ -466,90 +518,90 @@ subroutine mesh_generate_conn(this) ! ! Find all (local) boundaries ! - + !> @note We have to sweep through the facet map twice to make sure !! that both odd and even sides are marked !! @todo These loop nests needs a lot of love... select type (fmp => this%facet_map) type is(htable_i4t2_t) - do k = 1, 2 + do k = 1, 2 do i = 1, this%nelv el_glb_idx = i + this%offset_el do j = 1, n_sides call this%elements(i)%e%facet_id(edge, j) - + ! Assume that all facets are on the exterior facet_data%x = (/ 0, 0/) - + !check it this face has shown up earlier if (fmp%get(edge, facet_data) .eq. 0) then - !if element is already recognized on face - if (facet_data%x(1) .eq. el_glb_idx ) then - this%facet_neigh(j, i) = facet_data%x(2) - else if( facet_data%x(2) .eq. el_glb_idx) then - this%facet_neigh(j, i) = facet_data%x(1) - !if this is the second element, arrange so low id is first - else if(facet_data%x(1) .gt. el_glb_idx) then - facet_data%x(2) = facet_data%x(1) - facet_data%x(1) = el_glb_idx - this%facet_neigh(j, i) = facet_data%x(2) - call fmp%set(edge, facet_data) - else if(facet_data%x(1) .lt. el_glb_idx) then - facet_data%x(2) = el_glb_idx - this%facet_neigh(j, i) = facet_data%x(1) - call fmp%set(edge, facet_data) - end if + !if element is already recognized on face + if (facet_data%x(1) .eq. el_glb_idx ) then + this%facet_neigh(j, i) = facet_data%x(2) + else if( facet_data%x(2) .eq. el_glb_idx) then + this%facet_neigh(j, i) = facet_data%x(1) + !if this is the second element, arrange so low id is first + else if(facet_data%x(1) .gt. el_glb_idx) then + facet_data%x(2) = facet_data%x(1) + facet_data%x(1) = el_glb_idx + this%facet_neigh(j, i) = facet_data%x(2) + call fmp%set(edge, facet_data) + else if(facet_data%x(1) .lt. el_glb_idx) then + facet_data%x(2) = el_glb_idx + this%facet_neigh(j, i) = facet_data%x(1) + call fmp%set(edge, facet_data) + end if else facet_data%x(1) = el_glb_idx this%facet_neigh(j, i) = facet_data%x(2) - call fmp%set(edge, facet_data) + call fmp%set(edge, facet_data) end if end do end do end do type is(htable_i4t4_t) - - do k = 1, 2 + + do k = 1, 2 do i = 1, this%nelv el_glb_idx = i + this%offset_el do j = 1, n_sides call this%elements(i)%e%facet_id(face, j) - + facet_data%x = (/ 0, 0/) !check it this face has shown up earlier if (fmp%get(face, facet_data) .eq. 0) then - !if element is already recognized on face - if (facet_data%x(1) .eq. el_glb_idx ) then - this%facet_neigh(j, i) = facet_data%x(2) - call this%elements(i)%e%facet_id(face_comp, & - j+(2*mod(j,2)-1)) - if (face_comp .eq. face) then - facet_data%x(2) = el_glb_idx - this%facet_neigh(j, i) = facet_data%x(1) - call fmp%set(face, facet_data) - end if - else if( facet_data%x(2) .eq. el_glb_idx) then - this%facet_neigh(j, i) = facet_data%x(1) - !if this is the second element, arrange so low id is first - else if(facet_data%x(1) .gt. el_glb_idx) then - facet_data%x(2) = facet_data%x(1) - facet_data%x(1) = el_glb_idx - this%facet_neigh(j, i) = facet_data%x(2) - call fmp%set(face, facet_data) - else if(facet_data%x(1) .lt. el_glb_idx) then - facet_data%x(2) = el_glb_idx - this%facet_neigh(j, i) = facet_data%x(1) - call fmp%set(face, facet_data) - end if + !if element is already recognized on face + if (facet_data%x(1) .eq. el_glb_idx ) then + this%facet_neigh(j, i) = facet_data%x(2) + call this%elements(i)%e%facet_id(face_comp, & + j+(2*mod(j,2)-1)) + if (face_comp .eq. face) then + facet_data%x(2) = el_glb_idx + this%facet_neigh(j, i) = facet_data%x(1) + call fmp%set(face, facet_data) + end if + else if( facet_data%x(2) .eq. el_glb_idx) then + this%facet_neigh(j, i) = facet_data%x(1) + !if this is the second element, arrange so low id is first + else if(facet_data%x(1) .gt. el_glb_idx) then + facet_data%x(2) = facet_data%x(1) + facet_data%x(1) = el_glb_idx + this%facet_neigh(j, i) = facet_data%x(2) + call fmp%set(face, facet_data) + else if(facet_data%x(1) .lt. el_glb_idx) then + facet_data%x(2) = el_glb_idx + this%facet_neigh(j, i) = facet_data%x(1) + call fmp%set(face, facet_data) + end if else facet_data%x(1) = el_glb_idx this%facet_neigh(j, i) = 0 - call fmp%set(face, facet_data) + call fmp%set(face, facet_data) end if + end do end do end do - end do class default call neko_error('Invalid facet map') end select @@ -559,20 +611,20 @@ subroutine mesh_generate_conn(this) ! Find all external (between PEs) boundaries ! if (pe_size .gt. 1) then - + call mesh_generate_external_point_conn(this) ! ! Generate neighbour exchange order ! call neigh_order%init(pe_size) - + do i = 1, pe_size - 1 src = modulo(pe_rank - i + pe_size, pe_size) dst = modulo(pe_rank + i, pe_size) if (this%neigh(src) .or. this%neigh(dst)) then j = i ! adhere to standards... - call neigh_order%push(j) + call neigh_order%push(j) end if end do @@ -584,7 +636,7 @@ subroutine mesh_generate_conn(this) end do end select call neigh_order%free() - + call mesh_generate_external_facet_conn(this) else allocate(this%neigh_order(1)) @@ -599,14 +651,14 @@ subroutine mesh_generate_conn(this) if (this%gdim .eq. 3) then call mesh_generate_edge_conn(this) end if - + call mesh_generate_facet_numbering(this) - + this%lconn = .true. - + end subroutine mesh_generate_conn - + !> Generate element-element connectivity via facets between PEs subroutine mesh_generate_external_facet_conn(this) type(mesh_t), intent(inout) :: this @@ -630,7 +682,7 @@ subroutine mesh_generate_external_facet_conn(this) end if call buffer%init() - + ! Build send buffers containing ! [el_glb_idx, side number, facet_id (global ids of points)] do i = 1, this%nelv @@ -639,7 +691,7 @@ subroutine mesh_generate_external_facet_conn(this) facet = j ! Adhere to standards... if (this%facet_neigh(j, i) .eq. 0) then if (n_nodes .eq. 2) then - call this%elements(i)%e%facet_id(edge, j) + call this%elements(i)%e%facet_id(edge, j) call buffer%push(el_glb_idx) call buffer%push(facet) do k = 1, n_nodes @@ -662,7 +714,7 @@ subroutine mesh_generate_external_facet_conn(this) MPI_INTEGER, MPI_MAX, NEKO_COMM, ierr) allocate(recv_buffer(max_recv)) - + do i = 1, size(this%neigh_order) src = modulo(pe_rank - this%neigh_order(i) + pe_size, pe_size) dst = modulo(pe_rank + this%neigh_order(i), pe_size) @@ -741,7 +793,7 @@ subroutine mesh_generate_external_facet_conn(this) end if end do this%facet_neigh(facet, element) = -neigh_el - facet_data%x(2) = -neigh_el + facet_data%x(2) = -neigh_el ! Update facet map call fmp%set(face, facet_data) @@ -764,10 +816,10 @@ subroutine mesh_generate_external_facet_conn(this) if (this%neigh(dst)) then call MPI_Wait(send_req, MPI_STATUS_IGNORE, ierr) end if - + end do - + deallocate(recv_buffer) call buffer%free() @@ -783,13 +835,13 @@ subroutine mesh_generate_external_point_conn(this) integer :: i, j, k integer :: max_recv, ierr, src, dst, n_recv, neigh_el integer :: pt_glb_idx, pt_loc_idx, num_neigh - integer, pointer :: neighs(:) + integer, contiguous, pointer :: neighs(:) + - call send_buffer%init(this%mpts * 2) - + ! Build send buffers containing - ! [pt_glb_idx, #neigh, neigh id_1 ....neigh_id_n] + ! [pt_glb_idx, #neigh, neigh id_1 ....neigh_id_n] do i = 1, this%mpts pt_glb_idx = this%points(i)%id() ! Adhere to standards... num_neigh = this%point_neigh(i)%size() @@ -805,7 +857,7 @@ subroutine mesh_generate_external_point_conn(this) call MPI_Allreduce(send_buffer%size(), max_recv, 1, & MPI_INTEGER, MPI_MAX, NEKO_COMM, ierr) allocate(recv_buffer(max_recv)) - + do i = 1, pe_size - 1 src = modulo(pe_rank - i + pe_size, pe_size) dst = modulo(pe_rank + i, pe_size) @@ -830,14 +882,14 @@ subroutine mesh_generate_external_point_conn(this) call distdata_set_shared_point(this%ddata, pt_loc_idx) end do end if - j = j + (2 + num_neigh) + j = j + (2 + num_neigh) end do - + end do deallocate(recv_buffer) call send_buffer%free() - + end subroutine mesh_generate_external_point_conn !> Generate element-element connectivity via edges @@ -852,7 +904,7 @@ subroutine mesh_generate_edge_conn(this) type(htable_i8_t) :: glb_to_loc type(MPI_Status) :: status type(MPI_Request) :: send_req, recv_req - integer, pointer :: p1(:), p2(:), ns_id(:) + integer, contiguous, pointer :: p1(:), p2(:), ns_id(:) integer :: i, j, id, ierr, num_edge_glb, edge_offset, num_edge_loc integer :: k, l , shared_offset, glb_nshared, n_glb_id integer(kind=i8) :: C, glb_max, glb_id @@ -874,7 +926,7 @@ subroutine mesh_generate_edge_conn(this) ! ! Determine/ constants used to generate unique global edge numbers - ! for shared edges + ! for shared edges ! C = int(this%glb_nelv, i8) * int(NEKO_HEX_NEDS, i8) @@ -887,7 +939,7 @@ subroutine mesh_generate_edge_conn(this) call non_shared_edges%init(this%hte%num_entries()) call it%init(this%hte) - do while(it%next()) + do while(it%next()) edge => it%key() call it%data(id) @@ -897,8 +949,8 @@ subroutine mesh_generate_edge_conn(this) p2 => this%point_neigh(l)%array() shared_edge = .false. - - ! Find edge neighbor from point neighbors + + ! Find edge neighbor from point neighbors do i = 1, this%point_neigh(k)%size() do j = 1, this%point_neigh(l)%size() if ((p1(i) .eq. p2(j)) .and. & @@ -911,7 +963,7 @@ subroutine mesh_generate_edge_conn(this) ! Generate a unique id for the shared edge as, ! ((e1 * C) + e2 )) + glb_max if e1 > e2 - ! ((e2 * C) + e1 )) + glb_max if e2 > e1 + ! ((e2 * C) + e1 )) + glb_max if e2 > e1 if (shared_edge) then glb_id = ((int(edge%x(1), i8)) + int(edge%x(2), i8)*C) + glb_max call glb_to_loc%set(glb_id, id) @@ -934,14 +986,14 @@ subroutine mesh_generate_edge_conn(this) ns_id => non_shared_edges%array() do i = 1, non_shared_edges%size() call distdata_set_local_to_global_edge(this%ddata, ns_id(i), edge_offset) - edge_offset = edge_offset + 1 + edge_offset = edge_offset + 1 end do nullify(ns_id) - + ! ! Renumber shared edges into integer range ! - + call MPI_Allreduce(send_buff%size(), max_recv, 1, & MPI_INTEGER, MPI_MAX, NEKO_COMM, ierr) @@ -972,7 +1024,7 @@ subroutine mesh_generate_edge_conn(this) if (this%neigh(src)) then call MPI_Wait(recv_req, status, ierr) call MPI_Get_count(status, MPI_INTEGER8, n_recv, ierr) - + do j = 1, n_recv if ((edge_idx%element(recv_buff(j))) .and. (src .lt. pe_rank)) then call ghost%add(recv_buff(j)) @@ -986,7 +1038,7 @@ subroutine mesh_generate_edge_conn(this) end if end do - + ! Determine start offset for global numbering of shared edges glb_nshared = num_edge_loc call MPI_Allreduce(MPI_IN_PLACE, glb_nshared, 1, & @@ -996,7 +1048,7 @@ subroutine mesh_generate_edge_conn(this) call MPI_Exscan(owner%size(), shared_offset, 1, & MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr) shared_offset = shared_offset + glb_nshared + 1 - + ! Renumber locally owned set of shared edges call send_buff%clear() call owner%iter_init() @@ -1016,12 +1068,12 @@ subroutine mesh_generate_edge_conn(this) end if end do nullify(glb_ptr) - + ! Determine total number of unique edges in the mesh ! (This can probably be done in a clever way...) - this%glb_meds = shared_offset -1 + this%glb_meds = shared_offset -1 call MPI_Allreduce(MPI_IN_PLACE, this%glb_meds, 1, & - MPI_INTEGER, MPI_MAX, NEKO_COMM, IERR) + MPI_INTEGER, MPI_MAX, NEKO_COMM, IERR) ! ! Update ghosted edges with new global id @@ -1053,11 +1105,11 @@ subroutine mesh_generate_edge_conn(this) dst, 0, NEKO_COMM, send_req, ierr) end select end if - + if (this%neigh(src)) then call MPI_Wait(recv_req, status, ierr) call MPI_Get_count(status, MPI_INTEGER8, n_recv, ierr) - + do j = 1, n_recv, 2 if (ghost%element(recv_buff(j))) then if (glb_to_loc%get(recv_buff(j), id) .eq. 0) then @@ -1104,7 +1156,7 @@ subroutine mesh_generate_facet_numbering(this) type(MPI_Status) :: status type(MPI_Request) :: send_req, recv_req integer, allocatable :: recv_buff(:) - integer :: non_shared_facets, shared_facets, facet_offset + integer :: non_shared_facets, shared_facets, facet_offset integer :: id, glb_nshared, shared_offset, owned_facets integer :: i, j, ierr, max_recv, src, dst, n_recv @@ -1114,29 +1166,29 @@ subroutine mesh_generate_facet_numbering(this) if (this%gdim .eq. 2) then allocate(this%ddata%local_to_global_facet(this%meds)) call edge_owner%init(this%meds) - call edge_ghost%init(64, i) + call edge_ghost%init(64, i) non_shared_facets = this%hte%num_entries() - shared_facets else allocate(this%ddata%local_to_global_facet(this%mfcs)) call face_owner%init(this%mfcs) - call face_ghost%init(64, i) + call face_ghost%init(64, i) non_shared_facets = this%htf%num_entries() - shared_facets end if - + !> @todo Move this into distdata as a method... - + facet_offset = 0 call MPI_Exscan(non_shared_facets, facet_offset, 1, & MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr) facet_offset = facet_offset + 1 - + ! Determine ownership of shared facets if (this%gdim .eq. 2) then call edge_it%init(this%hte) do while (edge_it%next()) call edge_it%data(id) edge => edge_it%key() - if (.not. this%ddata%shared_facet%element(id)) then + if (.not. this%ddata%shared_facet%element(id)) then call distdata_set_local_to_global_facet(this%ddata, & id, facet_offset) facet_offset = facet_offset + 1 @@ -1163,7 +1215,7 @@ subroutine mesh_generate_facet_numbering(this) do while (face_it%next()) call face_it%data(id) face => face_it%key() - if (.not. this%ddata%shared_facet%element(id)) then + if (.not. this%ddata%shared_facet%element(id)) then call distdata_set_local_to_global_facet(this%ddata, & id, facet_offset) facet_offset = facet_offset + 1 @@ -1186,50 +1238,50 @@ subroutine mesh_generate_facet_numbering(this) end do owned_facets = face_owner%size() end if - + ! Determine start offset for global numbering of shared facets glb_nshared = non_shared_facets call MPI_Allreduce(MPI_IN_PLACE, glb_nshared, 1, & MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr) - + shared_offset = 0 call MPI_Exscan(owned_facets, shared_offset, 1, & MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr) shared_offset = shared_offset + glb_nshared + 1 if (this%gdim .eq. 2) then - + if (owned_facets .gt. 32) then call send_buff%init(owned_facets) else call send_buff%init() end if - + ed => edge_owner%array() do i = 1, edge_owner%size() if (this%hte%get(ed(i), id) .eq. 0) then call distdata_set_local_to_global_facet(this%ddata, id, & shared_offset) - + ! Add new number to send buffer ! [edge id1 ... edge idn new_glb_id] do j = 1, 2 call send_buff%push(ed(i)%x(j)) end do call send_buff%push(shared_offset) - + shared_offset = shared_offset + 1 end if end do - + else - + if (owned_facets .gt. 32) then call send_buff%init(owned_facets) else call send_buff%init() end if - + fd => face_owner%array() do i = 1, face_owner%size() if (this%htf%get(fd(i), id) .eq. 0) then @@ -1242,28 +1294,28 @@ subroutine mesh_generate_facet_numbering(this) call send_buff%push(fd(i)%x(j)) end do call send_buff%push(shared_offset) - + shared_offset = shared_offset + 1 end if end do nullify(fd) - + end if ! Determine total number of unique facets in the mesh ! (This can probably be done in a clever way...) this%glb_mfcs = shared_offset - 1 call MPI_Allreduce(MPI_IN_PLACE, this%glb_mfcs, 1, & - MPI_INTEGER, MPI_MAX, NEKO_COMM, IERR) + MPI_INTEGER, MPI_MAX, NEKO_COMM, IERR) ! ! Update ghosted facets with new global id ! - + call MPI_Allreduce(send_buff%size(), max_recv, 1, & MPI_INTEGER, MPI_MAX, NEKO_COMM, ierr) - allocate(recv_buff(max_recv)) + allocate(recv_buff(max_recv)) !> @todo Since we now the neigh. we can actually do p2p here... do i = 1, size(this%neigh_order) @@ -1279,28 +1331,28 @@ subroutine mesh_generate_facet_numbering(this) call MPI_Isend(send_buff%array(), send_buff%size(), MPI_INTEGER, & dst, 0, NEKO_COMM, send_req, ierr) end if - + if (this%neigh(src)) then call MPI_Wait(recv_req, status, ierr) call MPI_Get_count(status, MPI_INTEGER, n_recv, ierr) - + if (this%gdim .eq. 2) then do j = 1, n_recv, 3 - + recv_edge = (/recv_buff(j), recv_buff(j+1)/) - + ! Check if the PE has the shared edge if (edge_ghost%get(recv_edge, id) .eq. 0) then call distdata_set_local_to_global_facet(this%ddata, & id, recv_buff(j+2)) end if end do - else + else do j = 1, n_recv, 5 - + recv_face = (/recv_buff(j), recv_buff(j+1), & recv_buff(j+2), recv_buff(j+3) /) - + ! Check if the PE has the shared face if (face_ghost%get(recv_face, id) .eq. 0) then call distdata_set_local_to_global_facet(this%ddata, & @@ -1313,7 +1365,7 @@ subroutine mesh_generate_facet_numbering(this) if (this%neigh(dst)) then call MPI_Wait(send_req, MPI_STATUS_IGNORE, ierr) end if - + end do if (this%gdim .eq. 2) then @@ -1323,28 +1375,28 @@ subroutine mesh_generate_facet_numbering(this) call face_owner%free() call face_ghost%free() end if - + call send_buff%free() deallocate(recv_buff) - + end subroutine mesh_generate_facet_numbering - - + + !> Add a quadrilateral element to the mesh @a this subroutine mesh_add_quad(this, el, p1, p2, p3, p4) class(mesh_t), target, intent(inout) :: this integer, value :: el - type(point_t), intent(inout) :: p1, p2, p3, p4 + type(point_t), target, intent(inout) :: p1, p2, p3, p4 class(element_t), pointer :: ep integer :: p(4), el_glb_idx, i, p_local_idx type(tuple_i4_t) :: e - ! Connectivity invalidated if a new element is added + ! Connectivity invalidated if a new element is added this%lconn = .false. ! Numbering invalidated if a new element is added this%lnumr = .false. - + call this%add_point(p1, p(1)) call this%add_point(p2, p(2)) call this%add_point(p3, p(3)) @@ -1353,44 +1405,35 @@ subroutine mesh_add_quad(this, el, p1, p2, p3, p4) ep => this%elements(el)%e el_glb_idx = el + this%offset_el - do i = 1, NEKO_QUAD_NPTS - p_local_idx = this%get_local(this%points(p(i))) - call this%point_neigh(p_local_idx)%push(el_glb_idx) - end do - select type(ep) type is (quad_t) call ep%init(el_glb_idx, & this%points(p(1)), this%points(p(2)), & this%points(p(3)), this%points(p(4))) - do i = 1, NEKO_QUAD_NEDS - call ep%facet_id(e, i) - call this%add_edge(e) - end do class default call neko_error('Invalid element type') end select - + end subroutine mesh_add_quad !> Add a hexahedral element to the mesh @a this subroutine mesh_add_hex(this, el, p1, p2, p3, p4, p5, p6, p7, p8) class(mesh_t), target, intent(inout) :: this integer, value :: el - type(point_t), intent(inout) :: p1, p2, p3, p4, p5, p6, p7, p8 + type(point_t), target, intent(inout) :: p1, p2, p3, p4, p5, p6, p7, p8 class(element_t), pointer :: ep integer :: p(8), el_glb_idx, i, p_local_idx type(tuple4_i4_t) :: f type(tuple_i4_t) :: e - ! Connectivity invalidated if a new element is added + ! Connectivity invalidated if a new element is added this%lconn = .false. ! Numbering invalidated if a new element is added this%lnumr = .false. - + call this%add_point(p1, p(1)) call this%add_point(p2, p(2)) call this%add_point(p3, p(3)) @@ -1402,12 +1445,6 @@ subroutine mesh_add_hex(this, el, p1, p2, p3, p4, p5, p6, p7, p8) ep => this%elements(el)%e el_glb_idx = el + this%offset_el - if (this%lgenc) then - do i = 1, NEKO_HEX_NPTS - p_local_idx = this%get_local(this%points(p(i))) - call this%point_neigh(p_local_idx)%push(el_glb_idx) - end do - end if select type(ep) type is (hex_t) call ep%init(el_glb_idx, & @@ -1415,19 +1452,6 @@ subroutine mesh_add_hex(this, el, p1, p2, p3, p4, p5, p6, p7, p8) this%points(p(3)), this%points(p(4)), & this%points(p(5)), this%points(p(6)), & this%points(p(7)), this%points(p(8))) - - if (this%lgenc) then - do i = 1, NEKO_HEX_NFCS - call ep%facet_id(f, i) - call this%add_face(f) - end do - - do i = 1, NEKO_HEX_NEDS - call ep%edge_id(e, i) - call this%add_edge(e) - end do - end if - class default call neko_error('Invalid element type') end select @@ -1440,11 +1464,11 @@ subroutine mesh_add_point(this, p, idx) type(point_t), intent(inout) :: p integer, intent(inout) :: idx integer :: tmp - + tmp = p%id() this%max_pts_id = max(this%max_pts_id, tmp) - + if (tmp .le. 0) then call neko_error("Invalid point id") end if @@ -1455,7 +1479,7 @@ subroutine mesh_add_point(this, p, idx) this%points(this%mpts) = p idx = this%mpts end if - + end subroutine mesh_add_point !> Add a unique face represented as a 4-tuple to the mesh @@ -1468,9 +1492,9 @@ subroutine mesh_add_face(this, f) this%mfcs = this%mfcs + 1 call this%htf%set(f, this%mfcs) end if - + end subroutine mesh_add_face - + !> Add a unique edge represented as a 2-tuple to the mesh subroutine mesh_add_edge(this, e) class(mesh_t), intent(inout) :: this @@ -1481,7 +1505,7 @@ subroutine mesh_add_edge(this, e) this%meds = this%meds + 1 call this%hte%set(e, this%meds) end if - + end subroutine mesh_add_edge !> Mark facet @a f in element @a e as a wall @@ -1500,15 +1524,15 @@ subroutine mesh_mark_wall_facet(this, f, e) end if this%facet_type(f, e) = 2 call this%wall%add_facet(f, e) - + end subroutine mesh_mark_wall_facet !> Mark element @a e as a curve element subroutine mesh_mark_curve_element(this, e, curve_data, curve_type) class(mesh_t), intent(inout) :: this integer, intent(in) :: e - real(kind=dp), dimension(5,12), intent(in) :: curve_data - integer, dimension(12), intent(in) :: curve_type + real(kind=dp), dimension(5,12), intent(in) :: curve_data + integer, dimension(12), intent(in) :: curve_type if (e .gt. this%nelv) then call neko_error('Invalid element index') @@ -1517,7 +1541,7 @@ subroutine mesh_mark_curve_element(this, e, curve_data, curve_type) call neko_error('Invalid curve element') end if call this%curve%add_element(e, curve_data, curve_type) - + end subroutine mesh_mark_curve_element @@ -1537,10 +1561,10 @@ subroutine mesh_mark_inlet_facet(this, f, e) end if this%facet_type(f, e) = 2 call this%inlet%add_facet(f, e) - + end subroutine mesh_mark_inlet_facet - - !> Mark facet @a f in element @a e with label + + !> Mark facet @a f in element @a e with label subroutine mesh_mark_labeled_facet(this, f, e, label) class(mesh_t), intent(inout) :: this integer, intent(in) :: f @@ -1557,10 +1581,10 @@ subroutine mesh_mark_labeled_facet(this, f, e, label) end if call this%labeled_zones(label)%add_facet(f, e) this%facet_type(f,e) = -label - + end subroutine mesh_mark_labeled_facet - + !> Mark facet @a f in element @a e as an outlet normal subroutine mesh_mark_outlet_normal_facet(this, f, e) class(mesh_t), intent(inout) :: this @@ -1577,7 +1601,7 @@ subroutine mesh_mark_outlet_normal_facet(this, f, e) end if this%facet_type(f, e) = 1 call this%outlet_normal%add_facet(f, e) - + end subroutine mesh_mark_outlet_normal_facet @@ -1597,7 +1621,7 @@ subroutine mesh_mark_outlet_facet(this, f, e) end if this%facet_type(f, e) = 1 call this%outlet%add_facet(f, e) - + end subroutine mesh_mark_outlet_facet !> Mark facet @a f in element @a e as a symmetry plane @@ -1616,7 +1640,7 @@ subroutine mesh_mark_sympln_facet(this, f, e) end if this%facet_type(f, e) = 2 call this%sympln%add_facet(f, e) - + end subroutine mesh_mark_sympln_facet !> Mark facet @a f in element @a e as periodic with (@a pf, @a pe) @@ -1628,7 +1652,7 @@ subroutine mesh_mark_periodic_facet(this, f, e, pf, pe, pids) integer, intent(in) :: pe integer, intent(inout) :: pids(4) integer, dimension(4) :: org_ids - + call this%get_facet_ids(f, e, org_ids) call this%periodic%add_periodic_facet(f, e, pf, pe, pids, org_ids) end subroutine mesh_mark_periodic_facet @@ -1656,7 +1680,7 @@ subroutine mesh_get_facet_ids(this, f, e, pids) pids(4) = 0 end select end subroutine mesh_get_facet_ids - + !> Reset ids of periodic points to their original ids subroutine mesh_reset_periodic_ids(this) class(mesh_t), intent(inout) :: this @@ -1679,9 +1703,9 @@ subroutine mesh_reset_periodic_ids(this) 1,2,& 3,4 /),& (/2,4/)) - + do i = 1, this%periodic%size - e = this%periodic%facet_el(i)%x(2) + e = this%periodic%facet_el(i)%x(2) f = this%periodic%facet_el(i)%x(1) pe = this%periodic%p_facet_el(i)%x(2) pf = this%periodic%p_facet_el(i)%x(1) @@ -1690,24 +1714,24 @@ subroutine mesh_reset_periodic_ids(this) this%periodic%p_ids(i)%x = pids end do do i = 1, this%periodic%size - e = this%periodic%facet_el(i)%x(2) + e = this%periodic%facet_el(i)%x(2) f = this%periodic%facet_el(i)%x(1) org_ids = this%periodic%org_ids(i)%x select type(ele => this%elements(e)%e) type is(hex_t) - do j = 1, 4 - pi => ele%pts(face_nodes(j,f))%p - call pi%set_id(org_ids(j)) - end do + do j = 1, 4 + pi => ele%pts(face_nodes(j,f))%p + call pi%set_id(org_ids(j)) + end do type is(quad_t) - do j = 1, 2 - pi => ele%pts(edge_nodes(j,f))%p - call pi%set_id(org_ids(j)) - end do + do j = 1, 2 + pi => ele%pts(edge_nodes(j,f))%p + call pi%set_id(org_ids(j)) + end do end select end do end subroutine mesh_reset_periodic_ids - + !> Creates common ids for matching periodic points. subroutine mesh_create_periodic_ids(this, f, e, pf, pe) class(mesh_t), intent(inout) :: this @@ -1717,7 +1741,7 @@ subroutine mesh_create_periodic_ids(this, f, e, pf, pe) integer, intent(in) :: pe type(point_t), pointer :: pi, pj real(kind=dp) :: L(3) - integer :: i, j, id, p_local_idx + integer :: i, j, id, p_local_idx, match type(tuple4_i4_t) :: ft type(tuple_i4_t) :: et integer, dimension(4, 6) :: face_nodes = reshape((/1,5,7,3,& @@ -1732,89 +1756,60 @@ subroutine mesh_create_periodic_ids(this, f, e, pf, pe) 1,2,& 3,4 /),& (/2,4/)) - + select type(ele => this%elements(e)%e) type is(hex_t) - select type(elp => this%elements(pe)%e) - type is(hex_t) - L = 0d0 - do i = 1, 4 - L = L + ele%pts(face_nodes(i,f))%p%x(1:3) - & + select type(elp => this%elements(pe)%e) + type is(hex_t) + L = 0d0 + do i = 1, 4 + L = L + ele%pts(face_nodes(i,f))%p%x(1:3) - & elp%pts(face_nodes(i,pf))%p%x(1:3) - end do - L = L/4 - do i = 1, 4 - pi => ele%pts(face_nodes(i,f))%p - do j = 1, 4 - pj => elp%pts(face_nodes(j,pf))%p - if (norm2(pi%x(1:3) - pj%x(1:3) - L) .lt. 1d-7) then - id = min(pi%id(), pj%id()) - call pi%set_id(id) - call pj%set_id(id) - p_local_idx = this%get_local(this%points(id)) - if (this%lgenc) then - id = ele%id() - call this%point_neigh(p_local_idx)%push(id) - id = elp%id() - call this%point_neigh(p_local_idx)%push(id) + end do + L = L/4 + do i = 1, 4 + pi => ele%pts(face_nodes(i,f))%p + match = 0 + do j = 1, 4 + pj => elp%pts(face_nodes(j,pf))%p + if (norm2(pi%x(1:3) - pj%x(1:3) - L) .lt. 1d-7) then + id = min(pi%id(), pj%id()) + call pi%set_id(id) + call pj%set_id(id) + p_local_idx = this%get_local(this%points(id)) + match = match + 1 end if + end do + if ( match .gt. 1) then + call neko_error('Multiple matches when creating periodic ids') + else if (match .eq. 0) then + call neko_error('Cannot find matching periodic point') end if end do - end do - - if (this%lgenc) then - do i = 1, NEKO_HEX_NFCS - call ele%facet_id(ft, i) - call this%add_face(ft) - call elp%facet_id(ft, i) - call this%add_face(ft) - end do - - do i = 1, NEKO_HEX_NEDS - call ele%edge_id(et, i) - call this%add_edge(et) - call elp%edge_id(et, i) - call this%add_edge(et) - end do - end if - end select - type is(quad_t) - select type(elp => this%elements(pe)%e) + end select type is(quad_t) - L = 0d0 - do i = 1, 2 - L = L + ele%pts(edge_nodes(i,f))%p%x(1:3) - & + select type(elp => this%elements(pe)%e) + type is(quad_t) + L = 0d0 + do i = 1, 2 + L = L + ele%pts(edge_nodes(i,f))%p%x(1:3) - & elp%pts(edge_nodes(i,pf))%p%x(1:3) - end do - L = L/2 - do i = 1, 2 - pi => ele%pts(edge_nodes(i,f))%p - do j = 1, 2 - pj => elp%pts(edge_nodes(j,pf))%p - !whatabout thie tolerance? - if (norm2(pi%x(1:3) - pj%x(1:3) - L) .lt. 1d-7) then - id = min(pi%id(), pj%id()) - call pi%set_id(id) - call pj%set_id(id) - p_local_idx = this%get_local(this%points(id)) - if (this%lgenc) then - id = ele%id() - call this%point_neigh(p_local_idx)%push(id) - id = elp%id() - call this%point_neigh(p_local_idx)%push(id) - end if - end if end do - end do - if (this%lgenc) then - do i = 1, NEKO_QUAD_NEDS - call ele%facet_id(et, i) - call this%add_edge(et) - call elp%facet_id(et, i) - call this%add_edge(et) + L = L/2 + do i = 1, 2 + pi => ele%pts(edge_nodes(i,f))%p + do j = 1, 2 + pj => elp%pts(edge_nodes(j,pf))%p + !whatabout thie tolerance? + if (norm2(pi%x(1:3) - pj%x(1:3) - L) .lt. 1d-7) then + id = min(pi%id(), pj%id()) + call pi%set_id(id) + call pj%set_id(id) + p_local_idx = this%get_local(this%points(id)) + end if + end do end do - end if - end select + end select end select end subroutine mesh_create_periodic_ids @@ -1838,7 +1833,6 @@ subroutine mesh_apply_periodic_facet(this, f, e, pf, pe, pids) 1,2,4,3,& 5,6,8,7/),& (/4,6/)) - select type(ele => this%elements(e)%e) type is(hex_t) do i = 1, 4 @@ -1846,22 +1840,7 @@ subroutine mesh_apply_periodic_facet(this, f, e, pf, pe, pids) call pi%set_id(pids(i)) call this%add_point(pi, id) p_local_idx = this%get_local(this%points(id)) - id = ele%id() - if (this%lgenc) then - call this%point_neigh(p_local_idx)%push(id) - end if end do - if (this%lgenc) then - do i = 1, NEKO_HEX_NFCS - call ele%facet_id(ft, i) - call this%add_face(ft) - end do - - do i = 1, NEKO_HEX_NEDS - call ele%edge_id(et, i) - call this%add_edge(et) - end do - end if end select end subroutine mesh_apply_periodic_facet @@ -1879,7 +1858,7 @@ function mesh_get_local_point(this, p) result(local_id) if (this%htp%get(tmp, local_id) .gt. 0) then call neko_error('Invalid global id (local point)') end if - + end function mesh_get_local_point !> Return the local id of an edge @a e @@ -1892,7 +1871,7 @@ function mesh_get_local_edge(this, e) result(local_id) if (this%hte%get(e, local_id) .gt. 0) then call neko_error('Invalid global id (local edge)') end if - + end function mesh_get_local_edge !> Return the local id of a face @a f @@ -1904,7 +1883,7 @@ function mesh_get_local_facet(this, f) result(local_id) if (this%htf%get(f, local_id) .gt. 0) then call neko_error('Invalid global id (local facet)') end if - + end function mesh_get_local_facet !> Return the global id of an edge @a e @@ -1919,7 +1898,7 @@ function mesh_get_global_edge(this, e) result(global_id) if (pe_size .gt. 1) then global_id = this%ddata%local_to_global_facet(global_id) end if - else + else if (pe_size .gt. 1) then global_id = this%ddata%local_to_global_edge(global_id) end if @@ -1934,26 +1913,26 @@ function mesh_get_global_facet(this, f) result(global_id) integer :: global_id global_id = this%get_local_facet(f) - + if (pe_size .gt. 1) then global_id = this%ddata%local_to_global_facet(global_id) end if - + end function mesh_get_global_facet - + !> Check if the mesh has a point given its global index !! @return The local id of the point (if present) otherwise -1 !! @todo Consider moving this to distdata function mesh_have_point_glb_idx(this, index) result(local_id) - class(mesh_t), intent(inout) :: this + class(mesh_t), intent(inout) :: this integer, intent(inout) :: index !< Global index integer :: local_id if (this%htp%get(index, local_id) .eq. 1) then local_id = -1 end if - + end function mesh_have_point_glb_idx @@ -1966,9 +1945,9 @@ function mesh_is_shared_point(this, p) result(shared) local_index = this%get_local(p) shared = this%ddata%shared_point%element(local_index) - + end function mesh_is_shared_point - + !> Check if an edge is shared !! @attention only defined for gdim .ne. 2 @@ -1980,7 +1959,7 @@ function mesh_is_shared_edge(this, e) result(shared) local_index = this%get_local(e) if (this%gdim .eq. 2) then shared = this%ddata%shared_facet%element(local_index) - else + else shared = this%ddata%shared_edge%element(local_index) end if end function mesh_is_shared_edge @@ -1994,7 +1973,7 @@ function mesh_is_shared_facet(this, f) result(shared) local_index = this%get_local(f) shared = this%ddata%shared_facet%element(local_index) - + end function mesh_is_shared_facet end module mesh diff --git a/src/mesh/octree.f90 b/src/mesh/octree.f90 index f026333d88e..a2cf677f262 100644 --- a/src/mesh/octree.f90 +++ b/src/mesh/octree.f90 @@ -71,12 +71,12 @@ subroutine octree_init(t, width) real(kind=dp), intent(in) :: width type(point_t) :: origin integer, parameter :: top_level = 0 - + call octree_free(t) origin = (/ 0d0, 0d0, 0d0 /) call octree_oct_init(t%root, origin, width, top_level) - + end subroutine octree_init !> Destroy an octree @@ -84,7 +84,7 @@ subroutine octree_free(t) class(octree_t), intent(inout) :: t call octree_free_oct(t%root) - + end subroutine octree_free !> Insert a point @a p into the octree @@ -93,7 +93,7 @@ subroutine octree_insert(t, p) type(point_t), intent(in) :: p call octree_oct_insert(t%root, p) - + end subroutine octree_insert !> Find a point @a p in an octree @@ -103,9 +103,9 @@ function octree_find(t, p) result(rcode) logical rcode rcode = (octree_oct_find(t%root, p) .eq. 0) - + end function octree_find - + !> Insert a point @a p into the octree rooted at @a o recursive subroutine octree_oct_insert(o, p) @@ -113,7 +113,7 @@ recursive subroutine octree_oct_insert(o, p) type(point_t), intent(in) :: p type(point_t) :: tmp_pt, offset, new_origin integer :: i - + if (.not. associated(o%oct(1)%ptr)) then if (.not. o%valid) then o%point = p @@ -124,14 +124,14 @@ recursive subroutine octree_oct_insert(o, p) return else tmp_pt = o%point - o%valid = .false. + o%valid = .false. do i = 1, 8 offset = (/ -0.5d0, -0.5d0, -0.5d0 /) if (iand((i - 1), 4) .gt. 0) offset%x(1) = 0.5d0 if (iand((i - 1), 2) .gt. 0) offset%x(2) = 0.5d0 if (iand((i - 1), 1) .gt. 0) offset%x(3) = 0.5d0 - + new_origin = o%origin%x + (o%width * offset%x) call octree_oct_init(o%oct(i)%ptr, new_origin, & o%width * 0.5d0, o%level + 1) @@ -139,7 +139,7 @@ recursive subroutine octree_oct_insert(o, p) call octree_oct_insert(o%oct(octree_oct(o, tmp_pt))%ptr, tmp_pt) call octree_oct_insert(o%oct(octree_oct(o, p))%ptr, p) - + end if end if else @@ -165,7 +165,7 @@ recursive function octree_oct_find(o, p) result(rcode) oct_idx = octree_oct(o, p) rcode = octree_oct_find(o%oct(oct_idx)%ptr, p) end if - + end function octree_oct_find !> Initialize an octant width a given width, origin and level @@ -175,7 +175,7 @@ subroutine octree_oct_init(o, origin, width, level) real(kind=dp), intent(in) :: width integer, intent(in) :: level integer :: i - + if (associated(o)) then call neko_error('Octree octant already initialized') else @@ -189,14 +189,14 @@ subroutine octree_oct_init(o, origin, width, level) nullify(o%oct(i)%ptr) end do end if - + end subroutine octree_oct_init - + !> Deallocate an oct in an octree recursive subroutine octree_free_oct(o) type(oct_t), pointer, intent(inout) :: o integer :: i - + if (.not. associated(o)) then return else if (.not. associated(o%oct(1)%ptr)) then @@ -211,7 +211,7 @@ recursive subroutine octree_free_oct(o) nullify(o) end if - + end subroutine octree_free_oct !> Return the octant for a given point @@ -225,7 +225,7 @@ pure function octree_oct(oct, point) result(oct_idx) if (point%x(2) .ge. oct%origin%x(2)) oct_idx = ior(oct_idx, 2) if (point%x(3) .ge. oct%origin%x(3)) oct_idx = ior(oct_idx, 1) oct_idx = oct_idx + 1 - + end function octree_oct !> Return if a point is inside an octant @@ -244,5 +244,5 @@ pure function octree_oct_inside(oct, point) result(inside) inside = .true. end if end function octree_oct_inside - + end module octree diff --git a/src/mesh/point.f90 b/src/mesh/point.f90 index 7fca3d63def..5c69f12fd32 100644 --- a/src/mesh/point.f90 +++ b/src/mesh/point.f90 @@ -208,7 +208,7 @@ function point_subtract(p1, p2) result(res) res%x(2) = p1%x(2) - p2%x(2) res%x(3) = p1%x(3) - p2%x(3) - end function point_subtract + end function point_subtract !> Returns the multiplication of a point by a scalar \f$ a*p_{1} \f$. function point_scalar_mult(p, a) result(res) @@ -229,10 +229,10 @@ pure function point_euclid_dist(p1, p2) result(res) real(kind=rp) :: res res = sqrt( (p1%x(1) - p2%x(1))**2 & - + (p1%x(2) - p2%x(2))**2 & + + (p1%x(2) - p2%x(2))**2 & + (p1%x(3) - p2%x(3))**2 ) end function point_euclid_dist - + !> Computes matrix-vector product in \f$ \mathbb{R}^3 \f$: \f$ b = Ax \f$. function point_mat_mult(A,x) result(b) class(point_t), intent(in) :: x diff --git a/src/mesh/point_zone.f90 b/src/mesh/point_zone.f90 index fac110b64f3..f0316d675bf 100644 --- a/src/mesh/point_zone.f90 +++ b/src/mesh/point_zone.f90 @@ -45,7 +45,7 @@ module point_zone !> Base abstract type for point zones. type, public, abstract :: point_zone_t - !> List of linear indices of the GLL points in the zone. + !> List of linear indices of the GLL points in the zone. integer, allocatable :: mask(:) !> List of linear indices of the GLL points in the zone on the device. type(c_ptr) :: mask_d = c_null_ptr @@ -166,7 +166,7 @@ subroutine point_zone_free_base(this) if (c_associated(this%mask_d)) then call device_free(this%mask_d) end if - + end subroutine point_zone_free_base !> Builds the mask from the scratch stack. @@ -174,45 +174,46 @@ subroutine point_zone_finalize(this) class(point_zone_t), intent(inout) :: this integer, pointer :: tp(:) integer :: i - + if (.not. this%finalized) then allocate(this%mask(this%scratch%size())) - + tp => this%scratch%array() do i = 1, this%scratch%size() this%mask(i) = tp(i) end do this%size = this%scratch%size() - + call this%scratch%clear() if (NEKO_BCKND_DEVICE .eq. 1) then call device_map(this%mask, this%mask_d, this%size) - call device_memcpy(this%mask, this%mask_d, this%size, HOST_TO_DEVICE) + call device_memcpy(this%mask, this%mask_d, this%size, & + HOST_TO_DEVICE, sync=.false.) end if this%finalized = .true. - + end if - + end subroutine point_zone_finalize !> Adds a point's linear index to the scratch stack. !! @param idx Linear index of the point to add. - !! @note The linear index of a point `(j,k,l,e)` can be retrieved using the + !! @note The linear index of a point `(j,k,l,e)` can be retrieved using the !! subroutine `linear_index(j,k,l,e,lx)` in the `utils` module. subroutine point_zone_add(this, idx) class(point_zone_t), intent(inout) :: this integer, intent(inout) :: idx - + if (this%finalized) then call neko_error('Point zone already finalized') end if - + call this%scratch%push(idx) - + end subroutine point_zone_add !> Maps the GLL points that verify a point_zone's `criterion` by adding diff --git a/src/mesh/point_zone_factory.f90 b/src/mesh/point_zone_fctry.f90 similarity index 63% rename from src/mesh/point_zone_factory.f90 rename to src/mesh/point_zone_fctry.f90 index 90ee9da41e3..1ab15696397 100644 --- a/src/mesh/point_zone_factory.f90 +++ b/src/mesh/point_zone_fctry.f90 @@ -36,6 +36,7 @@ module point_zone_fctry use point_zone, only: point_zone_t use box_point_zone, only: box_point_zone_t use sphere_point_zone, only: sphere_point_zone_t + use cylinder_point_zone, only: cylinder_point_zone_t use json_module, only: json_file use json_utils, only: json_get use dofmap, only: dofmap_t @@ -45,34 +46,36 @@ module point_zone_fctry public :: point_zone_factory - contains - - !> Point zone factory. Constructs, initializes, and maps the - !! point zone object. - !! @param json JSON object initializing the point zone. - !! @param dof Dofmap from which to map the point zone. - subroutine point_zone_factory(point_zone, json, dof) - class(point_zone_t), allocatable, intent(inout) :: point_zone - type(json_file), intent(inout) :: json - type(dofmap_t), intent(inout) :: dof - character(len=:), allocatable :: zone_type +contains - call json_get(json, "geometry", zone_type) + !> Point zone factory. Constructs, initializes, and maps the + !! point zone object. + !! @param json JSON object initializing the point zone. + !! @param dof Dofmap from which to map the point zone. + subroutine point_zone_factory(point_zone, json, dof) + class(point_zone_t), allocatable, intent(inout) :: point_zone + type(json_file), intent(inout) :: json + type(dofmap_t), intent(inout) :: dof + character(len=:), allocatable :: zone_type - if (trim(zone_type) .eq. "box") then - allocate(box_point_zone_t::point_zone) - else if (trim(zone_type) .eq. "sphere") then - allocate(sphere_point_zone_t::point_zone) - else - call neko_error("Unknown source term "//trim(zone_type)//"! Valid & - &source terms are 'box', 'sphere'.") - end if + call json_get(json, "geometry", zone_type) - call point_zone%init(json, dof%size()) + if (trim(zone_type) .eq. "box") then + allocate(box_point_zone_t::point_zone) + else if (trim(zone_type) .eq. "sphere") then + allocate(sphere_point_zone_t::point_zone) + else if (trim(zone_type) .eq. "cylinder") then + allocate(cylinder_point_zone_t::point_zone) + else + call neko_error("Unknown source term "//trim(zone_type)//"! Valid & + &source terms are 'box', 'sphere', 'cylinder'.") + end if - call point_zone%map(dof) - call point_zone%finalize() + call point_zone%init(json, dof%size()) - end subroutine point_zone_factory + call point_zone%map(dof) + call point_zone%finalize() + + end subroutine point_zone_factory end module point_zone_fctry diff --git a/src/mesh/point_zone_registry.f90 b/src/mesh/point_zone_registry.f90 index 763466e9719..adc0e965400 100644 --- a/src/mesh/point_zone_registry.f90 +++ b/src/mesh/point_zone_registry.f90 @@ -32,10 +32,11 @@ ! ! Implements a point zone registry for storing point zones. module point_zone_registry - use point_zone, only : point_zone_t, point_zone_wrapper_t + use point_zone, only: point_zone_t, point_zone_wrapper_t use point_zone_fctry, only: point_zone_factory - use dofmap, only : dofmap_t - use utils, only : neko_error + use dofmap, only: dofmap_t + use mesh, only: mesh_t + use space, only: space_t, GLL use utils, only: neko_error use json_utils, only: json_get use json_module, only: json_file, json_core, json_value @@ -60,7 +61,7 @@ module point_zone_registry procedure, pass(this) :: add_point_zone_from_json !> Returns the number of point zones in the registry. procedure, pass(this) :: n_point_zones - !> Retrieves a point zone in the registry by its index in the + !> Retrieves a point zone in the registry by its index in the !! `point_zones` array. procedure, pass(this) :: get_point_zone_by_index !> Retrieves a point zone in the registry by its name. @@ -68,7 +69,7 @@ module point_zone_registry !> Returns the expansion size with which the `point_zone_registry_t` !! was initialized. procedure, pass(this) :: get_expansion_size - !> Returns the total size of the `point_zones` array (not the number of + !> Returns the total size of the `point_zones` array (not the number of !! point zones in the registry!). procedure, pass(this) :: get_size !> Checks if a point zone exists in the registry. @@ -83,17 +84,17 @@ module point_zone_registry contains !> Constructor, reading from json point zones. !! @param json Json file object. - !! @param dof Dofmap to map the point zone from GLL points. + !! @param msh Mesh associated with the point zone. !! @param size Size of the point zone registry. !! @param expansion_size Expansion size for the point zone registry. !! @note At this stage, the point_zone registry is only allocated !! if we find anything in the `case.point_zones` json path. Any !! point_zones that are not defined in that way will need to be added !! using the `add_point_zone` subroutine. - subroutine point_zone_registry_init(this, json, dof, expansion_size) + subroutine point_zone_registry_init(this, json, msh, expansion_size) class(point_zone_registry_t), intent(inout):: this type(json_file), intent(inout) :: json - type(dofmap_t), intent(inout) :: dof + type(mesh_t), target, intent(inout) :: msh integer, optional, intent(in) :: expansion_size ! Json low-level manipulator. @@ -104,13 +105,28 @@ subroutine point_zone_registry_init(this, json, dof, expansion_size) character(len=:), allocatable :: buffer ! A single source term as its own json_file. type(json_file) :: source_subdict - character(len=:), allocatable :: type logical :: found integer :: n_zones, i + ! Parameters used to setup the GLL space. + integer :: order + type(space_t), target :: Xh + type(dofmap_t) :: dof + + + call json_get(json, 'case.numerics.polynomial_order', order) + order = order + 1 ! add 1 to get poly order + + if (msh%gdim .eq. 2) then + call Xh%init(GLL, order, order) + else + call Xh%init(GLL, order, order, order) + end if + dof = dofmap_t(msh, Xh) + call this%free() - if (present(expansion_size)) then + if (present(expansion_size)) then this%expansion_size = expansion_size else this%expansion_size = 10 @@ -199,7 +215,7 @@ subroutine add_point_zone_from_json(this, json, dof) ! Check if point zone exists with the input name if (this%point_zone_exists(trim(str_read))) then call neko_error("Field with name " // trim(str_read) // & - " is already registered") + " is already registered") end if ! @@ -207,7 +223,7 @@ subroutine add_point_zone_from_json(this, json, dof) ! init. ! if (this%n_point_zones() .eq. this%get_size()) then - call this%expand() + call this%expand() end if this%n = this%n + 1 @@ -230,7 +246,7 @@ pure function n_point_zones(this) result(n) n = this%n end function n_point_zones - !> Returns the total size of the `point_zones` array (not the number of + !> Returns the total size of the `point_zones` array (not the number of !! point zones in the registry!). !! @note Use `n_point_zones()` to retrieve the actual number of point !! zones in the registry. @@ -250,7 +266,7 @@ pure function get_expansion_size(this) result(n) n = this%expansion_size end function get_expansion_size - !> Retrieves a point zone in the registry by its index in the + !> Retrieves a point zone in the registry by its index in the !! `point_zones` array. !! @param i Index in the `point_zones` array. function get_point_zone_by_index(this, i) result(pz) @@ -287,7 +303,7 @@ function get_point_zone_by_name(this, name) result(pz) if (.not. found) then call neko_error("Point zone " // trim(name) // & - " could not be found in the registry") + " could not be found in the registry") end if end function get_point_zone_by_name diff --git a/src/mesh/point_zones/box_point_zone.f90 b/src/mesh/point_zones/box_point_zone.f90 index 43494b46543..b70b890f101 100644 --- a/src/mesh/point_zones/box_point_zone.f90 +++ b/src/mesh/point_zones/box_point_zone.f90 @@ -42,7 +42,7 @@ module box_point_zone !> A box-shaped point zone. !! @details As defined here, a box is described by its `x,y,z` bounds, - !! specified in the json file as e.g. `"x_bounds": [, ]"`, + !! specified in the json file as e.g. `"x_bounds": [, ]"`, !! etc for `y` and `z` coordinates. type, public, extends(point_zone_t) :: box_point_zone_t real(kind=rp) :: xmin @@ -138,7 +138,7 @@ subroutine box_point_zone_free(this) end subroutine box_point_zone_free !> Defines the criterion of selection of a GLL point in the box point zone. - !! In the case of a box point zone, an `x,y,z` GLL point is considered as + !! In the case of a box point zone, an `x,y,z` GLL point is considered as !! being inside the zone if: !! \f{eqnarray*}{ !! x_{min} \le x \le x_{max} \\ diff --git a/src/mesh/point_zones/cylinder_point_zone.f90 b/src/mesh/point_zones/cylinder_point_zone.f90 new file mode 100644 index 00000000000..b8385d64580 --- /dev/null +++ b/src/mesh/point_zones/cylinder_point_zone.f90 @@ -0,0 +1,186 @@ +! Copyright (c) 2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Implements a cylinder geometry subset. +module cylinder_point_zone + use point_zone, only: point_zone_t + use num_types, only: rp + use json_utils, only: json_get + use json_module, only: json_file + use utils, only: neko_error + implicit none + private + + !> A cylindrical point zone. + !! @details As defined here, a cylinder is described by its two end points and + !! its radius, specified in the json file + !! as e.g. `"start": [, , ]", + !! "start": [, , ]", "radius": `. + type, public, extends(point_zone_t) :: cylinder_point_zone_t + real(kind=rp), dimension(3) :: p0 + real(kind=rp), dimension(3) :: p1 + real(kind=rp) :: radius + contains + !> Constructor from json object file. + procedure, pass(this) :: init => cylinder_point_zone_init_from_json + !> Destructor. + procedure, pass(this) :: free => cylinder_point_zone_free + !> Defines the criterion of selection of a GLL point in the sphere point zone. + procedure, pass(this) :: criterion => cylinder_point_zone_criterion + end type cylinder_point_zone_t + +contains + + !> Constructor from json object file. + !! @param json Json object file. + !! @param size Size with which to initialize the stack + subroutine cylinder_point_zone_init_from_json(this, json, size) + class(cylinder_point_zone_t), intent(inout) :: this + type(json_file), intent(inout) :: json + integer, intent(in) :: size + + character(len=:), allocatable :: name + real(kind=rp), dimension(:), allocatable :: p0, p1 + real(kind=rp) :: radius + + call json_get(json, "name", name) + call json_get(json, "start", p0) + call json_get(json, "end", p1) + call json_get(json, "radius", radius) + + ! Needed to use `shape` because of input name. + if (all(shape(p0) .ne. (/3/))) then + call neko_error("Cylinder point zone: invalid start point") + end if + + if (all(shape(p1) .ne. (/3/))) then + call neko_error("Cylinder point zone: invalid end point") + end if + + if (radius .lt. 0.0_rp) then + call neko_error("Cylinder point zone: invalid radius") + end if + + call cylinder_point_zone_init_common(this, size, trim(name), p0, p1, & + radius) + + end subroutine cylinder_point_zone_init_from_json + + !> Initializes a cylinder point zone from its endpoint coordinates and radius. + !! @param size Size of the scratch stack. + !! @param name Name of the cylinder point zone. + !! @param p0 Coordinates of the first endpoint. + !! @param p1 Coordinates of the second endpoint. + !! @param radius Sphere radius. + subroutine cylinder_point_zone_init_common(this, size, name, p0, p1, radius) + class(cylinder_point_zone_t), intent(inout) :: this + integer, intent(in), optional :: size + character(len=*), intent(in) :: name + real(kind=rp), intent(in), dimension(3) :: p0 + real(kind=rp), intent(in), dimension(3) :: p1 + real(kind=rp), intent(in) :: radius + + call this%init_base(size, name) + + this%p0 = p0 + this%p1 = p1 + this%radius = radius + + end subroutine cylinder_point_zone_init_common + + !> Destructor. + subroutine cylinder_point_zone_free(this) + class(cylinder_point_zone_t), intent(inout) :: this + + call this%free_base() + + this%p0 = 0.0_rp + this%p1 = 0.0_rp + this%radius = 0.0_rp + + end subroutine cylinder_point_zone_free + + !> @brief Defines the criterion of selection of a GLL point in the cylinder + !! point zone. + !! @details A GLL point of coordinates \f$ \vec{X} = (x, y, z) \f$ is + !! considered as being inside the cylinder defined by endpoints + !! \f$ \vec{p_0} \f$ and \f$ \vec{p_1} \f$ and radius \f$ r \f$ if it + !! satisfies the following conditions: + !! \f{eqnarray*}{ + !! ||\vec{X} - \vec{X_0}|| &\le& r\\ + !! 0 &\le& t \le 1 + !! \f} + !! where, + !! \f{eqnarray*}{ + !! t &=& (\vec{X} - \vec{p_0}) \cdot (\vec{p_1} - \vec{p_0}) + !! / ||\vec{p_1} - \vec{p_0}|| \\ + !! \vec{X_0} &=& \vec{p_0} + t \cdot (\vec{p_1} - \vec{p_0}) + !! \f} + !! @param x x-coordinate of the GLL point. + !! @param y y-coordinate of the GLL point. + !! @param z z-coordinate of the GLL point. + !! @param j 1st nonlinear index of the GLL point. + !! @param k 2nd nonlinear index of the GLL point. + !! @param l 3rd nonlinear index of the GLL point. + !! @param e element index of the GLL point. + pure function cylinder_point_zone_criterion(this, x, y, z, j, k, l, e) result(is_inside) + class(cylinder_point_zone_t), intent(in) :: this + real(kind=rp), intent(in) :: x + real(kind=rp), intent(in) :: y + real(kind=rp), intent(in) :: z + integer, intent(in) :: j + integer, intent(in) :: k + integer, intent(in) :: l + integer, intent(in) :: e + logical :: is_inside + + real(kind=rp), dimension(3) :: p + real(kind=rp), dimension(3) :: centerline + real(kind=rp), dimension(3) :: vec_p + real(kind=rp) :: t + real(kind=rp), dimension(3) :: projection + real(kind=rp) :: distance + + p = [x, y, z] + + centerline = this%p1 - this%p0 + vec_p = p - this%p0 + t = dot_product(vec_p, centerline) / dot_product(centerline, centerline) + + projection = this%p0 + t * centerline + distance = norm2(projection - p) + + is_inside = t >= 0.0_rp .and. t <= 1.0_rp .and. distance <= this%radius + + end function cylinder_point_zone_criterion + +end module cylinder_point_zone diff --git a/src/mesh/point_zones/sphere_point_zone.f90 b/src/mesh/point_zones/sphere_point_zone.f90 index d29672694a9..2da12c73810 100644 --- a/src/mesh/point_zones/sphere_point_zone.f90 +++ b/src/mesh/point_zones/sphere_point_zone.f90 @@ -39,9 +39,9 @@ module sphere_point_zone use math, only: abscmp implicit none private - + !> A sphere-shaped point zone. - !! @details As defined here, a sphere is described by its center of + !! @details As defined here, a sphere is described by its center of !! coordinates `x0,y0,z0` and its radius, specified in the json file !! as e.g. `"center": [, , ]", "radius": `. type, public, extends(point_zone_t) :: sphere_point_zone_t @@ -85,7 +85,7 @@ subroutine sphere_point_zone_init_from_json(this, json, size) y0, z0, radius) end subroutine sphere_point_zone_init_from_json - + !> Initializes a sphere point zone from its center coordinates and radius. !! @param size Size of the scratch stack. !! @param name Name of the sphere point zone. @@ -125,7 +125,7 @@ subroutine sphere_point_zone_free(this) end subroutine sphere_point_zone_free !> Defines the criterion of selection of a GLL point in the sphere point zone. - !! A GLL point of coordinates \f$ \vec{X} = (x, y, z) \f$ is considered as being + !! A GLL point of coordinates \f$ \vec{X} = (x, y, z) \f$ is considered as being !! inside the zone if: !! \f{eqnarray*}{ !! |\vec{X} - \vec{X_0}|^2 \le r diff --git a/src/mesh/quad.f90 b/src/mesh/quad.f90 index 56a05639a37..3b97f2c88bf 100644 --- a/src/mesh/quad.f90 +++ b/src/mesh/quad.f90 @@ -49,10 +49,10 @@ module quad !! @verbatim !! Node numbering (NEKTON symmetric notation) !! - !! 3+-----+4 ^ s - !! | | | - !! | | | - !! 1+-----+2 +----> r + !! 3+-----+4 ^ s + !! | | | + !! | | | + !! 1+-----+2 +----> r !! !! @endverbatim type, public, extends(element_t) :: quad_t @@ -71,11 +71,11 @@ module quad !! @verbatim !! Edge numbering (similar to NEKTON symmetric notation) !! 4 - !! +------+ ^ s + !! +------+ ^ s !! | | | !! 1 | | 2 | - !! | | | - !! +------+ +-----> r + !! | | | + !! +------+ +-----> r !! 3 !! @endverbatim integer, parameter, dimension(2, 4) :: edge_nodes = reshape((/1,3,& @@ -83,7 +83,7 @@ module quad 1,2,& 3,4 /),& (/2,4/)) - + contains !> Create a quadrilateral element based upon four points @@ -116,13 +116,13 @@ subroutine quad_facet_id(this, t, side) type is(tuple_i4_t) if (p1%id() .lt. p2%id()) then t%x = (/ p1%id(), p2%id() /) - else + else t%x = (/ p2%id(), p1%id() /) - endif - end select + endif + end select end subroutine quad_facet_id - !> Return the ordered edge for face @a i as a 2-tuple @a t + !> Return the ordered edge for face @a i as a 2-tuple @a t subroutine quad_facet_order(this, t, side) class(quad_t), intent(in) :: this class(tuple_t), intent(inout) :: t @@ -136,7 +136,7 @@ subroutine quad_facet_order(this, t, side) type is(tuple_i4_t) t%x = (/ p1%id(), p2%id() /) end select - + end subroutine quad_facet_order !> Compute the diameter of a quadrilateral element @@ -186,7 +186,7 @@ end function quad_centroid pure function quad_equal(this, other) result(res) class(quad_t), intent(in) :: this class(element_t), intent(in) :: other - integer :: i + integer :: i logical :: res res = .false. @@ -202,7 +202,7 @@ pure function quad_equal(this, other) result(res) res = .true. end if end select - + end function quad_equal - + end module quad diff --git a/src/mesh/search_tree/aabb.f90 b/src/mesh/search_tree/aabb.f90 new file mode 100644 index 00000000000..854c1c555dc --- /dev/null +++ b/src/mesh/search_tree/aabb.f90 @@ -0,0 +1,600 @@ +! Copyright (c) 2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! ============================================================================ ! +! Original C++ Implementation from: +! https://github.com/JamesRandall/SimpleVoxelEngine/blob/master/voxelEngine/include/AABB.h +! +! Translated to Fortran by: +! @author Tim Felle Olsen +! @date 9 Feb 2024 +! +! C++ Code License: +! The MIT License (MIT) +! +! Copyright (c) 2017 James Randall +! +! Permission is hereby granted, free of charge, to any person obtaining a copy of +! this software and associated documentation files (the "Software"), to deal in +! the Software without restriction, including without limitation the rights to +! use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +! the Software, and to permit persons to whom the Software is furnished to do so, +! subject to the following conditions: +! +! The above copyright notice and this permission notice shall be included in all +! copies or substantial portions of the Software. +! +! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +! FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +! COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +! IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +! CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +! ============================================================================ ! + +!> @brief Axis Aligned Bounding Box (aabb) implementation in Fortran +!! @details +!! This is a Fortran implementation of an Axis Aligned Bounding Box (aabb) data +!! structure. The aabb is a box that is aligned to the x, y and z axes. It is +!! defined by two points, the lower left front corner and the upper right back +!! corner. This is the base data structure for the aabb_Tree, which is used to +!! accelerate a Signed Distance Function. +module aabb + use num_types, only: dp + use element, only: element_t + use point, only: point_t + use tri, only: tri_t + use quad, only: quad_t + use tet, only: tet_t + use hex, only: hex_t + use mesh, only: mesh_t + use tri_mesh, only: tri_mesh_t + use tet_mesh, only: tet_mesh_t + use utils, only: neko_error + + implicit none + private + public :: aabb_t, get_aabb, merge, intersection + + ! ========================================================================== ! + ! Public interface for free functions + ! ========================================================================== ! + + !> @brief Merge two aabbs. + interface merge + module procedure merge_aabb + end interface merge + + !> @brief Intersect two aabbs. + interface intersection + module procedure intersection_aabb + end interface intersection + + !> @brief Axis Aligned Bounding Box (aabb) data structure. + !! @details The aabb is a box that is aligned to the x, y and z axes. It is + !! defined by two points, the lower left front corner and the upper right back + !! corner. The purpose of this is to accelerate a Signed Distance Function, + !! through an aabb_Tree. + type :: aabb_t + private + + logical :: initialized = .false. + real(kind=dp) :: box_min(3) = huge(0.0_dp) + real(kind=dp) :: box_max(3) = -huge(0.0_dp) + real(kind=dp) :: center(3) = 0.0_dp + real(kind=dp) :: diameter = huge(0.0_dp) + real(kind=dp) :: surface_area = 0.0_dp + + contains + + ! Initializers + procedure, pass(this), public :: init => aabb_init + + ! Getters + procedure, pass(this), public :: get_min => aabb_get_min + procedure, pass(this), public :: get_max => aabb_get_max + procedure, pass(this), public :: get_width => aabb_get_width + procedure, pass(this), public :: get_height => aabb_get_height + procedure, pass(this), public :: get_depth => aabb_get_depth + procedure, pass(this), public :: get_diameter => aabb_get_diameter + procedure, pass(this), public :: get_surface_area => aabb_get_surface_area + procedure, pass(this), public :: get_center => aabb_get_center + procedure, pass(this), public :: get_diagonal => aabb_get_diagonal + + procedure, pass(this), public :: add_padding + + ! Comparison operators + generic :: operator(.lt.) => less + generic :: operator(.gt.) => greater + + !> @brief Check if two aabbs are overlapping. + procedure, pass(this), public :: overlaps => aabb_overlaps + !> @brief Check if this aabb fully contains another aabb. + procedure, pass(this), public :: contains => aabb_contains_other + !> @brief Check if this aabb contains a point. + procedure, pass(this), public :: contains_point => aabb_contains_point + + ! Private comparison operators + procedure, pass(this) :: less => aabb_less + procedure, pass(this) :: greater => aabb_greater + + ! Private operations + procedure, pass(this), private :: calculate_surface_area + + end type aabb_t + +contains + + ! ========================================================================== ! + ! Constructors + ! ========================================================================== ! + + !> @brief Construct the aabb of a predefined object. + !! + !! @details This function is used to get the aabb of a predefined object. + !! Optionally, the user can define the padding of the aabb, which is a + !! multiple of the diameter of the aabb. This is used to avoid numerical + !! issues when the object itself it axis aligned. + !! + !! Current support: + !! - Triangle (tri_t) + !! - Quadrilateral (quad_t) + !! - Tetrahedron (tet_t) + !! - Hexahedron (hex_t) + !! - Mesh (mesh_t) + !! - Triangular mesh (tri_mesh_t) + !! - Tetrahedral mesh (tet_mesh_t) + !! + !! @param[in] object The object to get the aabb of. + !! @param[in] padding The padding of the aabb. + !! @return The aabb of the object. + function get_aabb(object, padding) result(box) + use utils, only: neko_error + implicit none + + class(*), intent(in) :: object + real(kind=dp), intent(in), optional :: padding + type(aabb_t) :: box + + select type(object) + + type is (tri_t) + box = get_aabb_element(object) + type is (hex_t) + box = get_aabb_element(object) + type is (tet_t) + box = get_aabb_element(object) + type is (quad_t) + box = get_aabb_element(object) + + type is (mesh_t) + box = get_aabb_mesh(object) + type is (tri_mesh_t) + box = get_aabb_tri_mesh(object) + type is (tet_mesh_t) + box = get_aabb_tet_mesh(object) + + class default + call neko_error("get_aabb: Unsupported object type") + end select + + if (present(padding)) then + call box%add_padding(padding) + end if + + end function get_aabb + + !> @brief Add padding to the aabb. + !! @details This function adds padding to the aabb. The padding is a multiple + !! of the diameter of the aabb. This is used to avoid numerical issues when + !! the object itself it axis aligned. + !! @param[in] padding The padding of the aabb. + subroutine add_padding(this, padding) + class(aabb_t), intent(inout) :: this + real(kind=dp), intent(in) :: padding + real(kind=dp) :: box_min(3), box_max(3) + + box_min = this%box_min - padding * (this%diameter) + box_max = this%box_max + padding * (this%diameter) + + call this%init(box_min, box_max) + end subroutine add_padding + + !> @brief Get the aabb of an arbitrary element. + !! + !! @details This function calculates the aabb of an element. The aabb is + !! defined by the lower left front corner and the upper right back corner. + !! The aabb is calculated by finding the minimum and maximum x, y and z + !! coordinate for all points in the arbitrary element type. + !! + !! @param object The arbitrary element to get the aabb of. + !! @return The aabb of the element. + function get_aabb_element(object) result(box) + class(element_t), intent(in) :: object + type(aabb_t) :: box + + integer :: i + type(point_t), pointer :: pi + real(kind=dp) :: box_min(3), box_max(3) + + box_min = huge(0.0_dp); box_max = -huge(0.0_dp) + + do i = 1, object%n_points() + pi => object%p(i) + box_min = min(box_min, pi%x) + box_max = max(box_max, pi%x) + end do + + call box%init(box_min, box_max) + end function get_aabb_element + + !> @brief Get the aabb of a mesh. + !! + !! @details This function calculates the aabb of a mesh. The aabb is + !! defined by the lower left front corner and the upper right back corner. + !! The aabb is calculated by merging the aabb of all elements in the mesh. + !! + !! @param object The mesh to get the aabb of. + !! @return The aabb of the mesh. + function get_aabb_mesh(object) result(box) + type(mesh_t), intent(in) :: object + type(aabb_t) :: box + + integer :: i + type(aabb_t) :: temp_box + + do i = 1, object%nelv + temp_box = get_aabb(object%elements(i)) + box = merge(box, temp_box) + end do + + end function get_aabb_mesh + + !> @brief Get the aabb of a triangular mesh. + !! + !! @details This function calculates the aabb of a mesh. The aabb is + !! defined by the lower left front corner and the upper right back corner. + !! The aabb is calculated by merging the aabb of all elements in the mesh. + !! + !! @param object The triangular mesh to get the aabb of. + !! @return The aabb of the mesh. + function get_aabb_tri_mesh(object) result(box) + type(tri_mesh_t), intent(in) :: object + type(aabb_t) :: box + + integer :: i + type(aabb_t) :: temp_box + + do i = 1, object%nelv + temp_box = get_aabb(object%el(i)) + box = merge(box, temp_box) + end do + + end function get_aabb_tri_mesh + + !> @brief Get the aabb of a tetrahedral mesh. + !! + !! @details This function calculates the aabb of a mesh. The aabb is + !! defined by the lower left front corner and the upper right back corner. + !! The aabb is calculated by merging the aabb of all elements in the mesh. + !! + !! @param object The tetrahedral mesh to get the aabb of. + !! @return The aabb of the mesh. + function get_aabb_tet_mesh(object) result(box) + type(tet_mesh_t), intent(in) :: object + type(aabb_t) :: box + + integer :: i + type(aabb_t) :: temp_box + + do i = 1, object%nelv + temp_box = get_aabb(object%el(i)) + box = merge(box, temp_box) + end do + + end function get_aabb_tet_mesh + + + ! ========================================================================== ! + ! Initializers + ! ========================================================================== ! + + !> @brief Initialize the aabb. + !! @param lower_left_front The lower left front corner of the aabb. + !! @param upper_right_back The upper right back corner of the aabb. + subroutine aabb_init(this, lower_left_front, upper_right_back) + class(aabb_t), intent(inout) :: this + real(kind=dp), dimension(3), intent(in) :: lower_left_front + real(kind=dp), dimension(3), intent(in) :: upper_right_back + + this%initialized = .true. + this%box_min = lower_left_front + this%box_max = upper_right_back + this%center = (this%box_min + this%box_max) / 2.0_dp + this%diameter = norm2(this%box_max - this%box_min) + + this%surface_area = this%calculate_surface_area() + + end subroutine aabb_init + + ! ========================================================================== ! + ! Getters + ! ========================================================================== ! + + !> @brief Get the minimum point of the aabb. + pure function aabb_get_min(this) result(min) + class(aabb_t), intent(in) :: this + real(kind=dp), dimension(3) :: min + + min = this%box_min + end function aabb_get_min + + !> @brief Get the maximum point of the aabb. + pure function aabb_get_max(this) result(max) + class(aabb_t), intent(in) :: this + real(kind=dp), dimension(3) :: max + + max = this%box_max + end function aabb_get_max + + !> @brief Get the width of the aabb. Also known as the x-axis length. + pure function aabb_get_width(this) result(width) + class(aabb_t), intent(in) :: this + real(kind=dp) :: width + + width = this%box_max(1) - this%box_min(1) + end function aabb_get_width + + !> @brief Get the depth of the aabb. Also known as the y-axis length. + pure function aabb_get_depth(this) result(depth) + class(aabb_t), intent(in) :: this + real(kind=dp) :: depth + + depth = this%box_max(2) - this%box_min(2) + end function aabb_get_depth + + !> @brief Get the height of the aabb. Also known as the z-axis length. + pure function aabb_get_height(this) result(height) + class(aabb_t), intent(in) :: this + real(kind=dp) :: height + + height = this%box_max(3) - this%box_min(3) + end function aabb_get_height + + !> @brief Get the diameter length of the aabb. + pure function aabb_get_diameter(this) result(diameter) + class(aabb_t), intent(in) :: this + real(kind=dp) :: diameter + + diameter = this%diameter + end function aabb_get_diameter + + !> @brief Get the surface area of the aabb. + pure function aabb_get_surface_area(this) result(surface_area) + class(aabb_t), intent(in) :: this + real(kind=dp) :: surface_area + + surface_area = this%surface_area + end function aabb_get_surface_area + + !> @brief Get the center of the aabb. + pure function aabb_get_center(this) result(center) + class(aabb_t), intent(in) :: this + real(kind=dp), dimension(3) :: center + + center = this%center + end function aabb_get_center + + !> @brief Get the diagonal of the aabb. + pure function aabb_get_diagonal(this) result(diagonal) + class(aabb_t), intent(in) :: this + real(kind=dp), dimension(3) :: diagonal + + diagonal = this%box_max - this%box_min + end function aabb_get_diagonal + + ! ========================================================================== ! + ! Operations + ! ========================================================================== ! + + !> @brief Check if two aabbs are overlapping. + function aabb_overlaps(this, other) result(is_overlapping) + class(aabb_t), intent(in) :: this + class(aabb_t), intent(in) :: other + logical :: is_overlapping + + if (.not. this%initialized .or. .not. other%initialized) then + ! call neko_error("aabb_overlaps: One or both aabbs are not initialized") + is_overlapping = .false. + else + + is_overlapping = all(this%box_min .le. other%box_max) .and. & + all(this%box_max .ge. other%box_min) + end if + + end function aabb_overlaps + + !> @brief Check if this aabb contains another aabb. + function aabb_contains_other(this, other) result(is_contained) + class(aabb_t), intent(in) :: this + class(aabb_t), intent(in) :: other + logical :: is_contained + + ! if (.not. this%initialized .or. .not. other%initialized) then + ! call neko_error("aabb_contains: One or both aabbs are not initialized") + ! end if + + is_contained = all(this%box_min .le. other%box_min) .and. & + all(this%box_max .ge. other%box_max) + + end function aabb_contains_other + + !> @brief Check if this aabb contains a point. + function aabb_contains_point(this, p) result(is_contained) + class(aabb_t), intent(in) :: this + real(kind=dp), dimension(3), intent(in) :: p + logical :: is_contained + + ! if (.not. this%initialized) then + ! call neko_error("aabb_contains_point: One or both aabbs are not initialized") + ! end if + + is_contained = all(p .ge. this%box_min) .and. all(p .le. this%box_max) + end function aabb_contains_point + + ! ========================================================================== ! + ! Binary operations + ! ========================================================================== ! + + !> @brief Merge two aabbs. + function merge_aabb(box1, box2) result(merged) + class(aabb_t), intent(in) :: box1 + class(aabb_t), intent(in) :: box2 + type(aabb_t) :: merged + + real(kind=dp), dimension(3) :: box_min, box_max + + box_min = min(box1%box_min, box2%box_min) + box_max = max(box1%box_max, box2%box_max) + + call merged%init(box_min, box_max) + end function merge_aabb + + !> @brief Get the intersection of two aabbs. + function intersection_aabb(box1, box2) result(intersected) + class(aabb_t), intent(in) :: box1 + class(aabb_t), intent(in) :: box2 + type(aabb_t) :: intersected + + real(kind=dp), dimension(3) :: box_min, box_max + + box_min = max(box1%box_min, box2%box_min) + box_max = min(box1%box_max, box2%box_max) + + call intersected%init(box_min, box_max) + end function intersection_aabb + + ! ========================================================================== ! + ! Private operations + ! ========================================================================== ! + + !> @brief Calculate the surface area of the aabb. + pure function calculate_surface_area(this) result(surface_area) + class(aabb_t), intent(in) :: this + real(kind=dp) :: surface_area + + surface_area = 2.0 * (& + & this%get_width() * this%get_height() & + & + this%get_width() * this%get_depth() & + & + this%get_height() * this%get_depth() & + &) + end function calculate_surface_area + + ! ========================================================================== ! + ! Comparison operators + ! ========================================================================== ! + + !> @brief Less than comparison operator. + pure function aabb_less(this, other) + class(aabb_t), intent(in) :: this + class(aabb_t), intent(in) :: other + logical :: aabb_less + logical :: equal + + if (.not. this%initialized .or. .not. other%initialized) then + aabb_less = .false. + return + end if + + aabb_less = this%box_min(1) .lt. other%box_min(1) + equal = this%box_min(1) .le. other%box_min(1) + + if (.not. aabb_less .and. equal) then + aabb_less = this%box_min(2) .lt. other%box_min(2) + equal = this%box_min(2) .le. other%box_min(2) + end if + if (.not. aabb_less .and. equal) then + aabb_less = this%box_min(3) .lt. other%box_min(3) + equal = this%box_min(3) .le. other%box_min(3) + end if + if (.not. aabb_less .and. equal) then + aabb_less = this%box_max(1) .lt. other%box_max(1) + equal = this%box_max(1) .le. other%box_max(1) + end if + if (.not. aabb_less .and. equal) then + aabb_less = this%box_max(2) .lt. other%box_max(2) + equal = this%box_max(2) .le. other%box_max(2) + end if + if (.not. aabb_less .and. equal) then + aabb_less = this%box_max(3) .lt. other%box_max(3) + end if + + end function aabb_less + + !> @brief Greater than comparison operator. + pure function aabb_greater(this, other) + class(aabb_t), intent(in) :: this + class(aabb_t), intent(in) :: other + logical :: aabb_greater + logical :: equal + + if (.not. this%initialized .or. .not. other%initialized) then + aabb_greater = .false. + return + end if + + aabb_greater = this%box_min(1) .gt. other%box_min(1) + equal = this%box_min(1) .ge. other%box_min(1) + + if (.not. aabb_greater .and. equal) then + aabb_greater = this%box_min(2) .gt. other%box_min(2) + equal = this%box_min(2) .ge. other%box_min(2) + end if + if (.not. aabb_greater .and. equal) then + aabb_greater = this%box_min(3) .gt. other%box_min(3) + equal = this%box_min(3) .ge. other%box_min(3) + end if + if (.not. aabb_greater .and. equal) then + aabb_greater = this%box_max(1) .gt. other%box_max(1) + equal = this%box_max(1) .ge. other%box_max(1) + end if + if (.not. aabb_greater .and. equal) then + aabb_greater = this%box_max(2) .gt. other%box_max(2) + equal = this%box_max(2) .ge. other%box_max(2) + end if + if (.not. aabb_greater .and. equal) then + aabb_greater = this%box_max(3) .gt. other%box_max(3) + end if + + end function aabb_greater + +end module aabb diff --git a/src/mesh/tet.f90 b/src/mesh/tet.f90 index 57afabd6a37..0957489554d 100644 --- a/src/mesh/tet.f90 +++ b/src/mesh/tet.f90 @@ -49,14 +49,14 @@ module tet !! @details !! 3D element composed of 4 points !! @verbatim - !! Node numbering + !! Node numbering !! - !! 3 + ^ s - !! /|\ | - !! / | \ | - !! 1 +..|..+ 2 +----> r - !! \ | / / - !! \|/ / + !! 3 + ^ s + !! /|\ | + !! / | \ | + !! 1 +..|..+ 2 +----> r + !! \ | / / + !! \|/ / !! 4 + t !! !! @endverbatim @@ -67,7 +67,7 @@ module tet procedure, pass(this) :: facet_order => tet_facet_order procedure, pass(this) :: diameter => tet_diameter procedure, pass(this) :: centroid => tet_centroid - procedure, pass(this) :: edge_id => tet_edge_id + procedure, pass(this) :: edge_id => tet_edge_id procedure, pass(this) :: equal => tet_equal generic :: operator(.eq.) => equal end type tet_t @@ -75,16 +75,16 @@ module tet !> Face node ids !! @details !! @verbatim - !! Face numbering + !! Face numbering !! - !! + 4 ^ s + !! + 4 ^ s !! /|\ / | !! / | \ | - !! / 1|2 \ | - !! +...|...+ +----> r + !! / 1|2 \ | + !! +...|...+ +----> r !! \ |3 / / !! \ | / / - !! \|/ / + !! \|/ / !! + t !! !! @endverbatim @@ -94,20 +94,20 @@ module tet 1,2,4,& 1,2,3/),& (/3,4/)) - + !> Edge node ids !! @details !! @verbatim - !! Edge numbering + !! Edge numbering !! - !! 2 + 3 ^ s + !! 2 + 3 ^ s !! \ /|\ / | !! / | \ | - !! / | \ | - !! +.1.|...+ +----> r + !! / | \ | + !! +.1.|...+ +----> r !! \ 4 / / !! 5--> \ | / <--6 / - !! \|/ / + !! \|/ / !! + t !! !! @endverbatim @@ -118,9 +118,9 @@ module tet 1,4,& 2,4/),& (/2,6/)) - + contains - + !> Create a tetrahedral element based upon four points subroutine tet_init(this, id, p1, p2, p3, p4) class(tet_t), intent(inout) :: this @@ -128,7 +128,7 @@ subroutine tet_init(this, id, p1, p2, p3, p4) type(point_t), target, intent(in) :: p1, p2, p3, p4 call this%element(id, NEKO_TET_GDIM, NEKO_TET_NPTS) - + this%pts(1)%p => p1 this%pts(2)%p => p2 this%pts(3)%p => p3 @@ -137,7 +137,7 @@ subroutine tet_init(this, id, p1, p2, p3, p4) end subroutine tet_init !> Return the facet id for face @a i as a 3-tuple @a t - subroutine tet_facet_id(this, t, side) + subroutine tet_facet_id(this, t, side) class(tet_t), intent(in) :: this class(tuple_t), intent(inout) :: t integer, intent(in) :: side @@ -165,7 +165,7 @@ subroutine tet_facet_id(this, t, side) end subroutine tet_facet_id !> Return the ordered points for face @a i as a 3-tuple @a t - subroutine tet_facet_order(this, t, side) + subroutine tet_facet_order(this, t, side) class(tet_t), intent(in) :: this class(tuple_t), intent(inout) :: t integer, intent(in) :: side @@ -179,12 +179,12 @@ subroutine tet_facet_order(this, t, side) type is(tuple3_i4_t) t%x = (/ p1%id(), p2%id(), p3%id() /) end select - + end subroutine tet_facet_order !> Return the edge id for an edge @a i as a 2-tuple @a t - subroutine tet_edge_id(this, t, side) + subroutine tet_edge_id(this, t, side) class(tet_t), intent(in) :: this class(tuple_t), intent(inout) :: t integer, intent(in) :: side @@ -197,14 +197,14 @@ subroutine tet_edge_id(this, t, side) type is(tuple_i4_t) if (p1%id() .lt. p2%id()) then t%x = (/ p1%id(), p2%id() /) - else + else t%x = (/ p2%id(), p1%id() /) - endif + endif end select end subroutine tet_edge_id - + !> Compute the diameter of a tetrahedral element function tet_diameter(this) result(res) class(tet_t), intent(in) :: this @@ -260,7 +260,7 @@ function tet_centroid(this) result(res) do i = 1, this%gdim() res%x(i) = 0.25 * (p1%x(i) + p2%x(i) + p3%x(i) + p4%x(i)) end do - + end function tet_centroid !> Check if two tet elements are equal @@ -286,5 +286,5 @@ pure function tet_equal(this, other) result(res) end select end function tet_equal - + end module tet diff --git a/src/mesh/tet_mesh.f90 b/src/mesh/tet_mesh.f90 index 92699c500b6..b1899561460 100644 --- a/src/mesh/tet_mesh.f90 +++ b/src/mesh/tet_mesh.f90 @@ -39,7 +39,7 @@ module tet_mesh use utils implicit none private - + integer, public, parameter :: TET_MSH_OTPV = 1, TET_MSH_FVTC = 2, & TET_MSH_SVTC = 3 @@ -60,9 +60,9 @@ subroutine tet_mesh_init(this, msh, mthd) type(mesh_t), intent(in), target :: msh integer, intent(in), optional :: mthd integer :: bsct_mthd - + call this%free() - + this%msh => msh if (present(mthd)) then @@ -98,7 +98,7 @@ subroutine tet_mesh_free(this) end if nullify(this%msh) - + end subroutine tet_mesh_free ! Bisect hexahedral mesh into a tetrahedral mesh using @@ -109,7 +109,7 @@ subroutine tet_mesh_bisect_otpv(tet_msh) type(tet_mesh_t), intent(inout) :: tet_msh integer :: i, j type(point_t), pointer :: p1, p2, p3, p4 - + j = 0 do i = 1, tet_msh%msh%nelv @@ -170,7 +170,7 @@ subroutine tet_mesh_bisect_otpv(tet_msh) call tet_msh%el(j)%init(j, p1, p2, p3, p4) end do - + end subroutine tet_mesh_bisect_otpv !> Bisect each hexahedron into five tetrahedrons @@ -269,10 +269,10 @@ subroutine tet_mesh_bisect_svtc(tet_msh) p3 => tet_msh%msh%elements(i)%e%pts(3)%p p4 => tet_msh%msh%elements(i)%e%pts(4)%p call tet_msh%el(j)%init(j, p1, p2, p3, p4) - + end do - - + + end subroutine tet_mesh_bisect_svtc - + end module tet_mesh diff --git a/src/mesh/tri.f90 b/src/mesh/tri.f90 index 3ab95fdfa83..45168d811f1 100644 --- a/src/mesh/tri.f90 +++ b/src/mesh/tri.f90 @@ -50,9 +50,9 @@ module tri !! Node numbering !! !! 3+ - !! |\ + !! |\ !! | \ ^ s - !! | \ | + !! | \ | !! | \ | !! 1+----+2 +---> r !! @@ -74,9 +74,9 @@ module tri !! Edge numbering !! !! + - !! |\ + !! |\ !! | \ ^ s - !! 1 | \ 2 | + !! 1 | \ 2 | !! | \ | !! +----+ +---> r !! 3 @@ -109,7 +109,7 @@ subroutine tri_facet_id(this, t, side) class(tri_t), intent(in) :: this class(tuple_t), intent(inout) :: t integer, intent(in) :: side - type(point_t), pointer :: p1, p2 + type(point_t), pointer :: p1, p2 p1 => this%p(edge_nodes(1, side)) p2 => this%p(edge_nodes(2, side)) @@ -122,7 +122,7 @@ subroutine tri_facet_id(this, t, side) t%x = (/ p2%id(), p1%id() /) end if end select - + end subroutine tri_facet_id !> Return the ordered edge for face @a i as a 2-tuple @a t @@ -189,7 +189,7 @@ end function tri_centroid pure function tri_equal(this, other) result(res) class(tri_t), intent(in) :: this class(element_t), intent(in) :: other - integer :: i + integer :: i logical :: res res = .false. @@ -205,7 +205,7 @@ pure function tri_equal(this, other) result(res) res = .true. end if end select - + end function tri_equal - + end module tri diff --git a/src/mesh/tri_mesh.f90 b/src/mesh/tri_mesh.f90 index 7a044f56ad2..a6367fee49f 100644 --- a/src/mesh/tri_mesh.f90 +++ b/src/mesh/tri_mesh.f90 @@ -31,7 +31,7 @@ ! POSSIBILITY OF SUCH DAMAGE. ! !> Defines a triangular surface mesh -!! @details Mesh derived from a surface geometry +!! @details Mesh derived from a surface geometry module tri_mesh use tri use point, only : point_t @@ -43,7 +43,6 @@ module tri_mesh type(point_t), allocatable :: points(:) !< List of points integer :: nelv integer :: mpts - integer, private :: melv contains procedure, pass(this) :: init => tri_mesh_init procedure, pass(this) :: free => tri_mesh_free @@ -64,8 +63,8 @@ subroutine tri_mesh_init(this, nelv) allocate(this%points(nelv * NEKO_TRI_NPTS)) this%mpts = 0 - this%melv = 0 - + this%nelv = 0 + end subroutine tri_mesh_init !> Deallocate a triangular surface mesh @@ -79,7 +78,7 @@ subroutine tri_mesh_free(this) if (allocated(this%points)) then deallocate(this%points) end if - + end subroutine tri_mesh_free !> Add an element to a mesh @@ -91,13 +90,13 @@ subroutine tri_mesh_add_element(this, p1, p2, p3) this%points(this%mpts + 2) = p2 this%points(this%mpts + 3) = p3 - this%melv = this%melv + 1 - call this%el(this%melv)%init(this%melv, & + this%nelv = this%nelv + 1 + call this%el(this%nelv)%init(this%nelv, & this%points(this%mpts + 1), & this%points(this%mpts + 2), & this%points(this%mpts + 3)) this%mpts = this%mpts + 3 end subroutine tri_mesh_add_element - + end module tri_mesh diff --git a/src/neko.f90 b/src/neko.f90 index 483cb07aa5e..bfef342f551 100644 --- a/src/neko.f90 +++ b/src/neko.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2019-2023, The Neko Authors +! Copyright (c) 2019-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -51,7 +51,7 @@ module neko use mxm_wrapper use global_interpolation use file - use field, only : field_t + use field, only : field_t, field_ptr_t use neko_mpi_types use gather_scatter use coefs @@ -76,19 +76,23 @@ module neko use jobctrl use device use device_math + use map_1d use cpr use fluid_stats use field_list, only : field_list_t use fluid_user_source_term + use scalar_user_source_term use vector + use matrix use tensor use simulation_component use probes use spectral_error_indicator use system + use drag_torque use field_registry, only : neko_field_registry use scratch_registry, only : neko_scratch_registry - use simulation_component_global, only : simcomps_global_init + use simcomp_executor, only : neko_simcomps use data_streamer use time_interpolator use point_interpolator, only : point_interpolator_t @@ -111,7 +115,7 @@ subroutine neko_init(C) character(8) :: date integer :: argc, nthrds, rw, sw - call date_and_time(time=time, date=date) + call date_and_time(time=time, date=date) call comm_init call neko_mpi_types_init @@ -153,12 +157,12 @@ subroutine neko_init(C) ! ! Job information ! - call neko_log%section("Job Information") + call neko_log%section("Job Information") write(log_buf, '(A,A,A,A,1x,A,1x,A,A,A,A,A)') 'Start time: ',& time(1:2),':',time(3:4), '/', date(1:4),'-', date(5:6),'-',date(7:8) - call neko_log%message(log_buf) + call neko_log%message(log_buf, NEKO_LOG_QUIET) write(log_buf, '(a)') 'Running on: ' - sw = 10 + sw = 10 if (pe_size .lt. 1e1) then write(log_buf(13:), '(i1,a)') pe_size, ' MPI ' if (pe_size .eq. 1) then @@ -184,7 +188,7 @@ subroutine neko_init(C) write(log_buf(13:), '(i6,a)') pe_size, ' MPI ranks' rw = 6 end if - + nthrds = 1 !$omp parallel !$omp master @@ -193,7 +197,7 @@ subroutine neko_init(C) !$omp end parallel if (nthrds .gt. 1) then - if (nthrds .lt. 1e1) then + if (nthrds .lt. 1e1) then write(log_buf(13 + rw + sw:), '(a,i1,a)') ', using ', & nthrds, ' thrds each' else if (nthrds .lt. 1e2) then @@ -207,11 +211,11 @@ subroutine neko_init(C) nthrds, ' thrds each' end if end if - call neko_log%message(log_buf) + call neko_log%message(log_buf, NEKO_LOG_QUIET) write(log_buf, '(a)') 'CPU type : ' call system_cpu_name(log_buf(13:)) - call neko_log%message(log_buf) + call neko_log%message(log_buf, NEKO_LOG_QUIET) write(log_buf, '(a)') 'Bcknd type: ' if (NEKO_BCKND_SX .eq. 1) then @@ -227,13 +231,13 @@ subroutine neko_init(C) else write(log_buf(13:), '(a)') 'CPU' end if - call neko_log%message(log_buf) + call neko_log%message(log_buf, NEKO_LOG_QUIET) if (NEKO_BCKND_HIP .eq. 1 .or. NEKO_BCKND_CUDA .eq. 1 .or. & NEKO_BCKND_OPENCL .eq. 1) then write(log_buf, '(a)') 'Dev. name : ' call device_name(log_buf(13:)) - call neko_log%message(log_buf) + call neko_log%message(log_buf, NEKO_LOG_QUIET) end if write(log_buf, '(a)') 'Real type : ' @@ -245,7 +249,7 @@ subroutine neko_init(C) case (real128) write(log_buf(13:), '(a)') 'quad precision' end select - call neko_log%message(log_buf) + call neko_log%message(log_buf, NEKO_LOG_QUIET) call neko_log%end() @@ -257,10 +261,10 @@ subroutine neko_init(C) ! ! Create simulation components ! - call simcomps_global_init(C) - + call neko_simcomps%init(C) + end if - + end subroutine neko_init subroutine neko_finalize(C) @@ -269,7 +273,7 @@ subroutine neko_finalize(C) if (present(C)) then call case_free(C) end if - + call neko_field_registry%free() call neko_scratch_registry%free() call device_finalize diff --git a/src/qoi/drag_torque.f90 b/src/qoi/drag_torque.f90 new file mode 100644 index 00000000000..a87f9f38b35 --- /dev/null +++ b/src/qoi/drag_torque.f90 @@ -0,0 +1,367 @@ +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! +! The UChicago Argonne, LLC as Operator of Argonne National +! Laboratory holds copyright in the Software. The copyright holder +! reserves all rights except those expressly granted to licensees, +! and U.S. Government license rights. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the disclaimer below. +! +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions and the disclaimer (as noted below) +! in the documentation and/or other materials provided with the +! distribution. +! +! 3. Neither the name of ANL nor the names of its contributors +! may be used to endorse or promote products derived from this software +! without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +! +! Additional BSD Notice +! --------------------- +! 1. This notice is required to be provided under our contract with +! the U.S. Department of Energy (DOE). This work was produced at +! Argonne National Laboratory under Contract +! No. DE-AC02-06CH11357 with the DOE. +! +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, +! express or implied, or assumes any liability or responsibility for the +! accuracy, completeness, or usefulness of any information, apparatus, +! product, or process disclosed, or represents that its use would not +! infringe privately-owned rights. +! +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall +! not be used for advertising or product endorsement purposes. +! + +module drag_torque + use field, only: field_t + use coefs, only: coef_t + use mesh + use facet_zone + use comm + use math + use space, only: space_t + use num_types, only: rp + use operators + implicit none + private + !> Some functions to calculate the lift/drag and torque + !! Calculation can be done on a zone, a facet, or a point + !! Currently everything is CPU only + public :: drag_torque_zone, drag_torque_facet, drag_torque_pt + +contains + !> Calculate drag and torque over a zone. + !! @param dgtq, the computed drag and torque + !! @param tstep, the time step + !! @param zone, the zone which we compute the drag and toqure over + !! @param center, the point around which we calculate the torque + !! @param s11-s23, the strain rate tensor + !! @param p, the pressure + !! @param coef, coefficents + !! @param visc, the viscosity + subroutine drag_torque_zone(dgtq, tstep, zone, center, s11, s22, s33, s12, s13, s23,& + p, coef, visc) + integer, intent(in) :: tstep + type(facet_zone_t) :: zone + type(coef_t), intent(inout) :: coef + real(kind=rp), intent(inout) :: s11(coef%Xh%lx,coef%Xh%lx,coef%Xh%lz,coef%msh%nelv) + real(kind=rp), intent(inout) :: s22(coef%Xh%lx,coef%Xh%lx,coef%Xh%lz,coef%msh%nelv) + real(kind=rp), intent(inout) :: s33(coef%Xh%lx,coef%Xh%lx,coef%Xh%lz,coef%msh%nelv) + real(kind=rp), intent(inout) :: s12(coef%Xh%lx,coef%Xh%lx,coef%Xh%lz,coef%msh%nelv) + real(kind=rp), intent(inout) :: s13(coef%Xh%lx,coef%Xh%lx,coef%Xh%lz,coef%msh%nelv) + real(kind=rp), intent(inout) :: s23(coef%Xh%lx,coef%Xh%lx,coef%Xh%lz,coef%msh%nelv) + type(field_t), intent(inout) :: p + real(kind=rp), intent(in) :: visc, center(3) + real(kind=rp) :: dgtq(3,4) + real(kind=rp) :: dragpx = 0.0_rp ! pressure + real(kind=rp) :: dragpy = 0.0_rp + real(kind=rp) :: dragpz = 0.0_rp + real(kind=rp) :: dragvx = 0.0_rp ! viscous + real(kind=rp) :: dragvy = 0.0_rp + real(kind=rp) :: dragvz = 0.0_rp + real(kind=rp) :: torqpx = 0.0_rp ! pressure + real(kind=rp) :: torqpy = 0.0_rp + real(kind=rp) :: torqpz = 0.0_rp + real(kind=rp) :: torqvx = 0.0_rp ! viscous + real(kind=rp) :: torqvy = 0.0_rp + real(kind=rp) :: torqvz = 0.0_rp + real(kind=rp) :: dragx, dragy, dragz + real(kind=rp) :: torqx, torqy, torqz + integer :: ie, ifc, mem, ierr + dragx = 0.0 + dragy = 0.0 + dragz = 0.0 + +! +! Fill up viscous array w/ default +! + dragpx = 0.0 + dragpy = 0.0 + dragpz = 0.0 + dragvx = 0.0 + dragvy = 0.0 + dragvz = 0.0 + do mem = 1,zone%size + ie = zone%facet_el(mem)%x(2) + ifc = zone%facet_el(mem)%x(1) + call drag_torque_facet(dgtq,coef%dof%x,coef%dof%y,coef%dof%z,& + center,& + s11, s22, s33, s12, s13, s23,& + p%x,visc,ifc,ie, coef, coef%Xh) + + dragpx = dragpx + dgtq(1,1) ! pressure + dragpy = dragpy + dgtq(2,1) + dragpz = dragpz + dgtq(3,1) + + dragvx = dragvx + dgtq(1,2) ! viscous + dragvy = dragvy + dgtq(2,2) + dragvz = dragvz + dgtq(3,2) + + torqpx = torqpx + dgtq(1,3) ! pressure + torqpy = torqpy + dgtq(2,3) + torqpz = torqpz + dgtq(3,3) + + torqvx = torqvx + dgtq(1,4) ! viscous + torqvy = torqvy + dgtq(2,4) + torqvz = torqvz + dgtq(3,4) + enddo +! +! Sum contributions from all processors +! + call MPI_Allreduce(MPI_IN_PLACE,dragpx, 1, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + call MPI_Allreduce(MPI_IN_PLACE,dragpy, 1, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + call MPI_Allreduce(MPI_IN_PLACE,dragpz, 1, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + call MPI_Allreduce(MPI_IN_PLACE,dragvx, 1, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + call MPI_Allreduce(MPI_IN_PLACE,dragvy, 1, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + call MPI_Allreduce(MPI_IN_PLACE,dragvz, 1, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + !Torque + call MPI_Allreduce(MPI_IN_PLACE,torqpx, 1, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + call MPI_Allreduce(MPI_IN_PLACE,torqpy, 1, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + call MPI_Allreduce(MPI_IN_PLACE,torqpz, 1, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + call MPI_Allreduce(MPI_IN_PLACE,torqvx, 1, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + call MPI_Allreduce(MPI_IN_PLACE,torqvy, 1, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + call MPI_Allreduce(MPI_IN_PLACE,torqvz, 1, & + MPI_REAL_PRECISION, MPI_SUM, NEKO_COMM, ierr) + + dgtq(1,1) = dragpx ! pressure + dgtq(2,1) = dragpy + dgtq(3,1) = dragpz + + dgtq(1,2) = dragvx ! viscous + dgtq(2,2) = dragvy + dgtq(3,2) = dragvz + + dgtq(1,3) = torqpx ! pressure + dgtq(2,3) = torqpy + dgtq(3,3) = torqpz + + dgtq(1,4) = torqvx ! viscous + dgtq(2,4) = torqvy + dgtq(3,4) = torqvz + + end subroutine drag_torque_zone + + !> Calculate drag and torque over a facet. + !! @param dgtq, the computed drag and torque + !! @param tstep, the time step + !! @param xm0, the x coords + !! @param ym0, the y coords + !! @param zm0, the z coords + !! @param center, the point around which we calculate the torque + !! @param s11-s23, the strain rate tensor + !! @param p, the pressure + !! @param coef, coefficents + !! @param visc, the viscosity + subroutine drag_torque_facet(dgtq,xm0,ym0,zm0, center,& + s11, s22, s33, s12, s13, s23,& + pm1,visc,f,e, coef, Xh) + type(coef_t), intent(in) :: coef + type(space_t), intent(in) :: Xh + real(kind=rp), intent(out) :: dgtq(3,4) + real(kind=rp), intent(in) :: center(3) + real(kind=rp), intent(in) :: xm0 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv) + real(kind=rp), intent(in) :: ym0 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv) + real(kind=rp), intent(in) :: zm0 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv) + real(kind=rp), intent(in) :: s11 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv) + real(kind=rp), intent(in) :: s22 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv) + real(kind=rp), intent(in) :: s33 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv) + real(kind=rp), intent(in) :: s12 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv) + real(kind=rp), intent(in) :: s13 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv) + real(kind=rp), intent(in) :: s23 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv) + real(kind=rp), intent(in) :: pm1 (Xh%lx,xh%ly,Xh%lz,coef%msh%nelv) + real(kind=rp), intent(in) :: visc + integer, intent(in) :: f,e + integer :: pf,l, k, i, j1, j2 + real(kind=rp) :: n1,n2,n3, j, a, r1, r2, r3, v, dgtq_i(3,4) + integer :: skpdat(6,6), NX, NY, NZ + integer :: js1 + integer :: jf1 + integer :: jskip1 + integer :: js2 + integer :: jf2 + integer :: jskip2 + real(kind=rp) :: s11_, s21_, s31_, s12_, s22_, s32_, s13_, s23_, s33_ + + + NX = Xh%lx + NY = Xh%ly + NZ = Xh%lz + SKPDAT(1,1)=1 + SKPDAT(2,1)=NX*(NY-1)+1 + SKPDAT(3,1)=NX + SKPDAT(4,1)=1 + SKPDAT(5,1)=NY*(NZ-1)+1 + SKPDAT(6,1)=NY + + SKPDAT(1,2)=1 + (NX-1) + SKPDAT(2,2)=NX*(NY-1)+1 + (NX-1) + SKPDAT(3,2)=NX + SKPDAT(4,2)=1 + SKPDAT(5,2)=NY*(NZ-1)+1 + SKPDAT(6,2)=NY + + SKPDAT(1,3)=1 + SKPDAT(2,3)=NX + SKPDAT(3,3)=1 + SKPDAT(4,3)=1 + SKPDAT(5,3)=NY*(NZ-1)+1 + SKPDAT(6,3)=NY + + SKPDAT(1,4)=1 + NX*(NY-1) + SKPDAT(2,4)=NX + NX*(NY-1) + SKPDAT(3,4)=1 + SKPDAT(4,4)=1 + SKPDAT(5,4)=NY*(NZ-1)+1 + SKPDAT(6,4)=NY + + SKPDAT(1,5)=1 + SKPDAT(2,5)=NX + SKPDAT(3,5)=1 + SKPDAT(4,5)=1 + SKPDAT(5,5)=NY + SKPDAT(6,5)=1 + + SKPDAT(1,6)=1 + NX*NY*(NZ-1) + SKPDAT(2,6)=NX + NX*NY*(NZ-1) + SKPDAT(3,6)=1 + SKPDAT(4,6)=1 + SKPDAT(5,6)=NY + SKPDAT(6,6)=1 + pf = f + js1 = skpdat(1,pf) + jf1 = skpdat(2,pf) + jskip1 = skpdat(3,pf) + js2 = skpdat(4,pf) + jf2 = skpdat(5,pf) + jskip2 = skpdat(6,pf) + call rzero(dgtq,12) + i = 0 + a = 0 + do j2=js2,jf2,jskip2 + do j1=js1,jf1,jskip1 + i = i+1 + n1 = coef%nx(i,1,f,e)*coef%area(i,1,f,e) + n2 = coef%ny(i,1,f,e)*coef%area(i,1,f,e) + n3 = coef%nz(i,1,f,e)*coef%area(i,1,f,e) + a = a + coef%area(i,1,f,e) + v = visc + s11_ = s11(j1,j2,1,e) + s12_ = s12(j1,j2,1,e) + s22_ = s22(j1,j2,1,e) + s13_ = s13(j1,j2,1,e) + s23_ = s23(j1,j2,1,e) + s33_ = s33(j1,j2,1,e) + call drag_torque_pt(dgtq_i,xm0(j1,j2,1,e), ym0(j1,j2,1,e),zm0(j1,j2,1,e), center,& + s11_, s22_, s33_, s12_, s13_, s23_,& + pm1(j1,j2,1,e), n1, n2, n3, v) + dgtq = dgtq + dgtq_i + end do + end do + end subroutine drag_torque_facet + + !> Calculate drag and torque from one point + !! @param dgtq, the computed drag and torque + !! @param xm0, the x coord + !! @param ym0, the y coord + !! @param zm0, the z coord + !! @param center, the point around which we calculate the torque + !! @param s11-s23, the strain rate tensor + !! @param p, the pressure + !! @param n1, normal vector x + !! @param n2, normal vector y + !! @param n3, normal vector z + !! @param v, the viscosity + subroutine drag_torque_pt(dgtq,x,y,z, center, s11, s22, s33, s12, s13, s23,& + p,n1, n2, n3,v) + real(kind=rp), intent(inout) :: dgtq(3,4) + real(kind=rp), intent(in) :: x + real(kind=rp), intent(in) :: y + real(kind=rp), intent(in) :: z + real(kind=rp), intent(in) :: p + real(kind=rp), intent(in) :: v + real(kind=rp), intent(in) :: n1, n2, n3, center(3) + real(kind=rp), intent(in) :: s11, s12, s22, s13, s23, s33 + real(kind=rp) :: s21, s31, s32, r1, r2, r3 + call rzero(dgtq,12) + s21 = s12 + s32 = s23 + s31 = s13 + !pressure drag + dgtq(1,1) = p*n1 + dgtq(2,1) = p*n2 + dgtq(3,1) = p*n3 + ! viscous drag + dgtq(1,2) = -v*(s11*n1 + s12*n2 + s13*n3) + dgtq(2,2) = -v*(s21*n1 + s22*n2 + s23*n3) + dgtq(3,2) = -v*(s31*n1 + s32*n2 + s33*n3) + r1 = x-center(1) + r2 = y-center(2) + r3 = z-center(3) + !pressure torque + dgtq(1,3) = (r2*dgtq(3,1)-r3*dgtq(2,1)) + dgtq(2,3) = (r3*dgtq(1,1)-r1*dgtq(3,1)) + dgtq(3,3) = (r1*dgtq(2,1)-r2*dgtq(1,1)) + !viscous torque + dgtq(1,4) = (r2*dgtq(3,2)-r3*dgtq(2,2)) + dgtq(2,4) = (r3*dgtq(1,2)-r1*dgtq(3,2)) + dgtq(3,4) = (r1*dgtq(2,2)-r2*dgtq(1,2)) + end subroutine drag_torque_pt + +end module drag_torque diff --git a/src/scalar/bcknd/cpu/scalar_residual_cpu.f90 b/src/scalar/bcknd/cpu/scalar_residual_cpu.f90 index bc8d8dd1a9d..e0609d2c69f 100644 --- a/src/scalar/bcknd/cpu/scalar_residual_cpu.f90 +++ b/src/scalar/bcknd/cpu/scalar_residual_cpu.f90 @@ -1,26 +1,41 @@ -!> Residuals in the Pn-Pn formulation (CPU version) +!> Residuals in the scalar equation (CPU version). module scalar_residual_cpu use gather_scatter use scalar_residual use operators implicit none private - + + !> Wrapper type for the routine to compute the scalar residual on the CPU. type, public, extends(scalar_residual_t) :: scalar_residual_cpu_t contains + !> Compute the residual. procedure, nopass :: compute => scalar_residual_cpu_compute end type scalar_residual_cpu_t contains + !> Compute the residual. + !! @param Ax The Helmholtz operator. + !! @param s The values of the scalar. + !! @param s_res The values of the scalar residual. + !! @param f_xH The right hand side. + !! @param c_xH The SEM coefficients. + !! @param msh The mesh. + !! @param Xh The SEM function space. + !! @param lambda The thermal conductivity. + !! @param rhocp The density multiplied by the specific heat capacity. + !! @param bd The coefficeints from the BDF differencing scheme. + !! @param dt The timestep. + !! @param n The total number of degrees of freedom. subroutine scalar_residual_cpu_compute(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, & lambda, rhocp, bd, dt, n) class(ax_t), intent(in) :: Ax type(mesh_t), intent(inout) :: msh - type(space_t), intent(inout) :: Xh + type(space_t), intent(inout) :: Xh type(field_t), intent(inout) :: s type(field_t), intent(inout) :: s_res - type(source_scalar_t), intent(inout) :: f_Xh + type(field_t), intent(inout) :: f_Xh type(coef_t), intent(inout) :: c_Xh real(kind=rp), intent(in) :: lambda real(kind=rp), intent(in) :: rhocp @@ -32,6 +47,7 @@ subroutine scalar_residual_cpu_compute(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, & do i = 1, n c_Xh%h1(i,1,1,1) = lambda ! todo :should not be just rho here. + ! Tim M. 2023-12-19: What is this todo? c_Xh%h2(i,1,1,1) = rhocp * (bd / dt) end do c_Xh%ifh2 = .true. @@ -39,9 +55,9 @@ subroutine scalar_residual_cpu_compute(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, & call Ax%compute(s_res%x, s%x, c_Xh, msh, Xh) do i = 1, n - s_res%x(i,1,1,1) = (-s_res%x(i,1,1,1)) + f_Xh%s(i,1,1,1) + s_res%x(i,1,1,1) = (-s_res%x(i,1,1,1)) + f_Xh%x(i,1,1,1) end do - - end subroutine scalar_residual_cpu_compute - + + end subroutine scalar_residual_cpu_compute + end module scalar_residual_cpu diff --git a/src/scalar/bcknd/device/cuda/scalar_residual_update_kernel.h b/src/scalar/bcknd/device/cuda/scalar_residual_update_kernel.h index 8e5e9dcbc85..7151c818a3d 100644 --- a/src/scalar/bcknd/device/cuda/scalar_residual_update_kernel.h +++ b/src/scalar/bcknd/device/cuda/scalar_residual_update_kernel.h @@ -1,3 +1,5 @@ +#ifndef __SCALAR_SCALAR_RESIDUAL_UPDATE_KERNEL_H__ +#define __SCALAR_SCALAR_RESIDUAL_UPDATE_KERNEL_H__ /* Copyright (c) 2022, The Neko Authors All rights reserved. @@ -46,3 +48,5 @@ __global__ void scalar_residual_update_kernel(T * __restrict__ s_res, } + +#endif // __SCALAR_SCALAR_RESIDUAL_UPDATE_KERNEL_H__ diff --git a/src/scalar/bcknd/device/hip/scalar_residual_update_kernel.h b/src/scalar/bcknd/device/hip/scalar_residual_update_kernel.h index 8e5e9dcbc85..7151c818a3d 100644 --- a/src/scalar/bcknd/device/hip/scalar_residual_update_kernel.h +++ b/src/scalar/bcknd/device/hip/scalar_residual_update_kernel.h @@ -1,3 +1,5 @@ +#ifndef __SCALAR_SCALAR_RESIDUAL_UPDATE_KERNEL_H__ +#define __SCALAR_SCALAR_RESIDUAL_UPDATE_KERNEL_H__ /* Copyright (c) 2022, The Neko Authors All rights reserved. @@ -46,3 +48,5 @@ __global__ void scalar_residual_update_kernel(T * __restrict__ s_res, } + +#endif // __SCALAR_SCALAR_RESIDUAL_UPDATE_KERNEL_H__ diff --git a/src/scalar/bcknd/device/opencl/scalar_residual_kernel.cl b/src/scalar/bcknd/device/opencl/scalar_residual_kernel.cl index fb36fc63cde..0ca3857f397 100644 --- a/src/scalar/bcknd/device/opencl/scalar_residual_kernel.cl +++ b/src/scalar/bcknd/device/opencl/scalar_residual_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __SCALAR_SCALAR_RESIDUAL_KERNEL_CL__ +#define __SCALAR_SCALAR_RESIDUAL_KERNEL_CL__ /* Copyright (c) 2022-2023, The Neko Authors All rights reserved. @@ -40,3 +42,5 @@ __kernel void scalar_residual_update_kernel(__global real * __restrict__ s_res, } } + +#endif // __SCALAR_SCALAR_RESIDUAL_KERNEL_CL__ diff --git a/src/scalar/bcknd/device/scalar_residual_device.F90 b/src/scalar/bcknd/device/scalar_residual_device.F90 index e8f328260a1..9e65f812ce0 100644 --- a/src/scalar/bcknd/device/scalar_residual_device.F90 +++ b/src/scalar/bcknd/device/scalar_residual_device.F90 @@ -39,7 +39,7 @@ module scalar_residual_device use, intrinsic :: iso_c_binding implicit none private - + type, public, extends(scalar_residual_t) :: scalar_residual_device_t contains procedure, nopass :: compute => scalar_residual_device_compute @@ -57,7 +57,7 @@ subroutine scalar_residual_update_hip(s_res_d,f_s_d, n) & end subroutine scalar_residual_update_hip end interface #elif HAVE_CUDA - + interface subroutine scalar_residual_update_cuda(s_res_d,f_s_d, n) & bind(c, name='scalar_residual_update_cuda') @@ -69,7 +69,7 @@ subroutine scalar_residual_update_cuda(s_res_d,f_s_d, n) & end subroutine scalar_residual_update_cuda end interface #elif HAVE_OPENCL - + interface subroutine scalar_residual_update_opencl(s_res_d,f_s_d, n) & bind(c, name='scalar_residual_update_opencl') @@ -82,7 +82,7 @@ end subroutine scalar_residual_update_opencl end interface #endif - + contains @@ -90,31 +90,31 @@ subroutine scalar_residual_device_compute(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, & lambda, rhocp, bd, dt, n) class(ax_t), intent(in) :: Ax type(mesh_t), intent(inout) :: msh - type(space_t), intent(inout) :: Xh + type(space_t), intent(inout) :: Xh type(field_t), intent(inout) :: s type(field_t), intent(inout) :: s_res - type(source_scalar_t), intent(inout) :: f_Xh + type(field_t), intent(inout) :: f_Xh type(coef_t), intent(inout) :: c_Xh real(kind=rp), intent(in) :: lambda real(kind=rp), intent(in) :: rhocp real(kind=rp), intent(in) :: bd real(kind=rp), intent(in) :: dt integer, intent(in) :: n - + call device_cfill(c_Xh%h1_d, lambda, n) call device_cfill(c_Xh%h2_d, rhocp * (bd / dt), n) c_Xh%ifh2 = .true. - + call Ax%compute(s_res%x, s%x, c_Xh, msh, Xh) #ifdef HAVE_HIP - call scalar_residual_update_hip(s_res%x_d, f_Xh%s_d, n) + call scalar_residual_update_hip(s_res%x_d, f_Xh%x_d, n) #elif HAVE_CUDA - call scalar_residual_update_cuda(s_res%x_d, f_Xh%s_d, n) + call scalar_residual_update_cuda(s_res%x_d, f_Xh%x_d, n) #elif HAVE_OPENCL - call scalar_residual_update_opencl(s_res%x_d, f_Xh%s_d, n) + call scalar_residual_update_opencl(s_res%x_d, f_Xh%x_d, n) #endif - + end subroutine scalar_residual_device_compute - + end module scalar_residual_device diff --git a/src/scalar/bcknd/sx/scalar_residual_sx.f90 b/src/scalar/bcknd/sx/scalar_residual_sx.f90 index 959a1dccc9a..ff64bc3cf74 100644 --- a/src/scalar/bcknd/sx/scalar_residual_sx.f90 +++ b/src/scalar/bcknd/sx/scalar_residual_sx.f90 @@ -5,7 +5,7 @@ module scalar_residual_sx use operators implicit none private - + type, public, extends(scalar_residual_t) :: scalar_residual_sx_t contains procedure, nopass :: compute => scalar_residual_sx_compute @@ -17,10 +17,10 @@ subroutine scalar_residual_sx_compute(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, & lambda, rhocp, bd, dt, n) class(ax_t), intent(in) :: Ax type(mesh_t), intent(inout) :: msh - type(space_t), intent(inout) :: Xh + type(space_t), intent(inout) :: Xh type(field_t), intent(inout) :: s type(field_t), intent(inout) :: s_res - type(source_scalar_t), intent(inout) :: f_Xh + type(field_t), intent(inout) :: f_Xh type(coef_t), intent(inout) :: c_Xh real(kind=rp), intent(in) :: lambda real(kind=rp), intent(in) :: rhocp @@ -39,9 +39,9 @@ subroutine scalar_residual_sx_compute(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, & call Ax%compute(s_res%x, s%x, c_Xh, msh, Xh) do i = 1, n - s_res%x(i,1,1,1) = (-s_res%x(i,1,1,1)) + f_Xh%s(i,1,1,1) + s_res%x(i,1,1,1) = (-s_res%x(i,1,1,1)) + f_Xh%x(i,1,1,1) end do - - end subroutine scalar_residual_sx_compute - + + end subroutine scalar_residual_sx_compute + end module scalar_residual_sx diff --git a/src/scalar/scalar_aux.f90 b/src/scalar/scalar_aux.f90 index 740b967d893..8911199eae9 100644 --- a/src/scalar/scalar_aux.f90 +++ b/src/scalar/scalar_aux.f90 @@ -9,7 +9,7 @@ module scalar_aux contains !> Prints for prs, velx, vely, velz the following: - !! Number of iterations, start residual, end residual + !! Number of iterations, start residual, end residual subroutine scalar_step_info(step, t, dt, ksp_results) type(ksp_monitor_t), intent(in) :: ksp_results(1) integer, intent(in) :: step diff --git a/src/scalar/scalar_ic.f90 b/src/scalar/scalar_ic.f90 new file mode 100644 index 00000000000..b9e18619c06 --- /dev/null +++ b/src/scalar/scalar_ic.f90 @@ -0,0 +1,151 @@ +! Copyright (c) 2021, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Scalar initial condition +module scalar_ic + use gather_scatter, only : gs_t, GS_OP_ADD + use neko_config, only : NEKO_BCKND_DEVICE + use num_types, only : rp + use device_math, only : device_col2 + use device, only : device_memcpy, HOST_TO_DEVICE + use field, only : field_t + use utils, only : neko_error + use coefs, only : coef_t + use math, only : col2, cfill + use user_intf, only : useric_scalar + use json_module, only : json_file + use json_utils, only: json_get + implicit none + private + + interface set_scalar_ic + module procedure set_scalar_ic_int, set_scalar_ic_usr + end interface set_scalar_ic + + public :: set_scalar_ic + +contains + + !> Set scalar initial condition (builtin) + !! @details Set scalar initial condition using one of the builtin types + !! currently supported: + !! - uniform. + !! @param s Scalar field. + !! @param coef Coefficient. + !! @param gs Gather-Scatter object. + !! @param type Type of initial condition. + !! @param params JSON parameters. + subroutine set_scalar_ic_int(s, coef, gs, type, params) + type(field_t), intent(inout) :: s + type(coef_t), intent(in) :: coef + type(gs_t), intent(inout) :: gs + character(len=*) :: type + type(json_file), intent(inout) :: params + + ! Variables for retrieving JSON parameters + real(kind=rp) :: ic_value + + if (trim(type) .eq. 'uniform') then + call json_get(params, 'case.scalar.initial_condition.value', ic_value) + call set_scalar_ic_uniform(s, ic_value) + else + call neko_error('Invalid initial condition') + end if + + call set_scalar_ic_common(s, coef, gs) + + end subroutine set_scalar_ic_int + + !> Set scalar intial condition (user defined) + !! @details Set scalar initial condition using a user defined function. + !! @param s Scalar field. + !! @param coef Coefficient. + !! @param gs Gather-Scatter object. + !! @param usr_ic User defined initial condition function. + !! @param params JSON parameters. + subroutine set_scalar_ic_usr(s, coef, gs, usr_ic, params) + type(field_t), intent(inout) :: s + type(coef_t), intent(in) :: coef + type(gs_t), intent(inout) :: gs + procedure(useric_scalar) :: usr_ic + type(json_file), intent(inout) :: params + + call usr_ic(s, params) + + call set_scalar_ic_common(s, coef, gs) + + end subroutine set_scalar_ic_usr + + !> Set scalar initial condition (common) + !! @details Finalize scalar initial condition by distributing the initial + !! condition across elements and multiplying by the coefficient (if any). + !! @param s Scalar field. + !! @param coef Coefficient. + !! @param gs Gather-Scatter object. + subroutine set_scalar_ic_common(s, coef, gs) + type(field_t), intent(inout) :: s + type(coef_t), intent(in) :: coef + type(gs_t), intent(inout) :: gs + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(s%x, s%x_d, s%dof%size(), & + HOST_TO_DEVICE, sync=.false.) + end if + + ! Ensure continuity across elements for initial conditions + call gs%op(s%x, s%dof%size(), GS_OP_ADD) + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_col2(s%x_d, coef%mult_d, s%dof%size()) + else + call col2(s%x, coef%mult, s%dof%size()) + end if + + end subroutine set_scalar_ic_common + + !> Uniform initial condition + !! @details Set scalar initial condition to a uniform value across the domain. + !! @param s Scalar field. + !! @param ic_value Desired value of the scalar field. + subroutine set_scalar_ic_uniform(s, ic_value) + type(field_t), intent(inout) :: s + real(kind=rp), intent(in) :: ic_value + integer :: n + s = ic_value + n = s%dof%size() + if (NEKO_BCKND_DEVICE .eq. 1) then + call cfill(s%x, ic_value, n) + end if + + end subroutine set_scalar_ic_uniform + +end module scalar_ic diff --git a/src/scalar/scalar_pnpn.f90 b/src/scalar/scalar_pnpn.f90 index e7be30c7129..44863049325 100644 --- a/src/scalar/scalar_pnpn.f90 +++ b/src/scalar/scalar_pnpn.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2022-2023, The Neko Authors +! Copyright (c) 2022-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -30,83 +30,95 @@ ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -!> Modular version of the Classic Nek5000 Pn/Pn formulation for scalars +!> Containts the scalar_pnpn_t type. + module scalar_pnpn + use num_types, only: rp use scalar_residual_fctry, only : scalar_residual_factory use ax_helm_fctry, only: ax_helm_factory - use rhs_maker_fctry + use rhs_maker_fctry, only : rhs_maker_ext_fctry, rhs_maker_bdf_fctry + use rhs_maker, only : rhs_maker_bdf_t, rhs_maker_ext_t use scalar_scheme, only : scalar_scheme_t use dirichlet, only : dirichlet_t + use neumann, only : neumann_t use field, only : field_t use bc, only : bc_list_t, bc_list_init, bc_list_free, bc_list_apply_scalar, & bc_list_add use mesh, only : mesh_t use checkpoint, only : chkp_t use coefs, only : coef_t - use device + use device, only : HOST_TO_DEVICE, device_memcpy use gather_scatter, only : gs_t, GS_OP_ADD use scalar_residual, only :scalar_residual_t use ax_product, only : ax_t - use field_series - use facet_normal - use device_math - use device_mathops - use scalar_aux - use time_scheme_controller - use projection - use math - use logger - use advection - use profiler - use json_utils, only: json_get, json_get_or_default + use field_series, only: field_series_t + use facet_normal, only : facet_normal_t + use krylov, only : ksp_monitor_t + use device_math, only : device_add2s2, device_col2 + use scalar_aux, only : scalar_step_info + use time_scheme_controller, only : time_scheme_controller_t + use projection, only : projection_t + use math, only : glsc2, col2, add2s2 + use logger, only : neko_log, LOG_SIZE, NEKO_LOG_DEBUG + use advection, only : advection_t + use advection_fctry, only : advection_factory + use profiler, only : profiler_start_region, profiler_end_region + use json_utils, only: json_get use json_module, only : json_file use user_intf, only : user_t use material_properties, only : material_properties_t + use neko_config, only : NEKO_BCKND_DEVICE + use time_step_controller implicit none private type, public, extends(scalar_scheme_t) :: scalar_pnpn_t - - type(field_t) :: s_res - type(field_series_t) :: slag + !> The residual of the transport equation. + type(field_t) :: s_res + !> Solution increment. type(field_t) :: ds - type(field_t) :: wa1 - type(field_t) :: ta1 - - + !> Helmholz operator. class(ax_t), allocatable :: Ax + !> Solution projection. type(projection_t) :: proj_s - type(dirichlet_t) :: bc_res !< Dirichlet condition for scala + !> Dirichlet conditions for the residual + !! Collects all the Dirichlet condition facets into one bc and applies 0, + !! Since the values never change there during the solve. + type(dirichlet_t) :: bc_res + + !> A bc list for the bc_res. Contains only that, essentially just to wrap + !! the if statement determining whether to apply on the device or CPU. type(bc_list_t) :: bclst_ds - class(advection_t), allocatable :: adv + !> Advection operator. + class(advection_t), allocatable :: adv - ! Time variables - type(field_t) :: abx1 - type(field_t) :: abx2 + ! Lag arrays for the RHS. + type(field_t) :: abx1, abx2 - !> Residual + !> Computes the residual. class(scalar_residual_t), allocatable :: res - !> Contributions to kth order extrapolation scheme + !> Contributions to kth order extrapolation scheme. class(rhs_maker_ext_t), allocatable :: makeext - !> Contributions to F from lagged BD terms + !> Contributions to the RHS from lagged BDF terms. class(rhs_maker_bdf_t), allocatable :: makebdf contains !> Constructor. procedure, pass(this) :: init => scalar_pnpn_init !> To restart - procedure, pass(this) :: restart=> scalar_pnpn_restart + procedure, pass(this) :: restart => scalar_pnpn_restart !> Destructor. procedure, pass(this) :: free => scalar_pnpn_free + !> Solve for the current timestep. procedure, pass(this) :: step => scalar_pnpn_step end type scalar_pnpn_t @@ -161,19 +173,13 @@ subroutine scalar_pnpn_init(this, msh, coef, gs, params, user, & call this%abx2%init(dm_Xh, "abx2") - call this%wa1%init(dm_Xh, 'wa1') - - call this%ta1%init(dm_Xh, 'ta1') - call this%ds%init(dm_Xh, 'ds') - call this%slag%init(this%s, 2) - end associate ! Initialize dirichlet bcs for scalar residual ! todo: look that this works - call this%bc_res%init(this%dm_Xh) + call this%bc_res%init(this%c_Xh) do i = 1, this%n_dir_bcs call this%bc_res%mark_facets(this%dir_bcs(i)%marked_facet) end do @@ -182,55 +188,50 @@ subroutine scalar_pnpn_init(this, msh, coef, gs, params, user, & if (this%user_bc%msk(0) .gt. 0) then call this%bc_res%mark_facets(this%user_bc%marked_facet) end if + + call this%bc_res%mark_zones_from_list(msh%labeled_zones, 'd_s', & + this%bc_labels) call this%bc_res%finalize() call this%bc_res%set_g(0.0_rp) + call bc_list_init(this%bclst_ds) call bc_list_add(this%bclst_ds, this%bc_res) - ! @todo not param stuff again, using velocity stuff - ! Intialize projection space thingy - if (this%projection_dim .gt. 0) then - call this%proj_s%init(this%dm_Xh%size(), this%projection_dim) - end if + + ! Intialize projection space + call this%proj_s%init(this%dm_Xh%size(), this%projection_dim, & + this%projection_activ_step) ! Add lagged term to checkpoint ! @todo Init chkp object, note, adding 3 slags - ! call this%chkp%add_lag(this%slag, this%slag, this%slag) - - ! Uses sthe same parameter as the fluid to set dealiasing - call json_get(params, 'case.numerics.dealias', logical_val) - call params%get('case.numerics.dealiased_polynomial_order', integer_val, & - found) - if (.not. found) then - call json_get(params, 'case.numerics.polynomial_order', integer_val) - integer_val = 3.0_rp / 2.0_rp * (integer_val + 1) - 1 - end if - call advection_factory(this%adv, this%c_Xh, logical_val, integer_val + 1) + ! call this%chkp%add_lag(this%slag, this%slag, this%slag) + call advection_factory(this%adv, params, this%c_Xh) end subroutine scalar_pnpn_init - !> Restart method + !> I envision the arguments to this func might need to be expanded subroutine scalar_pnpn_restart(this, dtlag, tlag) class(scalar_pnpn_t), target, intent(inout) :: this real(kind=rp) :: dtlag(10), tlag(10) integer :: n + + n = this%s%dof%size() - call col2(this%s%x, this%c_Xh%mult, n) - call col2(this%slag%lf(1)%x, this%c_Xh%mult, n) + call col2(this%s%x, this%c_Xh%mult, n) + call col2(this%slag%lf(1)%x, this%c_Xh%mult, n) call col2(this%slag%lf(2)%x, this%c_Xh%mult, n) - if (NEKO_BCKND_DEVICE .eq. 1) then call device_memcpy(this%s%x, this%s%x_d, & - n, HOST_TO_DEVICE) + n, HOST_TO_DEVICE, sync=.false.) call device_memcpy(this%slag%lf(1)%x, this%slag%lf(1)%x_d, & - n, HOST_TO_DEVICE) + n, HOST_TO_DEVICE, sync=.false.) call device_memcpy(this%slag%lf(2)%x, this%slag%lf(2)%x_d, & - n, HOST_TO_DEVICE) + n, HOST_TO_DEVICE, sync=.false.) call device_memcpy(this%abx1%x, this%abx1%x_d, & - n, HOST_TO_DEVICE) + n, HOST_TO_DEVICE, sync=.false.) call device_memcpy(this%abx2%x, this%abx2%x_d, & - n, HOST_TO_DEVICE) + n, HOST_TO_DEVICE, sync=.false.) end if call this%gs_Xh%op(this%s,GS_OP_ADD) @@ -249,10 +250,6 @@ subroutine scalar_pnpn_free(this) call this%s_res%free() - call this%wa1%free() - - call this%ta1%free() - call this%ds%free() call this%abx1%free() @@ -274,17 +271,15 @@ subroutine scalar_pnpn_free(this) deallocate(this%makebdf) end if - - call this%slag%free() - end subroutine scalar_pnpn_free - subroutine scalar_pnpn_step(this, t, tstep, dt, ext_bdf) + subroutine scalar_pnpn_step(this, t, tstep, dt, ext_bdf, dt_controller) class(scalar_pnpn_t), intent(inout) :: this real(kind=rp), intent(inout) :: t integer, intent(inout) :: tstep real(kind=rp), intent(in) :: dt type(time_scheme_controller_t), intent(inout) :: ext_bdf + type(time_step_controller_t), intent(in) :: dt_controller ! Number of degrees of freedom integer :: n ! Linear solver results monitor @@ -292,74 +287,95 @@ subroutine scalar_pnpn_step(this, t, tstep, dt, ext_bdf) character(len=LOG_SIZE) :: log_buf n = this%dm_Xh%size() - - call profiler_start_region('Scalar') + + call profiler_start_region('Scalar', 2) associate(u => this%u, v => this%v, w => this%w, s => this%s, & cp => this%cp, lambda => this%lambda, rho => this%rho, & ds => this%ds, & - ta1 => this%ta1, & - wa1 => this%wa1, & s_res =>this%s_res, & Ax => this%Ax, f_Xh => this%f_Xh, Xh => this%Xh, & c_Xh => this%c_Xh, dm_Xh => this%dm_Xh, gs_Xh => this%gs_Xh, & slag => this%slag, & projection_dim => this%projection_dim, & - ksp_maxiter => this%ksp_maxiter, & msh => this%msh, res => this%res, & - makeext => this%makeext, makebdf => this%makebdf) + makeext => this%makeext, makebdf => this%makebdf, & + if_variable_dt => dt_controller%if_variable_dt, & + dt_last_change => dt_controller%dt_last_change) + + if (neko_log%level_ .ge. NEKO_LOG_DEBUG) then + write(log_buf,'(A,A,E15.7,A,E15.7,A,E15.7)') 'Scalar debug',& + ' l2norm s', glsc2(this%s%x,this%s%x,n),& + ' slag1', glsc2(this%slag%lf(1)%x,this%slag%lf(1)%x,n),& + ' slag2', glsc2(this%slag%lf(2)%x,this%slag%lf(2)%x,n) + call neko_log%message(log_buf) + write(log_buf,'(A,A,E15.7,A,E15.7)') 'Scalar debug2',& + ' l2norm abx1', glsc2(this%abx1%x,this%abx1%x,n),& + ' abx2', glsc2(this%abx2%x,this%abx2%x,n) + call neko_log%message(log_buf) + end if - ! Evaluate the source term and scale with the mass matrix. - call f_Xh%eval(t) + ! Compute the source terms + call this%source_term%compute(t, tstep) + ! Pre-multiply the source terms with the mass matrix. if (NEKO_BCKND_DEVICE .eq. 1) then - call device_col2(f_Xh%s_d, c_Xh%B_d, n) + call device_col2(f_Xh%x_d, c_Xh%B_d, n) else - call col2(f_Xh%s, c_Xh%B, n) + call col2(f_Xh%x, c_Xh%B, n) end if + ! Apply Neumann boundary conditions + call bc_list_apply_scalar(this%bclst_neumann, this%f_Xh%x, dm_Xh%size()) + ! Add the advection operators to the right-hans-side. - call this%adv%compute_scalar(u, v, w, s, f_Xh%s, & + call this%adv%compute_scalar(u, v, w, s, f_Xh%x, & Xh, this%c_Xh, dm_Xh%size()) - call makeext%compute_scalar(ta1, this%abx1, this%abx2, f_Xh%s, & - rho, ext_bdf%advection_coeffs, n) + ! At this point the RHS contains the sum of the advection operator, + ! Neumann boundary sources and additional source terms, evaluated using + ! the scalar field from the previous time-step. Now, this value is used in + ! the explicit time scheme to advance these terms in time. + call makeext%compute_scalar(this%abx1, this%abx2, f_Xh%x, rho, & + ext_bdf%advection_coeffs, n) - call makebdf%compute_scalar(ta1, wa1, slag, f_Xh%s, s, c_Xh%B, & - rho, dt, ext_bdf%diffusion_coeffs, ext_bdf%ndiff, n) + ! Add the RHS contributions coming from the BDF scheme. + call makebdf%compute_scalar(slag, f_Xh%x, s, c_Xh%B, rho, dt, & + ext_bdf%diffusion_coeffs, ext_bdf%ndiff, n) call slag%update() - ! We assume that no change of boundary conditions - ! occurs between elements. I.e. we do not apply gsop here like in Nek5000 - ! Apply dirichlet - call this%bc_apply() + + !> Apply Dirichlet boundary conditions + !! We assume that no change of boundary conditions + !! occurs between elements. i.e. we do not apply gsop here like in Nek5000 + call this%dirichlet_update_(this%field_dirichlet_fields, & + this%field_dirichlet_bcs, this%c_Xh, t, tstep, "scalar") + call bc_list_apply_scalar(this%bclst_dirichlet, this%s%x, this%dm_Xh%size()) ! Compute scalar residual. - call profiler_start_region('Scalar residual') + call profiler_start_region('Scalar residual', 20) call res%compute(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, lambda, rho * cp, & ext_bdf%diffusion_coeffs(1), dt, & dm_Xh%size()) - call gs_Xh%op(s_res, GS_OP_ADD) + call gs_Xh%op(s_res, GS_OP_ADD) + + ! Apply a 0-valued Dirichlet boundary conditions on the ds. + call bc_list_apply_scalar(this%bclst_ds, s_res%x, dm_Xh%size()) - call bc_list_apply_scalar(this%bclst_ds,& - s_res%x, dm_Xh%size()) call profiler_end_region - if (tstep .gt. 5 .and. projection_dim .gt. 0) then - call this%proj_s%project_on(s_res%x, c_Xh, n) - end if + call this%proj_s%pre_solving(s_res%x, tstep, c_Xh, n, dt_controller) call this%pc%update() - call profiler_start_region('Scalar solve') + call profiler_start_region('Scalar solve', 21) ksp_results(1) = this%ksp%solve(Ax, ds, s_res%x, n, & - c_Xh, this%bclst_ds, gs_Xh, ksp_maxiter) + c_Xh, this%bclst_ds, gs_Xh) call profiler_end_region - if (tstep .gt. 5 .and. projection_dim .gt. 0) then - call this%proj_s%project_back(ds%x, Ax, c_Xh, & - this%bclst_ds, gs_Xh, n) - end if + call this%proj_s%post_solving(ds%x, Ax, c_Xh, & + this%bclst_ds, gs_Xh, n, tstep, dt_controller) + ! Update the solution if (NEKO_BCKND_DEVICE .eq. 1) then call device_add2s2(s%x_d, ds%x_d, 1.0_rp, n) else diff --git a/src/scalar/scalar_residual.f90 b/src/scalar/scalar_residual.f90 index bd029b0d998..a3a79ae4f26 100644 --- a/src/scalar/scalar_residual.f90 +++ b/src/scalar/scalar_residual.f90 @@ -32,7 +32,7 @@ ! !> Defines the residual for the scalar transport equation module scalar_residual - use gather_scatter, only : gs_t + use gather_scatter, only : gs_t use ax_product, only : ax_t use field, only : field_t use coefs, only : coef_t @@ -42,14 +42,27 @@ module scalar_residual use mesh, only : mesh_t use num_types, only : rp implicit none - + !> Abstract type to compute scalar residual type, abstract :: scalar_residual_t contains procedure(scalar_residual_interface), nopass, deferred :: compute end type scalar_residual_t - + abstract interface + !> Interface for computing the residual of a scalar transport equation. + !! @param Ax The Helmholtz operator. + !! @param s The values of the scalar. + !! @param s_res The values of the scalar residual. + !! @param f_xH The right hand side. + !! @param c_xH The SEM coefficients. + !! @param msh The mesh. + !! @param Xh The SEM function space. + !! @param lambda The thermal conductivity. + !! @param rhocp The density multiplied by the specific heat capacity. + !! @param bd The coefficeints from the BDF differencing scheme. + !! @param dt The timestep. + !! @param n The total number of degrees of freedom. subroutine scalar_residual_interface(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, & lambda, rhocp, bd, dt, n) import field_t @@ -57,16 +70,16 @@ subroutine scalar_residual_interface(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, & import gs_t import facet_normal_t import source_scalar_t - import space_t + import space_t import coef_t import mesh_t import rp class(ax_t), intent(in) :: Ax type(mesh_t), intent(inout) :: msh - type(space_t), intent(inout) :: Xh + type(space_t), intent(inout) :: Xh type(field_t), intent(inout) :: s type(field_t), intent(inout) :: s_res - type(source_scalar_t), intent(inout) :: f_Xh + type(field_t), intent(inout) :: f_Xh type(coef_t), intent(inout) :: c_Xh real(kind=rp), intent(in) :: lambda real(kind=rp), intent(in) :: rhocp @@ -75,5 +88,5 @@ subroutine scalar_residual_interface(Ax, s, s_res, f_Xh, c_Xh, msh, Xh, & integer, intent(in) :: n end subroutine scalar_residual_interface end interface - + end module scalar_residual diff --git a/src/scalar/scalar_residual_fctry.f90 b/src/scalar/scalar_residual_fctry.f90 index 6fc86c8f29b..10eae216449 100644 --- a/src/scalar/scalar_residual_fctry.f90 +++ b/src/scalar/scalar_residual_fctry.f90 @@ -41,7 +41,7 @@ module scalar_residual_fctry contains - + subroutine scalar_residual_factory(scalar_res) class(scalar_residual_t), allocatable, intent(inout) :: scalar_res @@ -57,7 +57,7 @@ subroutine scalar_residual_factory(scalar_res) else allocate(scalar_residual_cpu_t::scalar_res) end if - - + + end subroutine scalar_residual_factory end module scalar_residual_fctry diff --git a/src/scalar/scalar_scheme.f90 b/src/scalar/scalar_scheme.f90 index 07f43ca7aa2..9ddbac871e7 100644 --- a/src/scalar/scalar_scheme.f90 +++ b/src/scalar/scalar_scheme.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2022, The Neko Authors +! Copyright (c) 2022-2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -30,57 +30,110 @@ ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -!> Modular version of the Classic Nek5000 Pn/Pn formulation for scalars +!> Contains the scalar_scheme_t type. ! todo: module name module scalar_scheme - use gather_scatter - use neko_config - use checkpoint - use num_types - use source_scalar - use field - use space - use dofmap - use krylov - use coefs - use dirichlet - use krylov_fctry - use precon_fctry - use bc - use mesh - use facet_zone - use time_scheme_controller - use logger - use field_registry - use usr_scalar + use gather_scatter, only : gs_t + use checkpoint, only : chkp_t + use num_types, only: rp + use field, only : field_t + use field_list, only: field_list_t + use space, only : space_t + use dofmap, only : dofmap_t + use krylov, only : ksp_t + use coefs, only : coef_t + use dirichlet, only : dirichlet_t + use neumann, only : neumann_t + use krylov_fctry, only : krylov_solver_factory, krylov_solver_destroy + use jacobi, only : jacobi_t + use device_jacobi, only : device_jacobi_t + use sx_jacobi, only : sx_jacobi_t + use hsmg, only : hsmg_t + use precon_fctry, only : precon_factory, pc_t, precon_destroy + use bc, only : bc_t, bc_list_t, bc_list_free, bc_list_init, & + bc_list_apply_scalar, bc_list_add + use field_dirichlet, only: field_dirichlet_t, field_dirichlet_update + use mesh, only : mesh_t, NEKO_MSH_MAX_ZLBLS, NEKO_MSH_MAX_ZLBL_LEN + use facet_zone, only : facet_zone_t + use time_scheme_controller, only : time_scheme_controller_t + use logger, only : neko_log, LOG_SIZE + use field_registry, only : neko_field_registry + use usr_scalar, only : usr_scalar_t, usr_scalar_bc_eval use json_utils, only : json_get, json_get_or_default use json_module, only : json_file use user_intf, only : user_t use material_properties, only : material_properties_t + use utils, only : neko_error + use comm, only: NEKO_COMM, MPI_INTEGER, MPI_SUM + use scalar_source_term, only : scalar_source_term_t + use field_series + use time_step_controller implicit none + !> Base type for a scalar advection-diffusion solver. type, abstract :: scalar_scheme_t - type(field_t), pointer :: u !< x-component of Velocity - type(field_t), pointer :: v !< y-component of Velocity - type(field_t), pointer :: w !< z-component of Velocity - type(field_t), pointer :: s !< the scalar - type(space_t), pointer :: Xh !< Function space \f$ X_h \f$ - type(dofmap_t), pointer :: dm_Xh !< Dofmap associated with \f$ X_h \f$ - type(gs_t), pointer :: gs_Xh !< Gather-scatter associated with \f$ X_h \f$ - type(coef_t), pointer :: c_Xh !< Coefficients associated with \f$ X_h \f$ - type(source_scalar_t) :: f_Xh !< Source term associated with \f$ X_h \f$ - class(ksp_t), allocatable :: ksp !< Krylov solver - integer :: ksp_maxiter !< Max iteration number in ksp. - integer :: projection_dim !< Projection space size in ksp. - class(pc_t), allocatable :: pc !< Preconditioner - type(dirichlet_t) :: dir_bcs(NEKO_MSH_MAX_ZLBLS) !< Dirichlet conditions - type(usr_scalar_t) :: user_bc !< Dirichlet conditions + !> x-component of Velocity + type(field_t), pointer :: u + !> y-component of Velocity + type(field_t), pointer :: v + !> z-component of Velocity + type(field_t), pointer :: w + !> The scalar. + type(field_t), pointer :: s + !> Lag arrays, i.e. solutions at previous timesteps. + type(field_series_t) :: slag + !> Function space \f$ X_h \f$. + type(space_t), pointer :: Xh + !> Dofmap associated with \f$ X_h \f$. + type(dofmap_t), pointer :: dm_Xh + !> Gather-scatter associated with \f$ X_h \f$. + type(gs_t), pointer :: gs_Xh + !> Coefficients associated with \f$ X_h \f$. + type(coef_t), pointer :: c_Xh + !> Right-hand side. + type(field_t), pointer :: f_Xh => null() + !> The source term for equation. + type(scalar_source_term_t) :: source_term + !> Krylov solver. + class(ksp_t), allocatable :: ksp + !> Max iterations in the Krylov solver. + integer :: ksp_maxiter + !> Projection space size. + integer :: projection_dim + !< Steps to activate projection for ksp + integer :: projection_activ_step + !> Preconditioner. + class(pc_t), allocatable :: pc + !> Dirichlet conditions. + type(dirichlet_t) :: dir_bcs(NEKO_MSH_MAX_ZLBLS) + !> Field Dirichlet conditions. + type(field_dirichlet_t) :: field_dir_bc + !> Pointer to user_dirichlet_update to be called in fluid_scheme_step + procedure(field_dirichlet_update), nopass, pointer :: dirichlet_update_ & + => null() + !> List of BC objects to pass to user_dirichlet_update + type(bc_list_t) :: field_dirichlet_bcs + !< List of fields to pass to user_dirichlet_update + type(field_list_t) :: field_dirichlet_fields + !> Neumann conditions. + type(neumann_t) :: neumann_bcs(NEKO_MSH_MAX_ZLBLS) + !> User Dirichlet conditions. + type(usr_scalar_t) :: user_bc + !> Number of Dirichlet bcs. integer :: n_dir_bcs = 0 - type(bc_list_t) :: bclst !< List of boundary conditions - type(json_file), pointer :: params !< Parameters - type(mesh_t), pointer :: msh => null() !< Mesh - type(chkp_t) :: chkp !< Checkpoint + !> Number of Neumann bcs. + integer :: n_neumann_bcs = 0 + !> List of Dirichlet boundary conditions, including the user one. + type(bc_list_t) :: bclst_dirichlet + !> List of Neumann conditions list + type(bc_list_t) :: bclst_neumann + !> Case paramters. + type(json_file), pointer :: params + !> Mesh. + type(mesh_t), pointer :: msh => null() + !> Checkpoint for restarts. + type(chkp_t) :: chkp !> Thermal diffusivity. real(kind=rp), pointer :: lambda !> Density. @@ -88,17 +141,23 @@ module scalar_scheme !> Specific heat capacity. real(kind=rp), pointer :: cp !> Boundary condition labels (if any) - character(len=20), allocatable :: bc_labels(:) + character(len=NEKO_MSH_MAX_ZLBL_LEN), allocatable :: bc_labels(:) contains + !> Constructor for the base type. procedure, pass(this) :: scheme_init => scalar_scheme_init + !> Destructor for the base type. procedure, pass(this) :: scheme_free => scalar_scheme_free + !> Validate successful initialization. procedure, pass(this) :: validate => scalar_scheme_validate - procedure, pass(this) :: bc_apply => scalar_scheme_bc_apply - procedure, pass(this) :: set_source => scalar_scheme_set_source + !> Assings the evaluation function for `user_bc`. procedure, pass(this) :: set_user_bc => scalar_scheme_set_user_bc + !> Constructor. procedure(scalar_scheme_init_intrf), pass(this), deferred :: init + !> Destructor. procedure(scalar_scheme_free_intrf), pass(this), deferred :: free + !> Solve for the current timestep. procedure(scalar_scheme_step_intrf), pass(this), deferred :: step + !> Restart from a checkpoint. procedure(scalar_scheme_restart_intrf), pass(this), deferred :: restart end type scalar_scheme_t @@ -114,7 +173,7 @@ subroutine scalar_scheme_init_intrf(this, msh, coef, gs, params, user,& import user_t import material_properties_t class(scalar_scheme_t), target, intent(inout) :: this - type(mesh_t), target, intent(inout) :: msh + type(mesh_t), target, intent(inout) :: msh type(coef_t), target, intent(inout) :: coef type(gs_t), target, intent(inout) :: gs type(json_file), target, intent(inout) :: params @@ -125,7 +184,7 @@ end subroutine scalar_scheme_init_intrf !> Abstract interface to restart a scalar formulation abstract interface - subroutine scalar_scheme_restart_intrf(this,dtlag, tlag) + subroutine scalar_scheme_restart_intrf(this, dtlag, tlag) import scalar_scheme_t import chkp_t import rp @@ -133,6 +192,7 @@ subroutine scalar_scheme_restart_intrf(this,dtlag, tlag) real(kind=rp) :: dtlag(10), tlag(10) end subroutine scalar_scheme_restart_intrf end interface + !> Abstract interface to dealocate a scalar formulation abstract interface @@ -141,18 +201,20 @@ subroutine scalar_scheme_free_intrf(this) class(scalar_scheme_t), intent(inout) :: this end subroutine scalar_scheme_free_intrf end interface - + !> Abstract interface to compute a time-step abstract interface - subroutine scalar_scheme_step_intrf(this, t, tstep, dt, ext_bdf) + subroutine scalar_scheme_step_intrf(this, t, tstep, dt, ext_bdf, dt_controller) import scalar_scheme_t import time_scheme_controller_t + import time_step_controller_t import rp class(scalar_scheme_t), intent(inout) :: this real(kind=rp), intent(inout) :: t integer, intent(inout) :: tstep real(kind=rp), intent(in) :: dt type(time_scheme_controller_t), intent(inout) :: ext_bdf + type(time_step_controller_t), intent(in) :: dt_controller end subroutine scalar_scheme_step_intrf end interface @@ -162,47 +224,66 @@ end subroutine scalar_scheme_step_intrf !! @param zones List of zones !! @param bc_labels List of user specified bcs from the parameter file !! currently dirichlet 'd=X' and 'user' supported - subroutine scalar_scheme_add_bcs(this, zones, bc_labels) - class(scalar_scheme_t), intent(inout) :: this + subroutine scalar_scheme_add_bcs(this, zones, bc_labels) + class(scalar_scheme_t), intent(inout) :: this type(facet_zone_t), intent(inout) :: zones(NEKO_MSH_MAX_ZLBLS) - character(len=20), intent(in) :: bc_labels(NEKO_MSH_MAX_ZLBLS) - character(len=20) :: bc_label + character(len=NEKO_MSH_MAX_ZLBL_LEN), intent(in) :: bc_labels(:) + character(len=NEKO_MSH_MAX_ZLBL_LEN) :: bc_label integer :: i, j, bc_idx - real(kind=rp) :: dir_value + real(kind=rp) :: dir_value, flux_value logical :: bc_exists - do i = 1, NEKO_MSH_MAX_ZLBLS + do i = 1, size(bc_labels) bc_label = trim(bc_labels(i)) - if (bc_label(1:1) .eq. 'd') then - bc_exists = .false. - bc_idx = 0 - do j = 1, i-1 - if (bc_label .eq. bc_labels(j)) then - bc_exists = .true. - bc_idx = j - end if - end do - - if (bc_exists) then - call this%dir_bcs(j)%mark_zone(zones(i)) - else - this%n_dir_bcs = this%n_dir_bcs + 1 - call this%dir_bcs(this%n_dir_bcs)%init(this%dm_Xh) - call this%dir_bcs(this%n_dir_bcs)%mark_zone(zones(i)) - read(bc_label(3:), *) dir_value - call this%dir_bcs(this%n_dir_bcs)%set_g(dir_value) - end if + if (bc_label(1:2) .eq. 'd=') then +! The idea of this commented piece of code is to merge bcs with the same +! Dirichlet value into 1 so that one has less kernel launches. Currently +! segfaults, needs investigation. +! bc_exists = .false. +! bc_idx = 0 +! do j = 1, i-1 +! if (bc_label .eq. bc_labels(j)) then +! bc_exists = .true. +! bc_idx = j +! end if +! end do + +! if (bc_exists) then +! call this%dir_bcs(j)%mark_zone(zones(i)) +! else + this%n_dir_bcs = this%n_dir_bcs + 1 + call this%dir_bcs(this%n_dir_bcs)%init(this%c_Xh) + call this%dir_bcs(this%n_dir_bcs)%mark_zone(zones(i)) + read(bc_label(3:), *) dir_value + call this%dir_bcs(this%n_dir_bcs)%set_g(dir_value) +! end if + end if + + if (bc_label(1:2) .eq. 'n=') then + this%n_neumann_bcs = this%n_neumann_bcs + 1 + call this%neumann_bcs(this%n_neumann_bcs)%init(this%c_Xh) + call this%neumann_bcs(this%n_neumann_bcs)%mark_zone(zones(i)) + read(bc_label(3:), *) flux_value + call this%neumann_bcs(this%n_neumann_bcs)%init_neumann(flux_value) end if !> Check if user bc on this zone if (bc_label(1:4) .eq. 'user') then call this%user_bc%mark_zone(zones(i)) end if + end do do i = 1, this%n_dir_bcs call this%dir_bcs(i)%finalize() - call bc_list_add(this%bclst, this%dir_bcs(i)) + call bc_list_add(this%bclst_dirichlet, this%dir_bcs(i)) + end do + + ! Create list with just Neumann bcs + call bc_list_init(this%bclst_neumann, this%n_neumann_bcs) + do i=1, this%n_neumann_bcs + call this%neumann_bcs(i)%finalize() + call bc_list_add(this%bclst_neumann, this%neumann_bcs(i)) end do end subroutine scalar_scheme_add_bcs @@ -229,7 +310,7 @@ subroutine scalar_scheme_init(this, msh, c_Xh, gs_Xh, params, scheme, user, & ! Variables for retrieving json parameters logical :: logical_val real(kind=rp) :: real_val, solver_abstol - integer :: integer_val + integer :: integer_val, ierr character(len=:), allocatable :: solver_type, solver_precon this%u => neko_field_registry%get_field('u') @@ -257,11 +338,12 @@ subroutine scalar_scheme_init(this, msh, c_Xh, gs_Xh, params, scheme, user, & write(log_buf, '(A,ES13.6)') 'cp :', this%cp call neko_log%message(log_buf) - call json_get_or_default(params, 'case.fluid.velocity_solver.max_iterations',& - this%ksp_maxiter, 800) call json_get_or_default(params, & 'case.fluid.velocity_solver.projection_space_size',& this%projection_dim, 20) + call json_get_or_default(params, & + 'case.fluid.velocity_solver.projection_hold_steps',& + this%projection_activ_step, 5) write(log_buf, '(A, A)') 'Type : ', trim(scheme) @@ -275,19 +357,22 @@ subroutine scalar_scheme_init(this, msh, c_Xh, gs_Xh, params, scheme, user, & this%dm_Xh => this%u%dof this%params => params this%msh => msh - call neko_field_registry%add_field(this%dm_Xh, 's') + if (.not. neko_field_registry%field_exists('s')) then + call neko_field_registry%add_field(this%dm_Xh, 's') + end if this%s => neko_field_registry%get_field('s') + call this%slag%init(this%s, 2) + this%gs_Xh => gs_Xh this%c_Xh => c_Xh - call source_scalar_init(this%f_Xh, this%dm_Xh) ! ! Setup scalar boundary conditions ! - call bc_list_init(this%bclst) - call this%user_bc%init(this%dm_Xh) + call bc_list_init(this%bclst_dirichlet) + call this%user_bc%init(this%c_Xh) ! Read boundary types from the case file allocate(this%bc_labels(NEKO_MSH_MAX_ZLBLS)) @@ -298,12 +383,22 @@ subroutine scalar_scheme_init(this, msh, c_Xh, gs_Xh, params, scheme, user, & if (params%valid_path('case.scalar.boundary_types')) then call json_get(params, & 'case.scalar.boundary_types', & - this%bc_labels) + this%bc_labels) end if - - call scalar_scheme_add_bcs(this, msh%labeled_zones, this%bc_labels) + ! + ! Setup right-hand side field. + ! + allocate(this%f_Xh) + call this%f_Xh%init(this%dm_Xh, fld_name="scalar_rhs") + + ! Initialize the source term + call this%source_term%init(params, this%f_Xh, this%c_Xh, user) + + call scalar_scheme_add_bcs(this, msh%labeled_zones, this%bc_labels) + + ! Mark BC zones call this%user_bc%mark_zone(msh%wall) call this%user_bc%mark_zone(msh%inlet) call this%user_bc%mark_zone(msh%outlet) @@ -311,15 +406,46 @@ subroutine scalar_scheme_init(this, msh, c_Xh, gs_Xh, params, scheme, user, & call this%user_bc%mark_zone(msh%sympln) call this%user_bc%finalize() call this%user_bc%set_coef(this%c_Xh) - if (this%user_bc%msk(0) .gt. 0) call bc_list_add(this%bclst, this%user_bc) - + if (this%user_bc%msk(0) .gt. 0) call bc_list_add(this%bclst_dirichlet,& + this%user_bc) + + ! Add field dirichlet BCs + call this%field_dir_bc%init(this%c_Xh) + call this%field_dir_bc%mark_zones_from_list(msh%labeled_zones, & + 'd_s', this%bc_labels) + call this%field_dir_bc%finalize() + call MPI_Allreduce(this%field_dir_bc%msk(0), integer_val, 1, & + MPI_INTEGER, MPI_SUM, NEKO_COMM, ierr) + if (integer_val .gt. 0) call this%field_dir_bc%init_field('d_s') + + call bc_list_add(this%bclst_dirichlet, this%field_dir_bc) + + ! + ! Associate our field dirichlet update to the user one. + ! + this%dirichlet_update_ => user%user_dirichlet_update + + ! + ! Initialize field list and bc list for user_dirichlet_update + ! + allocate(this%field_dirichlet_fields%fields(1)) + this%field_dirichlet_fields%fields(1)%f => & + this%field_dir_bc%field_bc + + call bc_list_init(this%field_dirichlet_bcs, size=1) + call bc_list_add(this%field_dirichlet_bcs, this%field_dir_bc) + + ! todo parameter file ksp tol should be added + call json_get_or_default(params, 'case.fluid.velocity_solver.max_iterations',& + integer_val, 800) call scalar_scheme_solver_factory(this%ksp, this%dm_Xh%size(), & - solver_type, solver_abstol) + solver_type, integer_val, solver_abstol) call scalar_scheme_precon_factory(this%pc, this%ksp, & - this%c_Xh, this%dm_Xh, this%gs_Xh, this%bclst, solver_precon) - + this%c_Xh, this%dm_Xh, this%gs_Xh, this%bclst_dirichlet, solver_precon) + call neko_log%end_section() + end subroutine scalar_scheme_init @@ -347,9 +473,21 @@ subroutine scalar_scheme_free(this) deallocate(this%bc_labels) end if - call source_scalar_free(this%f_Xh) + call this%source_term%free() - call bc_list_free(this%bclst) + call bc_list_free(this%bclst_dirichlet) + call bc_list_free(this%bclst_neumann) + + call this%slag%free() + + ! Free everything related to field dirichlet BCs + call this%field_dirichlet_fields%free() + call bc_list_free(this%field_dirichlet_bcs) + call this%field_dir_bc%field_bc%free() + call this%field_dir_bc%free() + if (associated(this%dirichlet_update_)) then + this%dirichlet_update_ => null() + end if end subroutine scalar_scheme_free @@ -380,9 +518,9 @@ subroutine scalar_scheme_validate(this) if (.not. associated(this%c_Xh)) then call neko_error('No coefficients defined') end if - - if (.not. associated(this%f_Xh%eval)) then - call neko_error('No source term defined') + + if (.not. associated(this%f_Xh)) then + call neko_error('No rhs allocated') end if if (.not. associated(this%params)) then @@ -397,23 +535,17 @@ subroutine scalar_scheme_validate(this) end subroutine scalar_scheme_validate - !> Apply all boundary conditions defined for velocity - !! @todo Why can't we call the interface here? - subroutine scalar_scheme_bc_apply(this) - class(scalar_scheme_t), intent(inout) :: this - call bc_list_apply_scalar(this%bclst, this%s%x, this%dm_Xh%size()) - end subroutine scalar_scheme_bc_apply - !> Initialize a linear solver !! @note Currently only supporting Krylov solvers - subroutine scalar_scheme_solver_factory(ksp, n, solver, abstol) + subroutine scalar_scheme_solver_factory(ksp, n, solver, max_iter, abstol) class(ksp_t), allocatable, target, intent(inout) :: ksp integer, intent(in), value :: n + integer, intent(in) :: max_iter character(len=*), intent(in) :: solver real(kind=rp) :: abstol - call krylov_solver_factory(ksp, n, solver, abstol) - + call krylov_solver_factory(ksp, n, solver, max_iter, abstol) + end subroutine scalar_scheme_solver_factory !> Initialize a Krylov preconditioner @@ -425,9 +557,9 @@ subroutine scalar_scheme_precon_factory(pc, ksp, coef, dof, gs, bclst, pctype) type(gs_t), target, intent(inout) :: gs type(bc_list_t), target, intent(inout) :: bclst character(len=*) :: pctype - + call precon_factory(pc, pctype) - + select type(pcp => pc) type is(jacobi_t) call pcp%init(coef, dof, gs) @@ -449,28 +581,9 @@ subroutine scalar_scheme_precon_factory(pc, ksp, coef, dof, gs, bclst, pctype) end select call ksp%set_pc(pc) - - end subroutine scalar_scheme_precon_factory - !> Initialize source term - subroutine scalar_scheme_set_source(this, source_term_type, usr_f, usr_f_vec) - class(scalar_scheme_t), intent(inout) :: this - character(len=*) :: source_term_type - procedure(source_scalar_term_pw), optional :: usr_f - procedure(source_scalar_term), optional :: usr_f_vec - - if (trim(source_term_type) .eq. 'noforce') then - call source_scalar_set_type(this%f_Xh, source_scalar_eval_noforce) - else if (trim(source_term_type) .eq. 'user' .and. present(usr_f)) then - call source_scalar_set_pw_type(this%f_Xh, usr_f) - else if (trim(source_term_type) .eq. 'user_vector' .and. present(usr_f_vec)) then - call source_scalar_set_type(this%f_Xh, usr_f_vec) - else - call neko_error('Invalid scalar source term '//source_term_type) - end if + end subroutine scalar_scheme_precon_factory - end subroutine scalar_scheme_set_source - !> Initialize a user defined scalar bc !! @param usr_eval User specified boundary condition for scalar field subroutine scalar_scheme_set_user_bc(this, usr_eval) @@ -478,8 +591,8 @@ subroutine scalar_scheme_set_user_bc(this, usr_eval) procedure(usr_scalar_bc_eval) :: usr_eval call this%user_bc%set_eval(usr_eval) - + end subroutine scalar_scheme_set_user_bc - + end module scalar_scheme diff --git a/src/scalar/scalar_source_term.f90 b/src/scalar/scalar_source_term.f90 new file mode 100644 index 00000000000..fbe9696d7d1 --- /dev/null +++ b/src/scalar/scalar_source_term.f90 @@ -0,0 +1,202 @@ + +! Copyright (c) 2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Implements the `scalar_source_term_t` type. +module scalar_source_term + use neko_config, only : NEKO_BCKND_DEVICE + use num_types, only : rp + use scalar_user_source_term, only: scalar_user_source_term_t + use source_term, only : source_term_wrapper_t, source_term_t + use source_term_fctry, only : source_term_factory + use field, only : field_t + use field_list, only : field_list_t + use json_utils, only : json_get + use json_module, only : json_file, json_core, json_value + use coefs, only : coef_t + use user_intf, only : user_t + use utils, only : neko_warning + implicit none + private + + !> Wrapper contaning and executing the scalar source terms. + !! @details + !! Exists mainly to keep the `scalar_scheme_t` type smaller and also as + !! placeholder for future optimizations. + type, public :: scalar_source_term_t + !> Array of ordinary source terms. + class(source_term_wrapper_t), allocatable :: source_terms(:) + !> The right-hand side. + type(field_t), pointer :: f => null() + contains + !> Constructor. + procedure, pass(this) :: init => scalar_source_term_init + !> Destructor. + procedure, pass(this) :: free => scalar_source_term_free + !> Add all the source terms to the passed right-hand side fields. + procedure, pass(this) :: compute => scalar_source_term_compute + !> Initialize the user source term. + procedure, nopass, private :: init_user_source + + end type scalar_source_term_t + +contains + + !> Constructor. + subroutine scalar_source_term_init(this, json, f, coef, user) + class(scalar_source_term_t), intent(inout) :: this + type(json_file), intent(inout) :: json + type(field_t), pointer, intent(in) :: f + type(coef_t), intent(inout) :: coef + type(user_t), intent(in) :: user + + type(field_list_t) :: rhs_fields + ! Json low-level manipulator. + type(json_core) :: core + ! Pointer to the source_terms JSON object and the individual sources. + type(json_value), pointer :: source_object, source_pointer + ! Buffer for serializing the json. + character(len=:), allocatable :: buffer + ! A single source term as its own json_file. + type(json_file) :: source_subdict + ! Source type + character(len=:), allocatable :: type + ! Dummy source strenth values + real(kind=rp) :: values(3) + logical :: found + integer :: n_sources, i + + call this%free() + + this%f => f + + + if (json%valid_path('case.scalar.source_terms')) then + ! We package the fields for the source term to operate on in a field list. + allocate(rhs_fields%fields(1)) + rhs_fields%fields(1)%f => f + + call json%get_core(core) + call json%get('case.scalar.source_terms', source_object, found) + + n_sources = core%count(source_object) + allocate(this%source_terms(n_sources)) + + + do i=1, n_sources + ! Create a new json containing just the subdict for this source. + call core%get_child(source_object, i, source_pointer, found) + call core%print_to_string(source_pointer, buffer) + call source_subdict%load_from_string(buffer) + call json_get(source_subdict, "type", type) + + ! The user source is treated separately + if ((trim(type) .eq. "user_vector") .or. & + (trim(type) .eq. "user_pointwise")) then + if (source_subdict%valid_path("start_time") .or. & + source_subdict%valid_path("end_time")) then + call neko_warning("The start_time and end_time parameters have& + & no effect on the scalar user source term") + end if + + call init_user_source(this%source_terms(i)%source_term, & + rhs_fields, coef, type, user) + else + + call source_term_factory(this%source_terms(i)%source_term, & + source_subdict, rhs_fields, coef) + end if + end do + end if + + end subroutine scalar_source_term_init + + !> Initialize the user source term. + !! @param source_term The allocatable source term to be initialized to a user. + !! @param rhs_fields The field list with the right-hand-side. + !! @param coef The SEM coefs. + !! @param type The type of the user source term, "user_vector" or + !! "user_poinwise". + !! @param user The user type containing the user source term routines. + subroutine init_user_source(source_term, rhs_fields, coef, type, user) + class(source_term_t), allocatable, intent(inout) :: source_term + type(field_list_t) :: rhs_fields + type(coef_t), intent(inout) :: coef + character(len=*) :: type + type(user_t), intent(in) :: user + + allocate(scalar_user_source_term_t::source_term) + + select type (source_term) + type is (scalar_user_source_term_t) + call source_term%init_from_components(rhs_fields, coef, type, & + user%scalar_user_f_vector, & + user%scalar_user_f) + end select + end subroutine init_user_source + + !> Destructor. + subroutine scalar_source_term_free(this) + class(scalar_source_term_t), intent(inout) :: this + integer :: i + + nullify(this%f) + + if (allocated(this%source_terms)) then + do i=1, size(this%source_terms) + call this%source_terms(i)%free() + end do + deallocate(this%source_terms) + end if + + end subroutine scalar_source_term_free + + !> Add all the source term to the passed right-hand side fields. + !! @param t The time value. + !! @param tstep The current time step. + subroutine scalar_source_term_compute(this, t, tstep) + class(scalar_source_term_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + integer :: i, n + + this%f = 0.0_rp + + ! Add contribution from all source terms. + if (allocated(this%source_terms)) then + do i=1, size(this%source_terms) + call this%source_terms(i)%source_term%compute(t, tstep) + end do + end if + + end subroutine scalar_source_term_compute +end module scalar_source_term diff --git a/src/scalar/scalar_user_source_term.f90 b/src/scalar/scalar_user_source_term.f90 new file mode 100644 index 00000000000..47c5b3d98c3 --- /dev/null +++ b/src/scalar/scalar_user_source_term.f90 @@ -0,0 +1,242 @@ +! Copyright (c) 2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Implements the `scalar_user_source_term_t` type. +module scalar_user_source_term + use neko_config, only : NEKO_BCKND_DEVICE + use num_types, only : rp + use utils, only : neko_error + use source_term, only : source_term_t + use json_module, only : json_file + use field_list, only : field_list_t + use coefs, only : coef_t + use device, only : device_map, device_free + use device_math, only : device_add2 + use math, only : add2 + use dofmap, only : dofmap_t + use, intrinsic :: iso_c_binding + implicit none + private + + public :: scalar_source_compute_pointwise, scalar_source_compute_vector + + !> A source-term for the scalar, with procedure pointers pointing to the + !! actual implementation in the user file. + !! @details The user source term can be applied either pointiwse or acting + !! on the whole array in a single call, which is referred to as "vector" + !! application. + !! @warning + !! The user source term does not support init from JSON and should instead be + !! directly initialized from components. + type, public, extends(source_term_t) :: scalar_user_source_term_t + !> Pointer to the dofmap of the right-hand-side fields. + type(dofmap_t), pointer :: dm + !> The source term. + real(kind=rp), allocatable :: s(:, :, :, :) + + !> Device pointer for `s`. + type(c_ptr) :: s_d = C_NULL_PTR + !> Compute the source term for a single point + procedure(scalar_source_compute_pointwise), nopass, pointer :: compute_pw_ & + => null() + !> Compute the source term for the entire boundary + procedure(scalar_source_compute_vector), nopass, pointer :: compute_vector_& + => null() + contains + !> Constructor from JSON (will throw!). + procedure, pass(this) :: init => scalar_user_source_term_init + !> Constructor from components. + procedure, pass(this) :: init_from_components => & + scalar_user_source_term_init_from_components + !> Destructor. + procedure, pass(this) :: free => scalar_user_source_term_free + !> Computes the source term and adds the result to `fields`. + procedure, pass(this) :: compute_ => scalar_user_source_term_compute + end type scalar_user_source_term_t + + abstract interface + !> Computes the source term and adds the result to `fields`. + !! @param t The time value. + !! @param tstep The current time-step. + subroutine scalar_source_compute_vector(this, t) + import scalar_user_source_term_t, rp + class(scalar_user_source_term_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + end subroutine scalar_source_compute_vector + end interface + + abstract interface + !> Computes the source term at a single point. + !! @param s The source value. + !! @param j The x-index of GLL point. + !! @param k The y-index of GLL point. + !! @param l The z-index of GLL point. + !! @param e The index of element. + !! @param t The time value. + subroutine scalar_source_compute_pointwise(s, j, k, l, e, t) + import rp + real(kind=rp), intent(inout) :: s + integer, intent(in) :: j + integer, intent(in) :: k + integer, intent(in) :: l + integer, intent(in) :: e + real(kind=rp), intent(in) :: t + end subroutine scalar_source_compute_pointwise + end interface + +contains + + !> Constructor from JSON. + !! @details + !! This will throw, as the user source term should be initialized directly + !! from components. + subroutine scalar_user_source_term_init(this, json, fields, coef) + class(scalar_user_source_term_t), intent(inout) :: this + type(json_file), intent(inout) :: json + type(field_list_t), intent(inout), target :: fields + type(coef_t), intent(inout) :: coef + + call neko_error("The user scalar source term should be init from components") + + end subroutine scalar_user_source_term_init + + !> Constructor from components. + !! @param fields A list with 1 field for adding the source values. + !! @param coef The SEM coeffs. + !! @param sourc_term_type The type of the user source term, "user_vector" or + !! "user_pointwise". + !! @param eval_vector The procedure to vector-compute the source term. + !! @param eval_pointwise The procedure to pointwise-compute the source term. + subroutine scalar_user_source_term_init_from_components(this, fields, coef, & + source_term_type, eval_vector, eval_pointwise) + class(scalar_user_source_term_t), intent(inout) :: this + type(field_list_t), intent(inout), target :: fields + type(coef_t), intent(inout) :: coef + character(len=*) :: source_term_type + procedure(scalar_source_compute_vector), optional :: eval_vector + procedure(scalar_source_compute_pointwise), optional :: eval_pointwise + + call this%free() + call this%init_base(fields, coef, 0.0_rp, huge(0.0_rp)) + + this%dm => fields%fields(1)%f%dof + + allocate(this%s(this%dm%Xh%lx, this%dm%Xh%ly, this%dm%Xh%lz, & + this%dm%msh%nelv)) + + this%s = 0d0 + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_map(this%s, this%s_d, this%dm%size()) + end if + + + if (trim(source_term_type) .eq. 'user_pointwise' .and. & + present(eval_pointwise)) then + if (NEKO_BCKND_DEVICE .eq. 1) then + call neko_error('Pointwise source terms not supported on accelerators') + end if + this%compute_vector_ => pointwise_eval_driver + this%compute_pw_ => eval_pointwise + else if (trim(source_term_type) .eq. 'user_vector' .and. & + present(eval_vector)) then + this%compute_vector_ => eval_vector + else + call neko_error('Invalid fluid source term '//source_term_type) + end if + end subroutine scalar_user_source_term_init_from_components + + !> Destructor. + subroutine scalar_user_source_term_free(this) + class(scalar_user_source_term_t), intent(inout) :: this + + if (allocated(this%s)) deallocate(this%s) + + if (c_associated(this%s_d)) call device_free(this%s_d) + + nullify(this%compute_vector_) + nullify(this%compute_pw_) + nullify(this%dm) + + call this%free_base() + end subroutine scalar_user_source_term_free + + !> Computes the source term and adds the result to `fields`. + !! @param t The time value. + !! @param tstep The current time-step. + subroutine scalar_user_source_term_compute(this, t, tstep) + class(scalar_user_source_term_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + integer :: n + + call this%compute_vector_(this, t) + n = this%fields%fields(1)%f%dof%size() + + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_add2(this%fields%fields(1)%f%x_d, this%s_d, n) + else + call add2(this%fields%fields(1)%f%x, this%s, n) + end if + + end subroutine scalar_user_source_term_compute + + !> Driver for all pointwise source term evaluatons. + !! @param t The time value. + subroutine pointwise_eval_driver(this, t) + class(scalar_user_source_term_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer :: j, k, l, e + integer :: jj, kk, ll, ee + + select type (this) + type is (scalar_user_source_term_t) + do e = 1, size(this%s, 4) + ee = e + do l = 1, size(this%s, 3) + ll = l + do k = 1, size(this%s, 2) + kk = k + do j = 1, size(this%s, 1) + jj =j + call this%compute_pw_(this%s(j,k,l,e), jj, kk, ll, ee, t) + end do + end do + end do + end do + class default + call neko_error('Incorrect source type in pointwise eval driver!') + end select + + end subroutine pointwise_eval_driver + +end module scalar_user_source_term diff --git a/src/scalar/source_scalar.f90 b/src/scalar/source_scalar.f90 index d43ba4753f3..80b6621a955 100644 --- a/src/scalar/source_scalar.f90 +++ b/src/scalar/source_scalar.f90 @@ -72,7 +72,7 @@ subroutine source_scalar_term_pw(s, j, k, l, e, t) real(kind=rp), intent(in) :: t end subroutine source_scalar_term_pw end interface - + contains !> Initialize a source_scalar term @a f @@ -91,7 +91,7 @@ subroutine source_scalar_init(f, dm) if (NEKO_BCKND_DEVICE .eq. 1) then call device_map(f%s, f%s_d, dm%size()) end if - + end subroutine source_scalar_init !> Deallocate a source_scalar term @a f @@ -107,7 +107,7 @@ subroutine source_scalar_free(f) if (c_associated(f%s_d)) then call device_free(f%s_d) end if - + end subroutine source_scalar_free !> Set the eval method for the source_scalar term @a f @@ -160,7 +160,7 @@ subroutine source_scalar_eval_pw(f, t) end do end do end do - + end subroutine source_scalar_eval_pw - + end module source_scalar diff --git a/src/sem/bcknd/device/cuda/coef_kernel.h b/src/sem/bcknd/device/cuda/coef_kernel.h index 3a7bb750c6c..2eb69472504 100644 --- a/src/sem/bcknd/device/cuda/coef_kernel.h +++ b/src/sem/bcknd/device/cuda/coef_kernel.h @@ -1,3 +1,5 @@ +#ifndef __SEM_COEF_KERNEL_H__ +#define __SEM_COEF_KERNEL_H__ /* Copyright (c) 2022, The Neko Authors All rights reserved. @@ -280,3 +282,5 @@ __global__ void coef_generate_drst_kernel(T * __restrict__ jac, } } + +#endif // __SEM_COEF_KERNEL_H__ diff --git a/src/sem/bcknd/device/device_coef.F90 b/src/sem/bcknd/device/device_coef.F90 index 1f4917ad6f6..4aaa035c9db 100644 --- a/src/sem/bcknd/device/device_coef.F90 +++ b/src/sem/bcknd/device/device_coef.F90 @@ -38,9 +38,9 @@ module device_coef private public :: device_coef_generate_geo, device_coef_generate_dxydrst - + #ifdef HAVE_HIP - interface + interface subroutine hip_coef_generate_geo(G11, G12, G13, G22, G23, G33, & drdx, drdy, drdz, dsdx, dsdy, dsdz, dtdx, dtdy, dtdz, & jacinv, w3, nel, lx, gdim) & @@ -49,7 +49,7 @@ subroutine hip_coef_generate_geo(G11, G12, G13, G22, G23, G33, & implicit none type(c_ptr), value :: G11, G12, G13, G22, G23, G33 type(c_ptr), value :: drdx, drdy, drdz - type(c_ptr), value :: dsdx, dsdy, dsdz + type(c_ptr), value :: dsdx, dsdy, dsdz type(c_ptr), value :: dtdx, dtdy, dtdz type(c_ptr), value :: jacinv, w3 integer(c_int) :: nel, gdim, lx @@ -84,7 +84,7 @@ subroutine cuda_coef_generate_geo(G11, G12, G13, G22, G23, G33, & implicit none type(c_ptr), value :: G11, G12, G13, G22, G23, G33 type(c_ptr), value :: drdx, drdy, drdz - type(c_ptr), value :: dsdx, dsdy, dsdz + type(c_ptr), value :: dsdx, dsdy, dsdz type(c_ptr), value :: dtdx, dtdy, dtdz type(c_ptr), value :: jacinv, w3 integer(c_int) :: nel, gdim, lx @@ -119,7 +119,7 @@ subroutine opencl_coef_generate_geo(G11, G12, G13, G22, G23, G33, & implicit none type(c_ptr), value :: G11, G12, G13, G22, G23, G33 type(c_ptr), value :: drdx, drdy, drdz - type(c_ptr), value :: dsdx, dsdy, dsdz + type(c_ptr), value :: dsdx, dsdy, dsdz type(c_ptr), value :: dtdx, dtdy, dtdz type(c_ptr), value :: jacinv, w3 integer(c_int) :: nel, gdim, lx diff --git a/src/sem/bcknd/device/hip/coef_kernel.h b/src/sem/bcknd/device/hip/coef_kernel.h index 70980ceeef2..7ae59c27463 100644 --- a/src/sem/bcknd/device/hip/coef_kernel.h +++ b/src/sem/bcknd/device/hip/coef_kernel.h @@ -1,3 +1,5 @@ +#ifndef __SEM_COEF_KERNEL_H__ +#define __SEM_COEF_KERNEL_H__ /* Copyright (c) 2022, The Neko Authors All rights reserved. @@ -280,3 +282,5 @@ __global__ void coef_generate_drst_kernel(T * __restrict__ jac, } } + +#endif // __SEM_COEF_KERNEL_H__ diff --git a/src/sem/bcknd/device/opencl/coef_kernel.cl b/src/sem/bcknd/device/opencl/coef_kernel.cl index 673fc6aef39..714c9fad84f 100644 --- a/src/sem/bcknd/device/opencl/coef_kernel.cl +++ b/src/sem/bcknd/device/opencl/coef_kernel.cl @@ -1,3 +1,5 @@ +#ifndef __SEM_COEF_KERNEL_CL__ +#define __SEM_COEF_KERNEL_CL__ /* Copyright (c) 2022-2023, The Neko Authors All rights reserved. @@ -314,3 +316,5 @@ __kernel void coef_generate_drst_kernel(__global real * __restrict__ jac, } } + +#endif // __SEM_COEF_KERNEL_CL__ diff --git a/src/sem/coef.f90 b/src/sem/coef.f90 index 821e5fd60d9..0be5d82b575 100644 --- a/src/sem/coef.f90 +++ b/src/sem/coef.f90 @@ -30,7 +30,7 @@ ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -!> Coefficients +!> Coefficients module coefs use gather_scatter use gs_ops @@ -48,7 +48,7 @@ module coefs use, intrinsic :: iso_c_binding implicit none private - + !> Coefficients defined on a given (mesh, \f$ X_h \f$) tuple. !! Arrays use indices (i,j,k,e): element e, local coordinate (i,j,k). type, public :: coef_t @@ -66,24 +66,24 @@ module coefs real(kind=rp), allocatable :: G23(:,:,:,:) real(kind=rp), allocatable :: mult(:,:,:,:) !< Multiplicity - !> generate mapping data between element and reference element + !> generate mapping data between element and reference element !! \f$ dx/dr, dy/dr, dz/dr \f$ !! \f$ dx/ds, dy/ds, dz/ds \f$ !! \f$ dx/dt, dy/dt, dz/dt \f$ - real(kind=rp), allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:) + real(kind=rp), allocatable :: dxdr(:,:,:,:), dydr(:,:,:,:), dzdr(:,:,:,:) real(kind=rp), allocatable :: dxds(:,:,:,:), dyds(:,:,:,:), dzds(:,:,:,:) - real(kind=rp), allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:) + real(kind=rp), allocatable :: dxdt(:,:,:,:), dydt(:,:,:,:), dzdt(:,:,:,:) !> \f$ dr/dx, dr/dy, dr/dz \f$ !! \f$ ds/dx, ds/dy, ds/dz \f$ !! \f$ dt/dx, dt/dy, dt/dz \f$ - real(kind=rp), allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:) + real(kind=rp), allocatable :: drdx(:,:,:,:), drdy(:,:,:,:), drdz(:,:,:,:) real(kind=rp), allocatable :: dsdx(:,:,:,:), dsdy(:,:,:,:), dsdz(:,:,:,:) - real(kind=rp), allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:) - + real(kind=rp), allocatable :: dtdx(:,:,:,:), dtdy(:,:,:,:), dtdz(:,:,:,:) + real(kind=rp), allocatable :: h1(:,:,:,:) !< Stiffness scaling real(kind=rp), allocatable :: h2(:,:,:,:) !< Mass scaling logical :: ifh2 !< True if h2 .ne. 0 - + real(kind=rp), allocatable :: jac(:,:,:,:) !< Jacobian real(kind=rp), allocatable :: jacinv(:,:,:,:) !< Inverted Jacobian real(kind=rp), allocatable :: B(:,:,:,:) !< Mass matrix/volume matrix @@ -93,10 +93,10 @@ module coefs real(kind=rp), allocatable :: nx(:,:,:,:) !< x-direction of facet normal real(kind=rp), allocatable :: ny(:,:,:,:) !< y-direction of facet normal real(kind=rp), allocatable :: nz(:,:,:,:) !< z-direction of facet normal - !> Pointers to main fields - + !> Pointers to main fields + real(kind=rp) :: volume - + type(space_t), pointer :: Xh => null() type(mesh_t), pointer :: msh => null() type(dofmap_t), pointer :: dof => null() @@ -104,8 +104,8 @@ module coefs ! ! Device pointers (if present) - ! - + ! + type(c_ptr) :: G11_d = C_NULL_PTR type(c_ptr) :: G22_d = C_NULL_PTR type(c_ptr) :: G33_d = C_NULL_PTR @@ -148,38 +148,39 @@ module coefs procedure, private, pass(this) :: init_all => coef_init_all procedure, pass(this) :: free => coef_free procedure, pass(this) :: get_normal => coef_get_normal + procedure, pass(this) :: get_area => coef_get_area generic :: init => init_empty, init_all end type coef_t - + contains - - !> Initialize empty coefs for a space and a mesh + + !> Initialize empty coefs for a space and a mesh subroutine coef_init_empty(this, Xh, msh) class(coef_t), intent(inout) :: this type(space_t), intent(inout), target :: Xh type(mesh_t), intent(inout), target :: msh - integer :: n + integer :: n call this%free() this%msh => msh this%Xh => Xh - + allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) - + allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) - + allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) - + ! ! Setup device memory (if present) ! - + n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. & (NEKO_BCKND_OPENCL .eq. 1)) then @@ -195,9 +196,9 @@ subroutine coef_init_empty(this, Xh, msh) call device_map(this%dtdx, this%dtdx_d, n) call device_map(this%dtdy, this%dtdy_d, n) call device_map(this%dtdz, this%dtdz_d, n) - + end if - + end subroutine coef_init_empty !> Initialize coefficients @@ -206,7 +207,7 @@ subroutine coef_init_all(this, gs_h) type(gs_t), intent(inout), target :: gs_h integer :: n, m call this%free() - + this%msh => gs_h%dofmap%msh this%Xh => gs_h%dofmap%Xh this%dof => gs_h%dofmap @@ -222,39 +223,39 @@ subroutine coef_init_all(this, gs_h) allocate(this%G12(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%G13(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%G23(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) - + allocate(this%dxdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dxds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dxdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) - + allocate(this%dydr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dyds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dydt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) - + allocate(this%dzdr(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dzds(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dzdt(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) - + allocate(this%drdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dsdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dtdx(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) - + allocate(this%drdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dsdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dtdy(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) - + allocate(this%drdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dsdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%dtdz(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) - + allocate(this%jac(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%jacinv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) - + allocate(this%area(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv)) allocate(this%nx(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv)) allocate(this%ny(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv)) allocate(this%nz(this%Xh%lx, this%Xh%ly, 6, this%msh%nelv)) - + allocate(this%B(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%Binv(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) @@ -262,12 +263,12 @@ subroutine coef_init_all(this, gs_h) allocate(this%h2(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) allocate(this%mult(this%Xh%lx, this%Xh%ly, this%Xh%lz, this%msh%nelv)) - + ! ! Setup device memory (if present) ! - + n = this%Xh%lx * this%Xh%ly * this%Xh%lz * this%msh%nelv if (NEKO_BCKND_DEVICE .eq. 1) then call device_map(this%G11, this%G11_d, n) @@ -276,7 +277,7 @@ subroutine coef_init_all(this, gs_h) call device_map(this%G12, this%G12_d, n) call device_map(this%G13, this%G13_d, n) call device_map(this%G23, this%G23_d, n) - + call device_map(this%dxdr, this%dxdr_d, n) call device_map(this%dydr, this%dydr_d, n) call device_map(this%dzdr, this%dzdr_d, n) @@ -284,7 +285,7 @@ subroutine coef_init_all(this, gs_h) call device_map(this%dxds, this%dxds_d, n) call device_map(this%dyds, this%dyds_d, n) call device_map(this%dzds, this%dzds_d, n) - + call device_map(this%dxdt, this%dxdt_d, n) call device_map(this%dydt, this%dydt_d, n) call device_map(this%dzdt, this%dzdt_d, n) @@ -311,16 +312,16 @@ subroutine coef_init_all(this, gs_h) call device_map(this%Binv, this%Binv_d, n) m = this%Xh%lx * this%Xh%ly * 6 * this%msh%nelv - + call device_map(this%area, this%area_d, m) call device_map(this%nx, this%nx_d, m) call device_map(this%ny, this%ny_d, m) call device_map(this%nz, this%nz_d, m) - + end if call coef_generate_dxyzdrst(this) - + call coef_generate_geo(this) call coef_generate_area_and_normal(this) @@ -333,15 +334,17 @@ subroutine coef_init_all(this, gs_h) if (NEKO_BCKND_DEVICE .eq. 1) then call device_rone(this%h1_d, n) call device_rone(this%h2_d, n) - call device_memcpy(this%h1, this%h1_d, n, DEVICE_TO_HOST) - call device_memcpy(this%h2, this%h2_d, n, DEVICE_TO_HOST) + call device_memcpy(this%h1, this%h1_d, n, & + DEVICE_TO_HOST, sync=.false.) + call device_memcpy(this%h2, this%h2_d, n, & + DEVICE_TO_HOST, sync=.false.) else call rone(this%h1,n) call rone(this%h2,n) end if this%ifh2 = .false. - + ! ! Set up multiplicity ! @@ -350,13 +353,14 @@ subroutine coef_init_all(this, gs_h) else call rone(this%mult, n) end if - + call gs_h%op(this%mult, n, GS_OP_ADD) if (NEKO_BCKND_DEVICE .eq. 1) then call device_invcol1(this%mult_d, n) - call device_memcpy(this%mult, this%mult_d, n, DEVICE_TO_HOST, sync=.true.) - else + call device_memcpy(this%mult, this%mult_d, n, & + DEVICE_TO_HOST, sync=.true.) + else call invcol1(this%mult, n) end if @@ -385,7 +389,7 @@ subroutine coef_free(this) if (allocated(this%G13)) then deallocate(this%G13) end if - + if (allocated(this%G23)) then deallocate(this%G23) end if @@ -393,99 +397,99 @@ subroutine coef_free(this) if (allocated(this%mult)) then deallocate(this%mult) end if - + if (allocated(this%B)) then deallocate(this%B) end if - + if (allocated(this%Binv)) then deallocate(this%Binv) end if - + if(allocated(this%dxdr)) then deallocate(this%dxdr) end if - + if(allocated(this%dxds)) then deallocate(this%dxds) end if - + if(allocated(this%dxdt)) then deallocate(this%dxdt) end if - + if(allocated(this%dydr)) then deallocate(this%dydr) end if - + if(allocated(this%dyds)) then deallocate(this%dyds) end if - + if(allocated(this%dydt)) then deallocate(this%dydt) end if - + if(allocated(this%dzdr)) then deallocate(this%dzdr) end if - + if(allocated(this%dzds)) then deallocate(this%dzds) end if - + if(allocated(this%dzdt)) then deallocate(this%dzdt) end if - + if(allocated(this%drdx)) then deallocate(this%drdx) end if - + if(allocated(this%dsdx)) then deallocate(this%dsdx) end if - + if(allocated(this%dtdx)) then deallocate(this%dtdx) end if - + if(allocated(this%drdy)) then deallocate(this%drdy) end if - + if(allocated(this%dsdy)) then deallocate(this%dsdy) end if - + if(allocated(this%dtdy)) then deallocate(this%dtdy) end if - + if(allocated(this%drdz)) then deallocate(this%drdz) end if - + if(allocated(this%dsdz)) then deallocate(this%dsdz) end if - + if(allocated(this%dtdz)) then deallocate(this%dtdz) end if - + if(allocated(this%jac)) then deallocate(this%jac) end if - + if(allocated(this%jacinv)) then deallocate(this%jacinv) end if - + if(allocated(this%h1)) then deallocate(this%h1) end if - + if(allocated(this%h2)) then deallocate(this%h2) end if @@ -505,8 +509,8 @@ subroutine coef_free(this) if (allocated(this%nz)) then deallocate(this%nz) end if - - + + nullify(this%msh) nullify(this%Xh) nullify(this%dof) @@ -514,7 +518,7 @@ subroutine coef_free(this) ! ! Cleanup the device (if present) ! - + if (c_associated(this%G11_d)) then call device_free(this%G11_d) end if @@ -562,7 +566,7 @@ subroutine coef_free(this) if (c_associated(this%dzds_d)) then call device_free(this%dzds_d) end if - + if (c_associated(this%dxdt_d)) then call device_free(this%dxdt_d) end if @@ -598,7 +602,7 @@ subroutine coef_free(this) if (c_associated(this%dsdz_d)) then call device_free(this%dsdz_d) end if - + if (c_associated(this%dtdx_d)) then call device_free(this%dtdx_d) end if @@ -610,7 +614,7 @@ subroutine coef_free(this) if (c_associated(this%dtdz_d)) then call device_free(this%dtdz_d) end if - + if (c_associated(this%mult_d)) then call device_free(this%mult_d) end if @@ -618,7 +622,7 @@ subroutine coef_free(this) if (c_associated(this%h1_d)) then call device_free(this%h1_d) end if - + if (c_associated(this%h2_d)) then call device_free(this%h2_d) end if @@ -630,11 +634,11 @@ subroutine coef_free(this) if (c_associated(this%jacinv_d)) then call device_free(this%jacinv_d) end if - + if (c_associated(this%B_d)) then call device_free(this%B_d) end if - + if (c_associated(this%Binv_d)) then call device_free(this%Binv_d) end if @@ -642,7 +646,7 @@ subroutine coef_free(this) if (c_associated(this%area_d)) then call device_free(this%area_d) end if - + if (c_associated(this%nx_d)) then call device_free(this%nx_d) end if @@ -654,18 +658,18 @@ subroutine coef_free(this) if (c_associated(this%nz_d)) then call device_Free(this%nz_d) end if - + end subroutine coef_free subroutine coef_generate_dxyzdrst(c) type(coef_t), intent(inout) :: c integer :: e,i,lxy,lyz,ntot - + lxy=c%Xh%lx*c%Xh%ly lyz=c%Xh%ly*c%Xh%lz ntot = c%dof%size() - + associate(drdx => c%drdx, drdy => c%drdy, drdz => c%drdz, & dsdx => c%dsdx, dsdy => c%dsdy, dsdz => c%dsdz, & dtdx => c%dtdx, dtdy => c%dtdy, dtdz => c%dtdz, & @@ -678,7 +682,7 @@ subroutine coef_generate_dxyzdrst(c) dyt => c%Xh%dyt, dzt => c%Xh%dzt, & jacinv => c%jacinv, jac => c%jac) - if (NEKO_BCKND_DEVICE .eq. 1) then + if (NEKO_BCKND_DEVICE .eq. 1) then call device_coef_generate_dxydrst(c%drdx_d, c%drdy_d, c%drdz_d, & c%dsdx_d, c%dsdy_d, c%dsdz_d, c%dtdx_d, c%dtdy_d, c%dtdz_d, & @@ -687,39 +691,40 @@ subroutine coef_generate_dxyzdrst(c) c%dof%x_d, c%dof%y_d, c%dof%z_d, c%jacinv_d, c%jac_d, & c%Xh%lx, c%msh%nelv) - call device_memcpy(dxdr, c%dxdr_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dydr, c%dydr_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dzdr, c%dzdr_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dxds, c%dxds_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dyds, c%dyds_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dzds, c%dzds_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dxdt, c%dxdt_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dydt, c%dydt_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dzdt, c%dzdt_d, ntot, DEVICE_TO_HOST) - call device_memcpy(drdx, c%drdx_d, ntot, DEVICE_TO_HOST) - call device_memcpy(drdy, c%drdy_d, ntot, DEVICE_TO_HOST) - call device_memcpy(drdz, c%drdz_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dsdx, c%dsdx_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dsdy, c%dsdy_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dsdz, c%dsdz_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dtdx, c%dtdx_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dtdy, c%dtdy_d, ntot, DEVICE_TO_HOST) - call device_memcpy(dtdz, c%dtdz_d, ntot, DEVICE_TO_HOST) - call device_memcpy(jac, c%jac_d, ntot, DEVICE_TO_HOST) - call device_memcpy(jacinv, c%jacinv_d, ntot, DEVICE_TO_HOST, sync=.true.) + call device_memcpy(dxdr, c%dxdr_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dydr, c%dydr_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dzdr, c%dzdr_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dxds, c%dxds_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dyds, c%dyds_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dzds, c%dzds_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dxdt, c%dxdt_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dydt, c%dydt_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dzdt, c%dzdt_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(drdx, c%drdx_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(drdy, c%drdy_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(drdz, c%drdz_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dsdx, c%dsdx_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dsdy, c%dsdy_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dsdz, c%dsdz_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dtdx, c%dtdx_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dtdy, c%dtdy_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(dtdz, c%dtdz_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(jac, c%jac_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(jacinv, c%jacinv_d, ntot, & + DEVICE_TO_HOST, sync=.true.) else do e = 1, c%msh%nelv call mxm(dx, lx, x(1,1,1,e), lx, dxdr(1,1,1,e), lyz) call mxm(dx, lx, y(1,1,1,e), lx, dydr(1,1,1,e), lyz) call mxm(dx, lx, z(1,1,1,e), lx, dzdr(1,1,1,e), lyz) - + do i = 1, lz call mxm(x(1,1,i,e), lx, dyt, ly, dxds(1,1,i,e), ly) call mxm(y(1,1,i,e), lx, dyt, ly, dyds(1,1,i,e), ly) call mxm(z(1,1,i,e), lx, dyt, ly, dzds(1,1,i,e), ly) end do - + ! We actually take 2d into account, wow, need to do that for the rest. if(c%msh%gdim .eq. 3) then call mxm(x(1,1,1,e), lxy, dzt, lz, dxdt(1,1,1,e), lz) @@ -747,11 +752,11 @@ subroutine coef_generate_dxyzdrst(c) call rone (dtdz, ntot) else - do i = 1, ntot + do i = 1, ntot c%jac(i, 1, 1, 1) = 0.0_rp end do - do i = 1, ntot + do i = 1, ntot c%jac(i, 1, 1, 1) = c%jac(i, 1, 1, 1) + ( c%dxdr(i, 1, 1, 1) & * c%dyds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) ) @@ -779,7 +784,7 @@ subroutine coef_generate_dxyzdrst(c) c%drdy(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) & - c%dxds(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) - + c%drdz(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) & - c%dxdt(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) end do @@ -790,18 +795,18 @@ subroutine coef_generate_dxyzdrst(c) c%dsdy(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dzdt(i, 1, 1, 1) & - c%dxdt(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) - + c%dsdz(i, 1, 1, 1) = c%dxdt(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) & - c%dxdr(i, 1, 1, 1) * c%dydt(i, 1, 1, 1) end do - do i = 1, ntot + do i = 1, ntot c%dtdx(i, 1, 1, 1) = c%dydr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) & - c%dyds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) c%dtdy(i, 1, 1, 1) = c%dxds(i, 1, 1, 1) * c%dzdr(i, 1, 1, 1) & - c%dxdr(i, 1, 1, 1) * c%dzds(i, 1, 1, 1) - + c%dtdz(i, 1, 1, 1) = c%dxdr(i, 1, 1, 1) * c%dyds(i, 1, 1, 1) & - c%dxds(i, 1, 1, 1) * c%dydr(i, 1, 1, 1) end do @@ -810,9 +815,9 @@ subroutine coef_generate_dxyzdrst(c) call invers2(jacinv, jac, ntot) end if end associate - + end subroutine coef_generate_dxyzdrst - + !> Generate geometric data for the given mesh !! @note Current implementation assumes regular shaped hex elements subroutine coef_generate_geo(c) @@ -832,25 +837,25 @@ subroutine coef_generate_geo(c) c%jacinv_d, c%Xh%w3_d, c%msh%nelv, & c%Xh%lx, c%msh%gdim) - call device_memcpy(c%G11, c%G11_d, ntot, DEVICE_TO_HOST) - call device_memcpy(c%G22, c%G22_d, ntot, DEVICE_TO_HOST) - call device_memcpy(c%G33, c%G33_d, ntot, DEVICE_TO_HOST) - call device_memcpy(c%G12, c%G12_d, ntot, DEVICE_TO_HOST) - call device_memcpy(c%G13, c%G13_d, ntot, DEVICE_TO_HOST) + call device_memcpy(c%G11, c%G11_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(c%G22, c%G22_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(c%G33, c%G33_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(c%G12, c%G12_d, ntot, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(c%G13, c%G13_d, ntot, DEVICE_TO_HOST, sync=.false.) call device_memcpy(c%G23, c%G23_d, ntot, DEVICE_TO_HOST, sync=.true.) - + else if(c%msh%gdim .eq. 2) then do i = 1, ntot c%G11(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%drdx(i, 1, 1, 1) & - + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) + + c%drdy(i, 1, 1, 1) * c%drdy(i, 1, 1, 1) c%G22(i, 1, 1, 1) = c%dsdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) & + c%dsdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) c%G12(i, 1, 1, 1) = c%drdx(i, 1, 1, 1) * c%dsdx(i, 1, 1, 1) & - + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) + + c%drdy(i, 1, 1, 1) * c%dsdy(i, 1, 1, 1) end do do i = 1, ntot @@ -864,12 +869,12 @@ subroutine coef_generate_geo(c) do e = 1, c%msh%nelv do i = 1, lxyz - c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1) - c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1) - c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1) - end do - end do - + c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1) + c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1) + c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1) + end do + end do + else do i = 1, ntot @@ -917,7 +922,7 @@ subroutine coef_generate_geo(c) c%G11(i,1,1,e) = c%G11(i,1,1,e) * c%Xh%w3(i,1,1) c%G22(i,1,1,e) = c%G22(i,1,1,e) * c%Xh%w3(i,1,1) c%G12(i,1,1,e) = c%G12(i,1,1,e) * c%Xh%w3(i,1,1) - + c%G33(i,1,1,e) = c%G33(i,1,1,e) * c%Xh%w3(i,1,1) c%G13(i,1,1,e) = c%G13(i,1,1,e) * c%Xh%w3(i,1,1) c%G23(i,1,1,e) = c%G23(i,1,1,e) * c%Xh%w3(i,1,1) @@ -926,18 +931,18 @@ subroutine coef_generate_geo(c) end if end if - + end subroutine coef_generate_geo - + !> Generate mass matrix B for the given mesh and space !! @note This is also a stapleholder, we need to go through the coef class properly. subroutine coef_generate_mass(c) type(coef_t), intent(inout) :: c integer :: e, i, lxyz, ntot - + lxyz = c%Xh%lx * c%Xh%ly * c%Xh%lz ntot = c%dof%size() - + !> @todo rewrite this nest into a device kernel do e = 1, c%msh%nelv ! Here we need to handle things differently for axis symmetric elements @@ -948,15 +953,15 @@ subroutine coef_generate_mass(c) end do if (NEKO_BCKND_DEVICE .eq. 1) then - call device_memcpy(c%B, c%B_d, ntot, HOST_TO_DEVICE) - call device_memcpy(c%Binv, c%Binv_d, ntot, HOST_TO_DEVICE) + call device_memcpy(c%B, c%B_d, ntot, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(c%Binv, c%Binv_d, ntot, HOST_TO_DEVICE, sync=.false.) end if - + call c%gs_h%op(c%Binv, ntot, GS_OP_ADD) if (NEKO_BCKND_DEVICE .eq. 1) then call device_invcol1(c%Binv_d, ntot) - call device_memcpy(c%Binv, c%Binv_d, ntot, DEVICE_TO_HOST) + call device_memcpy(c%Binv, c%Binv_d, ntot, DEVICE_TO_HOST, sync=.true.) else call invcol1(c%Binv, ntot) end if @@ -974,22 +979,38 @@ pure function coef_get_normal(this, i, j, k, e, facet) result(normal) class(coef_t), intent(in) :: this integer, intent(in) :: i, j, k, e, facet real(kind=rp) :: normal(3) + + select case (facet) + case(1,2) + normal(1) = this%nx(j, k, facet, e) + normal(2) = this%ny(j, k, facet, e) + normal(3) = this%nz(j, k, facet, e) + case(3,4) + normal(1) = this%nx(i, k, facet, e) + normal(2) = this%ny(i, k, facet, e) + normal(3) = this%nz(i, k, facet, e) + case(5,6) + normal(1) = this%nx(i, j, facet, e) + normal(2) = this%ny(i, j, facet, e) + normal(3) = this%nz(i, j, facet, e) + end select + end function coef_get_normal + + pure function coef_get_area(this, i, j, k, e, facet) result(area) + class(coef_t), intent(in) :: this + integer, intent(in) :: i, j, k, e, facet + real(kind=rp) :: area select case (facet) case(1,2) - normal(1) = this%nx(j, k, facet, e) - normal(2) = this%ny(j, k, facet, e) - normal(3) = this%nz(j, k, facet, e) + area = this%area(j, k, facet, e) case(3,4) - normal(1) = this%nx(i, k, facet, e) - normal(2) = this%ny(i, k, facet, e) - normal(3) = this%nz(i, k, facet, e) + area = this%area(i, k, facet, e) case(5,6) - normal(1) = this%nx(i, j, facet, e) - normal(2) = this%ny(i, j, facet, e) - normal(3) = this%nz(i, j, facet, e) + area = this%area(i, j, facet, e) end select - end function coef_get_normal + end function coef_get_area + !> Generate facet area and surface normals subroutine coef_generate_area_and_normal(coef) @@ -1002,7 +1023,7 @@ subroutine coef_generate_area_and_normal(coef) real(kind=rp) :: weight, len n = coef%dof%size() lx = coef%Xh%lx - + allocate(a(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv)) allocate(b(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv)) allocate(c(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx, coef%msh%nelv)) @@ -1012,7 +1033,7 @@ subroutine coef_generate_area_and_normal(coef) do i = 1, n a(i, 1, 1, 1) = coef%dyds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) & - coef%dzds(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) - + b(i, 1, 1, 1) = coef%dzds(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) & - coef%dxds(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) @@ -1046,7 +1067,7 @@ subroutine coef_generate_area_and_normal(coef) do i = 1, n a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) & - coef%dzdr(i, 1, 1, 1) * coef%dydt(i, 1, 1, 1) - + b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxdt(i, 1, 1, 1) & - coef%dxdr(i, 1, 1, 1) * coef%dzdt(i, 1, 1, 1) @@ -1071,7 +1092,7 @@ subroutine coef_generate_area_and_normal(coef) coef%ny(j,k, 3, e) = B(j, 1, k, e) coef%ny(j,k, 4, e) = -B(j, lx, k, e) coef%nz(j,k, 3, e) = C(j, 1, k, e) - coef%nz(j,k, 4, e) = -C(j, lx, k, e) + coef%nz(j,k, 4, e) = -C(j, lx, k, e) end do end do end do @@ -1080,7 +1101,7 @@ subroutine coef_generate_area_and_normal(coef) do i = 1, n a(i, 1, 1, 1) = coef%dydr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) & - coef%dzdr(i, 1, 1, 1) * coef%dyds(i, 1, 1, 1) - + b(i, 1, 1, 1) = coef%dzdr(i, 1, 1, 1) * coef%dxds(i, 1, 1, 1) & - coef%dxdr(i, 1, 1, 1) * coef%dzds(i, 1, 1, 1) @@ -1105,11 +1126,11 @@ subroutine coef_generate_area_and_normal(coef) coef%ny(j,k, 5, e) = -B(j, k, 1, e) coef%ny(j,k, 6, e) = B(j, k, lx, e) coef%nz(j,k, 5, e) = -C(j, k, 1, e) - coef%nz(j,k, 6, e) = C(j, k, lx, e) + coef%nz(j,k, 6, e) = C(j, k, lx, e) end do end do end do - + ! Normalize do j = 1, size(coef%nz) len = sqrt(coef%nx(j,1,1,1)**2 + & @@ -1128,13 +1149,17 @@ subroutine coef_generate_area_and_normal(coef) !> @todo cleanup once we have device math in place if (NEKO_BCKND_DEVICE .eq. 1) then n = size(coef%area) - call device_memcpy(coef%area, coef%area_d, n, HOST_TO_DEVICE) - call device_memcpy(coef%nx, coef%nx_d, n, HOST_TO_DEVICE) - call device_memcpy(coef%ny, coef%ny_d, n, HOST_TO_DEVICE) - call device_memcpy(coef%nz, coef%nz_d, n, HOST_TO_DEVICE) + call device_memcpy(coef%area, coef%area_d, n, & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(coef%nx, coef%nx_d, n, & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(coef%ny, coef%ny_d, n, & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(coef%nz, coef%nz_d, n, & + HOST_TO_DEVICE, sync=.false.) end if - + end subroutine coef_generate_area_and_normal - - + + end module coefs diff --git a/src/sem/cpr.f90 b/src/sem/cpr.f90 index a933013128e..09ad81306cb 100644 --- a/src/sem/cpr.f90 +++ b/src/sem/cpr.f90 @@ -52,9 +52,9 @@ module cpr real(kind=rp), allocatable :: v(:,:) !< Transformation matrix real(kind=rp), allocatable :: vt(:,:) !< Transformation matrix transposed - real(kind=rp), allocatable :: vinv(:,:) !< Transformation matrix inversed + real(kind=rp), allocatable :: vinv(:,:) !< Transformation matrix inversed real(kind=rp), allocatable :: vinvt(:,:) !< Transformation matrix - !! inversed and transposed + !! inversed and transposed real(kind=rp), allocatable :: w(:,:) !< Diagonal matrix with weights real(kind=rp), allocatable :: fldhat(:,:,:,:) !< transformed Field data @@ -147,7 +147,7 @@ subroutine cpr_generate_specmat(cpr) real(kind=rp) :: L(0:cpr%Xh%lx-1) real(kind=rp) :: delta(cpr%Xh%lx) integer :: i, kj, j, j2, kk - character(len=LOG_SIZE) :: log_buf + character(len=LOG_SIZE) :: log_buf associate(Xh => cpr%Xh, v=> cpr%v, vt => cpr%vt, & vinv => cpr%vinv, vinvt => cpr%vinvt, w => cpr%w) @@ -159,14 +159,14 @@ subroutine cpr_generate_specmat(cpr) L(1) = Xh%zg(j,1) do j2 = 2, Xh%lx-1 L(j2) = ( (2*j2-1) * Xh%zg(j,1) * L(j2-1) & - - (j2-1) * L(j2-2) ) / j2 + - (j2-1) * L(j2-2) ) / j2 end do do kk = 1, Xh%lx kj = kj+1 v(kj,1) = L(KK-1) end do end do - + ! transpose the matrix call trsp1(v, Xh%lx) !< non orthogonal wrt weights @@ -174,24 +174,24 @@ subroutine cpr_generate_specmat(cpr) do i = 1, Xh%lx delta(i) = 2.0_rp / (2*(i-1)+1) end do - ! modify last entry + ! modify last entry delta(Xh%lx) = 2.0_rp / (Xh%lx-1) - + ! calculate the inverse to multiply the matrix do i = 1, Xh%lx delta(i) = sqrt(1.0_rp / delta(i)) end do - ! scale the matrix + ! scale the matrix do i = 1, Xh%lx do j = 1, Xh%lx v(i,j) = v(i,j) * delta(j) ! orthogonal wrt weights end do end do - + ! get the trasposed call copy(vt, v, Xh%lx * Xh%lx) call trsp1(vt, Xh%lx) - + !populate the mass matrix kk = 1 do i = 1, Xh%lx @@ -204,15 +204,15 @@ subroutine cpr_generate_specmat(cpr) end if end do end do - + !Get the inverse of the transform matrix call mxm(vt, Xh%lx, w, Xh%lx, vinv, Xh%lx) - + !get the transposed of the inverse call copy(vinvt, vinv, Xh%lx * Xh%lx) call trsp1(vinvt, Xh%lx) end associate - + end subroutine cpr_generate_specmat @@ -225,14 +225,14 @@ subroutine cpr_goto_space(cpr, space) real(kind=rp) :: specmat(cpr%Xh%lx,cpr%Xh%lx) real(kind=rp) :: specmatt(cpr%Xh%lx,cpr%Xh%lx) integer :: i, j, k, e, nxyz, nelv - character(len=LOG_SIZE) :: log_buf - character(len=4) :: space + character(len=LOG_SIZE) :: log_buf + character(len=4) :: space ! define some constants nxyz = cpr%Xh%lx*cpr%Xh%lx*cpr%Xh%lx nelv = cpr%msh%nelv - ! Define the matrix according to which transform to do + ! Define the matrix according to which transform to do if (space .eq. 'spec') then call copy(specmat, cpr%vinv, cpr%Xh%lx*cpr%Xh%lx) call copy(specmatt, cpr%vinvt, cpr%Xh%lx*cpr%Xh%lx) @@ -271,15 +271,15 @@ subroutine cpr_truncate_wn(cpr, coef) real(kind=rp) :: vsort(cpr%Xh%lx, cpr%Xh%lx, cpr%Xh%lx) real(kind=rp) :: vtrunc(cpr%Xh%lx, cpr%Xh%lx, cpr%Xh%lx) real(kind=rp) :: vtemp(cpr%Xh%lx, cpr%Xh%lx, cpr%Xh%lx) - real(kind=rp) :: errvec(cpr%Xh%lx, cpr%Xh%lx, cpr%Xh%lx) - real(kind=rp) :: fx(cpr%Xh%lx, cpr%Xh%lx) - real(kind=rp) :: fy(cpr%Xh%lx, cpr%Xh%lx) - real(kind=rp) :: fz(cpr%Xh%lx, cpr%Xh%lx) + real(kind=rp) :: errvec(cpr%Xh%lx, cpr%Xh%lx, cpr%Xh%lx) + real(kind=rp) :: fx(cpr%Xh%lx, cpr%Xh%lx) + real(kind=rp) :: fy(cpr%Xh%lx, cpr%Xh%lx) + real(kind=rp) :: fz(cpr%Xh%lx, cpr%Xh%lx) real(kind=rp) :: l2norm, oldl2norm, targeterr integer :: isort(cpr%Xh%lx, cpr%Xh%lx, cpr%Xh%lx) integer :: i, j, k, e, nxyz, nelv integer :: kut, kutx, kuty, kutz, nx - character(len=LOG_SIZE) :: log_buf + character(len=LOG_SIZE) :: log_buf ! define some constants nx = cpr%Xh%lx @@ -293,7 +293,7 @@ subroutine cpr_truncate_wn(cpr, coef) call copy(vtemp, cpr%fldhat(1,1,1,e), nxyz) call copy(vtrunc, cpr%fldhat(1,1,1,e), nxyz) ! sort the coefficients by absolute value - call sortcoeff(vsort, cpr%fldhat(1,1,1,e), isort, nxyz) + call sortcoeff(vsort, cpr%fldhat(1,1,1,e), isort, nxyz) ! initialize values for iterative procedure l2norm = 0.0_rp kut = 0 @@ -374,10 +374,10 @@ subroutine cpr_truncate_wn(cpr, coef) end subroutine cpr_truncate_wn - !> Sort the spectral coefficient in descending order + !> Sort the spectral coefficient in descending order !! array vsort. The original indices are stored in the isort vector. - subroutine sortcoeff(vsort, v, isort, nxyz) - integer, intent(in) :: nxyz + subroutine sortcoeff(vsort, v, isort, nxyz) + integer, intent(in) :: nxyz real(kind=rp), intent(inout) :: vsort(nxyz) real(kind=rp), intent(inout) :: v(nxyz) integer, intent(inout) :: isort(nxyz) @@ -404,9 +404,9 @@ subroutine sortcoeff(vsort, v, isort, nxyz) end subroutine sortcoeff - !> Flip vector b and ind + !> Flip vector b and ind subroutine flipv(b, ind, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), intent(inout) :: b(n) integer, intent(inout) :: ind(n) real(kind=rp) :: temp(n) @@ -427,7 +427,7 @@ end subroutine flipv !> sort the array acording to ind vector subroutine swap(b, ind, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), intent(inout) :: b(n) integer, intent(inout) :: ind(n) real(kind=rp) :: temp(n) @@ -445,7 +445,7 @@ end subroutine swap !> reorder the array - inverse of swap subroutine reord(b, ind, n) - integer, intent(in) :: n + integer, intent(in) :: n real(kind=rp), intent(inout) :: b(n) integer, intent(inout) :: ind(n) real(kind=rp) :: temp(n) @@ -461,10 +461,10 @@ subroutine reord(b, ind, n) end subroutine reord - !> create filter transfer function - subroutine build_filter_tf(fx, fy, fz, kut, lx) - integer, intent(in) :: lx - integer, intent(in) :: kut + !> create filter transfer function + subroutine build_filter_tf(fx, fy, fz, kut, lx) + integer, intent(in) :: lx + integer, intent(in) :: kut real(kind=rp), intent(inout) :: fx(lx,lx) real(kind=rp), intent(inout) :: fy(lx,lx) real(kind=rp), intent(inout) :: fz(lx,lx) @@ -532,7 +532,7 @@ function get_elem_l2norm(elemdata, coef, space, e) result(l2e) real(kind=rp) :: elemdata(coef%Xh%lx, coef%Xh%lx, coef%Xh%lx) real(kind=rp) :: vole, suma, l2e integer i, e, eg, nxyz - character(len=4) :: space + character(len=4) :: space ! Get the volume of the element nxyz = coef%Xh%lx*coef%Xh%lx*coef%Xh%lx diff --git a/src/sem/dofmap.f90 b/src/sem/dofmap.f90 index 36d136005bb..16f4e0b4018 100644 --- a/src/sem/dofmap.f90 +++ b/src/sem/dofmap.f90 @@ -46,7 +46,7 @@ module dofmap use element, only : element_t use quad, only : quad_t use hex, only : hex_t - use, intrinsic :: iso_c_binding, only : c_ptr, C_NULL_PTR + use, intrinsic :: iso_c_binding, only : c_ptr, C_NULL_PTR implicit none private @@ -76,7 +76,7 @@ module dofmap interface dofmap_t module procedure dofmap_init end interface dofmap_t - + contains function dofmap_init(msh, Xh) result(this) @@ -96,11 +96,11 @@ function dofmap_init(msh, Xh) result(this) this%Xh => Xh this%ntot = Xh%lx* Xh%ly * Xh%lz * msh%nelv - + ! ! Assign a unique id for all dofs - ! - + ! + allocate(this%dof(Xh%lx, Xh%ly, Xh%lz, msh%nelv)) allocate(this%shared_dof(Xh%lx, Xh%ly, Xh%lz, msh%nelv)) @@ -120,28 +120,31 @@ function dofmap_init(msh, Xh) result(this) ! ! Generate x,y,z-coordinates for all dofs ! - + allocate(this%x(Xh%lx, Xh%ly, Xh%lz, msh%nelv)) allocate(this%y(Xh%lx, Xh%ly, Xh%lz, msh%nelv)) allocate(this%z(Xh%lx, Xh%ly, Xh%lz, msh%nelv)) - + this%x = 0d0 this%y = 0d0 this%z = 0d0 !> @note should be intialised differently in axissymmetric case - call dofmap_generate_xyz(this) + call dofmap_generate_xyz(this) if (NEKO_BCKND_DEVICE .eq. 1) then call device_map(this%x, this%x_d, this%ntot) call device_map(this%y, this%y_d, this%ntot) call device_map(this%z, this%z_d, this%ntot) - call device_memcpy(this%x, this%x_d, this%ntot, HOST_TO_DEVICE) - call device_memcpy(this%y, this%y_d, this%ntot, HOST_TO_DEVICE) - call device_memcpy(this%z, this%z_d, this%ntot, HOST_TO_DEVICE) + call device_memcpy(this%x, this%x_d, this%ntot, & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%y, this%y_d, this%ntot, & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%z, this%z_d, this%ntot, & + HOST_TO_DEVICE, sync=.false.) end if - + end function dofmap_init !> Deallocate the dofmap @@ -185,7 +188,7 @@ subroutine dofmap_free(this) if (c_associated(this%z_d)) then call device_free(this%z_d) end if - + end subroutine dofmap_free !> Return the total number of dofs in the dofmap, lx*ly*lz*nelv @@ -222,15 +225,15 @@ subroutine dofmap_number_edges(this) type(mesh_t), pointer :: msh type(space_t), pointer :: Xh integer :: i,j,k - integer :: global_id + integer :: global_id type(tuple_i4_t) :: edge integer(kind=i8) :: num_dofs_edges(3) ! #dofs for each dir (r, s, t) integer(kind=i8) :: edge_id, edge_offset logical :: shared_dof - + msh => this%msh Xh => this%Xh - + ! Number of dofs on an edge excluding end-points num_dofs_edges(1) = int(Xh%lx - 2, i8) num_dofs_edges(2) = int(Xh%ly - 2, i8) @@ -238,12 +241,12 @@ subroutine dofmap_number_edges(this) edge_offset = int(msh%glb_mpts, i8) + int(1, 4) do i = 1, msh%nelv - + select type(ep=>msh%elements(i)%e) type is (hex_t) ! ! Number edges in r-direction - ! + ! call ep%edge_id(edge, 1) shared_dof = msh%is_shared(edge) global_id = msh%get_global(edge) @@ -256,7 +259,7 @@ subroutine dofmap_number_edges(this) this%shared_dof(k, 1, 1, i) = shared_dof edge_id = edge_id + 1 end do - + call ep%edge_id(edge, 3) shared_dof = msh%is_shared(edge) global_id = msh%get_global(edge) @@ -296,7 +299,7 @@ subroutine dofmap_number_edges(this) ! ! Number edges in s-direction - ! + ! call ep%edge_id(edge, 5) shared_dof = msh%is_shared(edge) global_id = msh%get_global(edge) @@ -308,7 +311,7 @@ subroutine dofmap_number_edges(this) this%shared_dof(1, k, 1, i) = shared_dof edge_id = edge_id + 1 end do - + call ep%edge_id(edge, 7) shared_dof = msh%is_shared(edge) global_id = msh%get_global(edge) @@ -339,7 +342,7 @@ subroutine dofmap_number_edges(this) edge_id = edge_offset + int((global_id - 1), i8) * num_dofs_edges(2) do j = 2, Xh%ly - 1 k = j - if(int(edge%x(1), i8) .ne. this%dof(Xh%lx,1,Xh%lz,i)) k = Xh%lz+1-j + if(int(edge%x(1), i8) .ne. this%dof(Xh%lx,1,Xh%lz,i)) k = Xh%lz+1-j this%dof(Xh%lx, k, Xh%lz, i) = edge_id this%shared_dof(Xh%lx, k, Xh%lz, i) = shared_dof edge_id = edge_id + 1 @@ -347,26 +350,26 @@ subroutine dofmap_number_edges(this) ! ! Number edges in t-direction - ! + ! call ep%edge_id(edge, 9) shared_dof = msh%is_shared(edge) global_id = msh%get_global(edge) edge_id = edge_offset + int((global_id - 1), i8) * num_dofs_edges(3) do j = 2, Xh%lz - 1 k = j - if(int(edge%x(1), i8) .ne. this%dof(1,1,1,i)) k = Xh%lz+1-j + if(int(edge%x(1), i8) .ne. this%dof(1,1,1,i)) k = Xh%lz+1-j this%dof(1, 1, k, i) = edge_id this%shared_dof(1, 1, k, i) = shared_dof edge_id = edge_id + 1 end do - + call ep%edge_id(edge, 10) shared_dof = msh%is_shared(edge) global_id = msh%get_global(edge) edge_id = edge_offset + int((global_id - 1), i8) * num_dofs_edges(3) do j = 2, Xh%lz - 1 k = j - if(int(edge%x(1), i8) .ne. this%dof(Xh%lx,1,1,i)) k = Xh%lz+1-j + if(int(edge%x(1), i8) .ne. this%dof(Xh%lx,1,1,i)) k = Xh%lz+1-j this%dof(Xh%lx, 1, k, i) = edge_id this%shared_dof(Xh%lx, 1, k, i) = shared_dof edge_id = edge_id + 1 @@ -378,7 +381,7 @@ subroutine dofmap_number_edges(this) edge_id = edge_offset + int((global_id - 1), i8) * num_dofs_edges(3) do j = 2, Xh%lz - 1 k = j - if(int(edge%x(1), i8) .ne. this%dof(1,Xh%ly,1,i)) k = Xh%lz+1-j + if(int(edge%x(1), i8) .ne. this%dof(1,Xh%ly,1,i)) k = Xh%lz+1-j this%dof(1, Xh%ly, k, i) = edge_id this%shared_dof(1, Xh%ly, k, i) = shared_dof edge_id = edge_id + 1 @@ -390,7 +393,7 @@ subroutine dofmap_number_edges(this) edge_id = edge_offset + int((global_id - 1), i8) * num_dofs_edges(3) do j = 2, Xh%lz - 1 k = j - if(int(edge%x(1), i8) .ne. this%dof(Xh%lx,Xh%ly,1,i)) k = Xh%lz+1-j + if(int(edge%x(1), i8) .ne. this%dof(Xh%lx,Xh%ly,1,i)) k = Xh%lz+1-j this%dof(Xh%lx, Xh%ly, k, i) = edge_id this%shared_dof(Xh%lx, Xh%ly, k, i) = shared_dof edge_id = edge_id + 1 @@ -398,7 +401,7 @@ subroutine dofmap_number_edges(this) type is (quad_t) ! ! Number edges in r-direction - ! + ! call ep%facet_id(edge, 3) shared_dof = msh%is_shared(edge) global_id = msh%get_global(edge) @@ -426,7 +429,7 @@ subroutine dofmap_number_edges(this) ! ! Number edges in s-direction - ! + ! call ep%facet_id(edge, 1) shared_dof = msh%is_shared(edge) global_id = msh%get_global(edge) @@ -450,9 +453,9 @@ subroutine dofmap_number_edges(this) this%shared_dof(Xh%lx, k, 1, i) = shared_dof edge_id = edge_id + 1 end do - + end select - + end do end subroutine dofmap_number_edges @@ -460,13 +463,13 @@ end subroutine dofmap_number_edges subroutine dofmap_number_faces(this) type(dofmap_t), target :: this type(mesh_t), pointer :: msh - type(space_t), pointer :: Xh + type(space_t), pointer :: Xh integer :: i,j,k integer :: global_id type(tuple4_i4_t) :: face, face_order - integer(kind=i8) :: num_dofs_faces(3) ! #dofs for each dir (r, s, t) + integer(kind=i8) :: num_dofs_faces(3) ! #dofs for each dir (r, s, t) integer(kind=i8) :: facet_offset, facet_id - logical :: shared_dof + logical :: shared_dof msh => this%msh Xh => this%Xh @@ -481,7 +484,7 @@ subroutine dofmap_number_faces(this) num_dofs_faces(3) = int((Xh%lx - 2) * (Xh%ly - 2), i8) do i = 1, msh%nelv - + ! ! Number facets in r-direction (s, t)-plane ! @@ -497,7 +500,7 @@ subroutine dofmap_number_faces(this) this%shared_dof(1, j, k, i) = shared_dof end do end do - + call msh%elements(i)%e%facet_id(face, 2) call msh%elements(i)%e%facet_order(face_order, 2) shared_dof = msh%is_shared(face) @@ -527,7 +530,7 @@ subroutine dofmap_number_faces(this) this%shared_dof(j, 1, k, i) = shared_dof end do end do - + call msh%elements(i)%e%facet_id(face, 4) call msh%elements(i)%e%facet_order(face_order, 4) shared_dof = msh%is_shared(face) @@ -557,7 +560,7 @@ subroutine dofmap_number_faces(this) this%shared_dof(j, k, 1, i) = shared_dof end do end do - + call msh%elements(i)%e%facet_id(face, 6) call msh%elements(i)%e%facet_order(face_order, 6) shared_dof = msh%is_shared(face) @@ -574,57 +577,57 @@ subroutine dofmap_number_faces(this) end subroutine dofmap_number_faces - !> Get idx for GLL point on face depending on face ordering k and j + !> Get idx for GLL point on face depending on face ordering k and j function dofmap_facetidx(face_order, face, facet_id, k1, j1, lk1, lj1) result(facet_idx) - type(tuple4_i4_t) :: face_order, face - integer(kind=i8) :: facet_idx, facet_id - integer :: k1, j1, lk1, lj1 - integer :: k,j,lk,lj - - k = k1 - 2 - j = j1 - 2 - lk = lk1 - 2 - lj = lj1 - 2 - - ! Given the indexes k,j for a GLL point on the inner part of the - ! face, we assign a unique number to it that depends on the - ! corner with the lowest id and its neighbour with the lowest - ! id. The id is assigned in this way to be consistent regardless - ! of how the faces are rotated or mirrored. - ! - ! 4 -------- 3 - ! | | k - ! |----->| ^ - ! |----->| | - ! |----->| | - ! 1 -------- 2 0--->j - - - if(face_order%x(1) .eq. face%x(1)) then + type(tuple4_i4_t) :: face_order, face + integer(kind=i8) :: facet_idx, facet_id + integer :: k1, j1, lk1, lj1 + integer :: k,j,lk,lj + + k = k1 - 2 + j = j1 - 2 + lk = lk1 - 2 + lj = lj1 - 2 + + ! Given the indexes k,j for a GLL point on the inner part of the + ! face, we assign a unique number to it that depends on the + ! corner with the lowest id and its neighbour with the lowest + ! id. The id is assigned in this way to be consistent regardless + ! of how the faces are rotated or mirrored. + ! + ! 4 -------- 3 + ! | | k + ! |----->| ^ + ! |----->| | + ! |----->| | + ! 1 -------- 2 0--->j + + + if(face_order%x(1) .eq. face%x(1)) then if(face_order%x(2) .lt. face_order%x(4)) then - facet_idx = facet_id + j + k*lj + facet_idx = facet_id + j + k*lj else - facet_idx = facet_id + j*lk + k - end if - else if(face_order%x(2) .eq. face%x(1)) then + facet_idx = facet_id + j*lk + k + end if + else if(face_order%x(2) .eq. face%x(1)) then if(face_order%x(3) .lt. face_order%x(1)) then - facet_idx = facet_id + lk*(lj-1-j) + k + facet_idx = facet_id + lk*(lj-1-j) + k else - facet_idx = facet_id + (lj-1-j) + k*lj - end if - else if(face_order%x(3) .eq. face%x(1)) then + facet_idx = facet_id + (lj-1-j) + k*lj + end if + else if(face_order%x(3) .eq. face%x(1)) then if(face_order%x(4) .lt. face_order%x(2)) then - facet_idx = facet_id + (lj-1-j) + lj*(lk-1-k) + facet_idx = facet_id + (lj-1-j) + lj*(lk-1-k) else - facet_idx = facet_id + lk*(lj-1-j) + (lk-1-k) - end if - else if(face_order%x(4) .eq. face%x(1)) then + facet_idx = facet_id + lk*(lj-1-j) + (lk-1-k) + end if + else if(face_order%x(4) .eq. face%x(1)) then if(face_order%x(1) .lt. face_order%x(3)) then - facet_idx = facet_id + lk*j + (lk-1-k) + facet_idx = facet_id + lk*j + (lk-1-k) else - facet_idx = facet_id + j + lj*(lk-1-k) - end if - end if + facet_idx = facet_id + j + lj*(lk-1-k) + end if + end if end function dofmap_facetidx @@ -641,18 +644,18 @@ subroutine dofmap_generate_xyz(this) msh => this%msh Xh => this%Xh - + if (msh%gdim .eq. 3) then n_edge = 12 else n_edge = 4 end if - do i = 1, msh%nelv + do i = 1, msh%nelv call dofmap_xyzlin(Xh, msh, msh%elements(i)%e, this%x(1,1,1,i), & - this%y(1,1,1,i), this%z(1,1,1,i)) + this%y(1,1,1,i), this%z(1,1,1,i)) end do - do i =1, msh%curve%size + do i =1, msh%curve%size midpoint = .false. el_idx = msh%curve%curve_el(i)%el_idx curve_type = msh%curve%curve_el(i)%curve_type @@ -668,7 +671,7 @@ subroutine dofmap_generate_xyz(this) this%z(1,1,1,el_idx),curve_type, curve_data_tot) end if end do - do i =1, msh%curve%size + do i =1, msh%curve%size el_idx = msh%curve%curve_el(i)%el_idx do j = 1, 8 if (msh%curve%curve_el(i)%curve_type(j) .eq. 3) then @@ -677,7 +680,7 @@ subroutine dofmap_generate_xyz(this) this%x(1,1,1,el_idx), & this%y(1,1,1,el_idx), & this%z(1,1,1, el_idx), & - Xh, msh%elements(el_idx)%e, msh%gdim) + Xh, msh%elements(el_idx)%e, msh%gdim) end if end do end do @@ -706,19 +709,19 @@ subroutine dofmap_xyzlin(Xh, msh, element, x, y, z) real(kind=rp) :: jxt(Xh%lx*2), jyt(Xh%lx*2), jzt(Xh%lx*2) real(kind=rp) :: w(4*Xh%lx**3), tmp(Xh%lx, Xh%lx, Xh%lx) real(kind=rp), dimension(2), parameter :: zlin = (/-1d0, 1d0/) - + integer :: j, k zgml = 0d0 xyzb = 0d0 - + w = 0d0 - call copy(zgml(1,1), Xh%zg(1,1), Xh%lx) + call copy(zgml(1,1), Xh%zg(1,1), Xh%lx) call copy(zgml(1,2), Xh%zg(1,2), Xh%ly) if (msh%gdim .gt. 2) then call copy(zgml(1,3), Xh%zg(1,3), Xh%lz) end if - + k = 1 do j = 1, Xh%lx call fd_weights_full(zgml(j,1), zlin, 1, 0, jxt(k)) @@ -797,9 +800,9 @@ subroutine dofmap_xyzquad(Xh, msh, element, x, y, z, curve_type, curve_data) y3(eindx(k),1,1) = curve_data(2,k) z3(eindx(k),1,1) = curve_data(3,k) end if - end do + end do zg(1) = -1 - zg(2) = 0 + zg(2) = 0 zg(3) = 1 if (msh%gdim .eq. 3) then call gh_face_extend_3d(x3,zg,3,2,w(1,1),w(1,2)) ! 2 --> edge extend @@ -810,7 +813,7 @@ subroutine dofmap_xyzquad(Xh, msh, element, x, y, z, curve_type, curve_data) call gh_face_extend_2d(x3,zg,3,2,w(1,1),w(1,2)) ! 2 --> edge extend call gh_face_extend_2d(y3,zg,3,2,w(1,1),w(1,2)) end if - k =1 + k =1 do j = 1, Xh%lx call fd_weights_full(Xh%zg(j,1),zquad,2,0,jxt(k)) call fd_weights_full(Xh%zg(j,2),zquad,2,0,jyt(k)) @@ -837,151 +840,151 @@ subroutine dofmap_xyzquad(Xh, msh, element, x, y, z, curve_type, curve_data) call Xh3%free() end subroutine dofmap_xyzquad - !> Extend faces into interior via gordon hall - !! gh_type: 1 - vertex only - !! 2 - vertex and edges - !! 3 - vertex, edges, and faces - !! Original in Nek5000/core/navier5.f - subroutine gh_face_extend_3d(x, zg, n, gh_type, e, v) - integer, intent(in) :: n - real(kind=rp), intent(inout) :: x(n, n, n) - real(kind=rp), intent(in) :: zg(n) - real(kind=rp), intent(inout) :: e(n, n, n) - real(kind=rp), intent(inout) :: v(n, n, n) - integer :: gh_type, ntot, kk, jj, ii, k, j, i - real(kind=rp) :: si, sj, sk, hi, hj, hk - + !> Extend faces into interior via gordon hall + !! gh_type: 1 - vertex only + !! 2 - vertex and edges + !! 3 - vertex, edges, and faces + !! Original in Nek5000/core/navier5.f + subroutine gh_face_extend_3d(x, zg, n, gh_type, e, v) + integer, intent(in) :: n + real(kind=rp), intent(inout) :: x(n, n, n) + real(kind=rp), intent(in) :: zg(n) + real(kind=rp), intent(inout) :: e(n, n, n) + real(kind=rp), intent(inout) :: v(n, n, n) + integer :: gh_type, ntot, kk, jj, ii, k, j, i + real(kind=rp) :: si, sj, sk, hi, hj, hk + ! ! Build vertex interpolant ! - ntot = n**3 - call rzero(v, ntot) - do kk = 1, n, n-1 - do jj = 1, n, n-1 - do ii = 1, n, n-1 - do k = 1, n - do j = 1, n - do i = 1, n - si = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1) - sj = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1) - sk = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1) - v(i,j,k) = v(i,j,k) + si*sj*sk*x(ii,jj,kk) - end do - end do - end do - end do - end do - end do - - if (gh_type .eq. 1) then - call copy(x, v, ntot) - return - end if + ntot = n**3 + call rzero(v, ntot) + do kk = 1, n, n-1 + do jj = 1, n, n-1 + do ii = 1, n, n-1 + do k = 1, n + do j = 1, n + do i = 1, n + si = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1) + sj = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1) + sk = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1) + v(i,j,k) = v(i,j,k) + si*sj*sk*x(ii,jj,kk) + end do + end do + end do + end do + end do + end do + + if (gh_type .eq. 1) then + call copy(x, v, ntot) + return + end if ! ! ! Extend 12 edges - call rzero(e, ntot) + call rzero(e, ntot) ! ! x-edges ! - do kk = 1, n, n-1 - do jj = 1, n, n-1 - do k = 1, n - do j = 1, n - do i = 1, n - hj = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1) - hk = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1) - e(i,j,k) = e(i,j,k) + hj*hk*(x(i,jj,kk)-v(i,jj,kk)) - end do - end do - end do - end do - end do + do kk = 1, n, n-1 + do jj = 1, n, n-1 + do k = 1, n + do j = 1, n + do i = 1, n + hj = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1) + hk = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1) + e(i,j,k) = e(i,j,k) + hj*hk*(x(i,jj,kk)-v(i,jj,kk)) + end do + end do + end do + end do + end do ! ! y-edges ! - do kk = 1, n, n-1 - do ii = 1, n, n-1 - do k = 1, n - do j = 1, n - do i = 1, n - hi = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1) - hk = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1) - e(i,j,k) = e(i,j,k) + hi*hk*(x(ii,j,kk)-v(ii,j,kk)) - end do - end do - end do - end do - end do + do kk = 1, n, n-1 + do ii = 1, n, n-1 + do k = 1, n + do j = 1, n + do i = 1, n + hi = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1) + hk = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1) + e(i,j,k) = e(i,j,k) + hi*hk*(x(ii,j,kk)-v(ii,j,kk)) + end do + end do + end do + end do + end do ! ! z-edges ! - do jj = 1, n, n-1 - do ii = 1, n, n-1 - do k = 1, n - do j = 1, n - do i = 1, n - hi = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1) - hj = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1) - e(i,j,k) = e(i,j,k) + hi*hj*(x(ii,jj,k)-v(ii,jj,k)) - end do - end do - end do - end do - end do - - call add2(e, v, ntot) - - if (gh_type .eq. 2) then - call copy(x, e, ntot) - return - end if + do jj = 1, n, n-1 + do ii = 1, n, n-1 + do k = 1, n + do j = 1, n + do i = 1, n + hi = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1) + hj = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1) + e(i,j,k) = e(i,j,k) + hi*hj*(x(ii,jj,k)-v(ii,jj,k)) + end do + end do + end do + end do + end do + + call add2(e, v, ntot) + + if (gh_type .eq. 2) then + call copy(x, e, ntot) + return + end if ! ! Extend faces ! - call rzero(v, ntot) + call rzero(v, ntot) ! ! x-edges ! - do ii = 1, n, n-1 - do k = 1, n - do j = 1, n - do i = 1, n - hi = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1) - v(i,j,k) = v(i,j,k) + hi*(x(ii,j,k)-e(ii,j,k)) - end do - end do - end do - end do + do ii = 1, n, n-1 + do k = 1, n + do j = 1, n + do i = 1, n + hi = 0.5*((n-ii)*(1-zg(i))+(ii-1)*(1+zg(i)))/(n-1) + v(i,j,k) = v(i,j,k) + hi*(x(ii,j,k)-e(ii,j,k)) + end do + end do + end do + end do ! ! y-edges ! - do jj = 1, n, n-1 - do k = 1, n - do j = 1, n - do i = 1, n - hj = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1) - v(i,j,k) = v(i,j,k) + hj*(x(i,jj,k)-e(i,jj,k)) - end do - end do - end do - end do -! + do jj = 1, n, n-1 + do k = 1, n + do j = 1, n + do i = 1, n + hj = 0.5*((n-jj)*(1-zg(j))+(jj-1)*(1+zg(j)))/(n-1) + v(i,j,k) = v(i,j,k) + hj*(x(i,jj,k)-e(i,jj,k)) + end do + end do + end do + end do +! ! z-edges -! - do kk= 1 , n, n-1 - do k = 1, n - do j = 1, n - do i = 1, n - hk = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1) - v(i,j,k) = v(i,j,k) + hk*(x(i,j,kk)-e(i,j,kk)) - end do - end do - end do - end do - - call add2(v, e, ntot) - call copy(x, v ,ntot) +! + do kk= 1 , n, n-1 + do k = 1, n + do j = 1, n + do i = 1, n + hk = 0.5*((n-kk)*(1-zg(k))+(kk-1)*(1+zg(k)))/(n-1) + v(i,j,k) = v(i,j,k) + hk*(x(i,j,kk)-e(i,j,kk)) + end do + end do + end do + end do + + call add2(v, e, ntot) + call copy(x, v ,ntot) end subroutine gh_face_extend_3d @@ -996,7 +999,7 @@ subroutine gh_face_extend_2d(x, zg, n, gh_type, e, v) real(kind=rp), intent(inout) :: v(n, n) integer, intent(in) :: gh_type integer :: i,j , jj, ii, ntot - real(kind=rp) :: si, sj, hi, hj + real(kind=rp) :: si, sj, hi, hj !Build vertex interpolant @@ -1054,18 +1057,18 @@ subroutine arc_surface(isid, curve_data, x, y, z, Xh, element, gdim) type(space_t), intent(in) :: Xh class(element_t) :: element real(kind=rp), dimension(5), intent(in) :: curve_data - real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz), intent(inout) :: x, y, z + real(kind=rp), dimension(Xh%lx, Xh%ly, Xh%lz), intent(inout) :: x, y, z real(kind=rp) :: pt1x, pt1y, pt2x, pt2y, pt12x, pt12y - real(kind=rp) :: radius, gap, dtheta, r, xys + real(kind=rp) :: radius, dtheta, r, xys real(kind=rp) :: theta0, xcenn, ycenn, h(Xh%lx, 3, 2) real(kind=rp) :: xcrved(Xh%lx), ycrved(Xh%lx), xs, ys integer :: isid1, ixt, iyt, izt, ix, itmp integer(i4), dimension(6), parameter :: fcyc_to_sym = (/3, 2, 4, 1, 5, 6/) ! cyclic to symmetric face mapping integer(i4), dimension(12), parameter :: ecyc_to_sym = (/1, 6, 2, 5, 3, 8, & - & 4, 7, 9, 10, 12, 11/) ! cyclic to symmetric edge mapping + & 4, 7, 9, 10, 12, 11/) ! cyclic to symmetric edge mapping integer, parameter, dimension(2, 12) :: edge_nodes = reshape((/1, 2, 3, 4, 5, 6, & - & 7, 8, 1, 3, 2, 4, 5, 7, 6, 8, 1, 5, 2, 6, 3, 7, 4, 8/), (/2,12/)) ! symmetric edge to vertex mapping - ! copy from hex as this has private attribute there + & 7, 8, 1, 3, 2, 4, 5, 7, 6, 8, 1, 5, 2, 6, 3, 7, 4, 8/), (/2,12/)) ! symmetric edge to vertex mapping + ! copy from hex as this has private attribute there ! this subroutine is a mess of symmetric and cyclic edge/face numberring and ! cannot be cleaned without changing an input format (isid seems to be @@ -1092,7 +1095,7 @@ subroutine arc_surface(isid, curve_data, x, y, z, Xh, element, gdim) xys = sqrt(xs**2 + ys**2) ! sanity check if (abs(2.0 * radius) <= xys * 1.00001) & - & call neko_error('Radius to small for arced element surface') + & call neko_error('Radius to small for arced element surface') ! find center dtheta = abs(asin(0.5*xys/radius)) pt12x = (pt1x + pt2x)/2.0 @@ -1137,18 +1140,18 @@ subroutine compute_h(h, zgml, gdim, lx) integer, intent(in) :: lx, gdim real(kind=rp), intent(inout) :: h(lx, 3, 2) real(kind=rp), intent(in) :: zgml(lx, 3) - integer :: ix, iy, iz + integer :: ix, iy, iz do ix = 1, lx h(ix,1,1) = (1.0_rp - zgml(ix, 1)) * 0.5_rp h(ix,1,2) = (1.0_rp + zgml(ix, 1)) * 0.5_rp end do - + do iy = 1, lx h(iy,2,1) = (1.0_rp - zgml(iy, 2)) * 0.5_rp h(iy,2,2) = (1.0_rp + zgml(iy, 2)) * 0.5_rp end do - + if (gdim .eq. 3) then do iz = 1, lx h(iz,3,1) = (1.0_rp - zgml(iz, 3)) * 0.5_rp @@ -1158,7 +1161,7 @@ subroutine compute_h(h, zgml, gdim, lx) call rone(h(1,3,1), lx) call rone(h(1,3,2), lx) end if - + end subroutine compute_h - + end module dofmap diff --git a/src/sem/interpolation.f90 b/src/sem/interpolation.f90 index 66be00d23dc..ec3ae35cdae 100644 --- a/src/sem/interpolation.f90 +++ b/src/sem/interpolation.f90 @@ -36,13 +36,9 @@ module interpolation use num_types, only : rp use device use fast3d - use field, only : field_t use tensor, only : tnsr3d use tensor_cpu, only : tnsr3d_cpu use space, only : space_t, operator(.eq.), GL, GLL - use mxm_wrapper, only : mxm - use coefs, only : coef_t - use point, only : point_t implicit none private @@ -119,10 +115,14 @@ subroutine interpolator_init(this, Xh, Yh) call device_map(this%Xh_to_YhT, this%Xh_YhT_d, Yh%lx*Xh%lx) call device_map(this%Yh_to_Xh, this%Yh_Xh_d, Yh%lx*Xh%lx) call device_map(this%Yh_to_XhT, this%Yh_XhT_d, Yh%lx*Xh%lx) - call device_memcpy(this%Xh_to_Yh, this%Xh_Yh_d, Yh%lx*Xh%lx, HOST_TO_DEVICE) - call device_memcpy(this%Xh_to_YhT, this%Xh_YhT_d, Yh%lx*Xh%lx, HOST_TO_DEVICE) - call device_memcpy(this%Yh_to_Xh, this%Yh_Xh_d, Yh%lx*Xh%lx, HOST_TO_DEVICE) - call device_memcpy(this%Yh_to_XhT, this%Yh_XhT_d, Yh%lx*Xh%lx, HOST_TO_DEVICE) + call device_memcpy(this%Xh_to_Yh, this%Xh_Yh_d, Yh%lx*Xh%lx, & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%Xh_to_YhT, this%Xh_YhT_d, Yh%lx*Xh%lx, & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%Yh_to_Xh, this%Yh_Xh_d, Yh%lx*Xh%lx, & + HOST_TO_DEVICE, sync=.false.) + call device_memcpy(this%Yh_to_XhT, this%Yh_XhT_d, Yh%lx*Xh%lx, & + HOST_TO_DEVICE, sync=.false.) end if end subroutine interpolator_init diff --git a/src/sem/local_interpolation.f90 b/src/sem/local_interpolation.f90 index c88865d3f02..07fac665c48 100644 --- a/src/sem/local_interpolation.f90 +++ b/src/sem/local_interpolation.f90 @@ -30,7 +30,7 @@ ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -!> Routines to obtain interpolated values on a set of points with known +!> Routines to obtain interpolated values on a set of points with known !! rst coordinates in elements local to this process. module local_interpolation use tensor, only: triple_tensor_product, tnsr3d_el_list @@ -84,7 +84,7 @@ subroutine local_interpolator_init(this, Xh, r, s, t, n_points) class(local_interpolator_t), intent(inout), target :: this type(space_t), intent(in), target :: Xh integer, intent(in) :: n_points - real(kind=rp) :: r(n_points), s(n_points), t(n_points) + real(kind=rp) :: r(n_points), s(n_points), t(n_points) integer :: size_weights call this%free() if ((Xh%t .eq. GL) .or. (Xh%t .eq. GLL)) then diff --git a/src/sem/map_1d.f90 b/src/sem/map_1d.f90 new file mode 100644 index 00000000000..28481d6f23c --- /dev/null +++ b/src/sem/map_1d.f90 @@ -0,0 +1,220 @@ + +!> Creates a 1d GLL point map along a specified direction based on the connectivity in the mesh. +module map_1d + use num_types, only: rp + use space, only: space_t + use dofmap, only: dofmap_t + use gather_scatter + use mesh, only: mesh_t + use device + use comm + use logger, only: neko_log, LOG_SIZE + use utils, only: neko_error, neko_warning + use math, only: glmax, glmin, glimax, glimin, relcmp + use neko_mpi_types + use, intrinsic :: iso_c_binding + implicit none + private + !> Type that encapsulates a mapping from each gll point in the mesh + !! to its corresponding (global) GLL point index in one direction. + !! @remark Could also be rather easily extended to say polar coordinates as well. + type, public :: map_1d_t + !> Checks whether the specified direction is in the r,s, or t direction for each element. + integer, allocatable :: dir_el(:) + !> Checks which level an element belongs to. + integer, allocatable :: el_lvl(:) + !> Checks which level or id in the 1D GLL mapping each point in the dofmap is. + integer, allocatable :: pt_lvl(:,:,:,:) + !> Number of elements stacked on top of eachother in the specified direction + integer :: n_el_lvls + !> Dofmap + type(dofmap_t), pointer :: dof => null() + !> Mesh + type(mesh_t), pointer :: msh => null() + !> The specified direction in which we create the 1D mapping + integer :: dir + !> Tolerance for the mesh + real(kind=rp) :: tol = 1e-7 + contains + !> Constructor + procedure, pass(this) :: init => map_1d_init + !> Destructor + procedure, pass(this) :: free => map_1d_free + end type map_1d_t + + +contains + + subroutine map_1d_init(this, dof, gs, dir, tol) + class(map_1d_t) :: this + type(dofmap_t), target, intent(in) :: dof + type(gs_t), intent(inout) :: gs + integer, intent(in) :: dir + real(kind=rp), intent(in) :: tol + integer :: nelv, lx, n, i, e, lvl + real(kind=rp), contiguous, pointer :: line(:,:,:,:) + real(kind=rp), allocatable :: min_vals(:,:,:,:) + type(c_ptr) :: min_vals_d = c_null_ptr + real(kind=rp) :: el_dim(3,3), glb_min, glb_max, el_min + call this%free() + + if (NEKO_BCKND_DEVICE .eq. 1) then + if (pe_rank .eq. 0) then + call neko_warning('map_1d does not copy indices to device, but ok if used on cpu') + end if + end if + + this%dir = dir + this%dof => dof + this%msh => dof%msh + nelv = this%msh%nelv + lx = this%dof%Xh%lx + n = dof%size() + + if (dir .eq. 1) then + line => dof%x + else if (dir .eq. 2) then + line => dof%y + else if(dir .eq. 3) then + line => dof%z + else + call neko_error('Invalid dir for geopmetric comm') + end if + + allocate(this%dir_el(nelv)) + allocate(this%el_lvl(nelv)) + allocate(min_vals(lx, lx, lx, nelv)) + allocate(this%pt_lvl(lx, lx, lx, nelv)) + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_map(min_vals,min_vals_d,n) + end if + + do i = 1, nelv + !store which direction r,s,t corresponds to speciifed direction, x,y,z + !we assume elements are stacked on each other... + ! Check which one of the normalized vectors are closest to dir + ! If we want to incorporate other directions, we should look here + el_dim(1,:) = abs(this%msh%elements(i)%e%pts(1)%p%x - this%msh%elements(i)%e%pts(2)%p%x) + el_dim(1,:) = el_dim(1,:)/norm2(el_dim(1,:)) + el_dim(2,:) = abs(this%msh%elements(i)%e%pts(1)%p%x - this%msh%elements(i)%e%pts(3)%p%x) + el_dim(2,:) = el_dim(2,:)/norm2(el_dim(2,:)) + el_dim(3,:) = abs(this%msh%elements(i)%e%pts(1)%p%x - this%msh%elements(i)%e%pts(5)%p%x) + el_dim(3,:) = el_dim(3,:)/norm2(el_dim(3,:)) + ! Checks which directions in rst the xyz corresponds to + ! 1 corresponds to r, 2 to s, 3 to t and are stored in dir_el + this%dir_el(i) = maxloc(el_dim(:,this%dir),dim=1) + end do + glb_min = glmin(line,n) + glb_max = glmax(line,n) + + i = 1 + this%el_lvl = -1 + ! Check what the mimum value in each element and put in min_vals + do e = 1, nelv + el_min = minval(line(:,:,:,e)) + min_vals(:,:,:,e) = el_min + ! Check if this element is on the bottom, in this case assign el_lvl = i = 1 + if (relcmp(el_min, glb_min, this%tol)) then + if(this%el_lvl(e) .eq. -1) this%el_lvl(e) = i + end if + end do + ! While loop where at each iteation the global maximum value propagates down one level. + ! When the minumum value has propagated to the highest level this stops. + ! Only works when the bottom plate of the domain is flat. + do while (.not. relcmp(glmax(min_vals,n), glb_min, this%tol)) + i = i + 1 + do e = 1, nelv + !Sets the value at the bottom of each element to glb_max + if (this%dir_el(e) .eq. 1) then + if (line(1,1,1,e) .gt. line(lx,1,1,e)) then + min_vals(lx,:,:,e) = glb_max + else + min_vals(1,:,:,e) = glb_max + end if + end if + if (this%dir_el(e) .eq. 2) then + if (line(1,1,1,e) .gt. line(1,lx,1,e)) then + min_vals(:,lx,:,e) = glb_max + else + min_vals(:,1,:,e) = glb_max + end if + end if + if (this%dir_el(e) .eq. 3) then + if (line(1,1,1,e) .gt. line(1,1,lx,e)) then + min_vals(:,:,lx,e) = glb_max + else + min_vals(:,:,1,e) = glb_max + end if + end if + end do + if (NEKO_BCKND_DEVICE .eq. 1) & + call device_memcpy(min_vals, min_vals_d, n,& + HOST_TO_DEVICE, sync=.false.) + !Propagates the minumum value along the element boundary. + call gs%op(min_vals,n,GS_OP_MIN) + if (NEKO_BCKND_DEVICE .eq. 1) & + call device_memcpy(min_vals, min_vals_d, n,& + DEVICE_TO_HOST, sync=.true.) + !Checks the new minimum value on each element + !Assign this value to all points in this element in min_val + !If the element has not already been assinged a level, + !and it has obtained the minval, set el_lvl = i + do e = 1, nelv + el_min = minval(min_vals(:,:,:,e)) + min_vals(:,:,:,e) = el_min + if (relcmp(el_min, glb_min, this%tol)) then + if (this%el_lvl(e) .eq. -1) this%el_lvl(e) = i + end if + end do + end do + this%n_el_lvls = glimax(this%el_lvl,nelv) + if ( pe_rank .eq. 0) then + write(*,*) 'Number of element levels', this%n_el_lvls + end if + !Numbers the points in each element based on the element level + !and its orientation + do e = 1, nelv + do i = 1, lx + lvl = lx * (this%el_lvl(e) - 1) + i + if (this%dir_el(e) .eq. 1) then + if (line(1,1,1,e) .gt. line(lx,1,1,e)) then + this%pt_lvl(lx-i+1,:,:,e) = lvl + else + this%pt_lvl(i,:,:,e) = lvl + end if + end if + if (this%dir_el(e) .eq. 2) then + if (line(1,1,1,e) .gt. line(1,lx,1,e)) then + this%pt_lvl(:,lx-i+1,:,e) = lvl + else + this%pt_lvl(:,i,:,e) = lvl + end if + end if + if (this%dir_el(e) .eq. 3) then + if (line(1,1,1,e) .gt. line(1,1,lx,e)) then + this%pt_lvl(:,:,lx-i+1,e) = lvl + else + this%pt_lvl(:,:,i,e) = lvl + end if + end if + end do + end do + call device_deassociate(min_vals) + call device_free(min_vals_d) + deallocate(min_vals) + end subroutine map_1d_init + + subroutine map_1d_free(this) + class(map_1d_t) :: this + + if(allocated(this%dir_el)) deallocate(this%dir_el) + if(allocated(this%el_lvl)) deallocate(this%el_lvl) + if(allocated(this%pt_lvl)) deallocate(this%pt_lvl) + if(associated(this%dof)) nullify(this%dof) + if(associated(this%msh)) nullify(this%msh) + this%dir = 0 + this%n_el_lvls = 0 + + end subroutine map_1d_free + +end module map_1d diff --git a/src/sem/point_interpolator.f90 b/src/sem/point_interpolator.f90 index 2411b120a71..0017d5cdbe3 100644 --- a/src/sem/point_interpolator.f90 +++ b/src/sem/point_interpolator.f90 @@ -30,24 +30,22 @@ ! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ! POSSIBILITY OF SUCH DAMAGE. ! -!> Routines to interpolate fields on a given element +!> Routines to interpolate fields on a given element !! on a point in that element with given r,s,t coordinates module point_interpolator - use tensor, only: triple_tensor_product, tnsr3d_el_list + use tensor, only: triple_tensor_product use space, only: space_t, GL, GLL use num_types, only: rp use point, only: point_t use math, only: abscmp - use fast3d, only: fd_weights_full, setup_intp + use fast3d, only: fd_weights_full use utils, only: neko_error - use field, only: field_t - use field_list, only: field_list_t use device use device_math, only: device_rzero use neko_config, only: NEKO_BCKND_DEVICE implicit none private - + !> Field interpolator to arbitrary points within an element. !! Tailored for experimentation, and convenience, not performance !! Does all interpolation on the CPU. diff --git a/src/sem/space.f90 b/src/sem/space.f90 index c6b4fa259b0..29e7801997f 100644 --- a/src/sem/space.f90 +++ b/src/sem/space.f90 @@ -36,7 +36,7 @@ module space use num_types, only : rp use speclib use device - use utils, only : neko_error + use utils, only : neko_error use fast3d, only : setup_intp use math use tensor, only : trsp1 @@ -44,7 +44,7 @@ module space use, intrinsic :: iso_c_binding implicit none private - + integer, public, parameter :: GL = 0, GLL = 1, GJ = 2 !> The function space for the SEM solution fields @@ -52,7 +52,7 @@ module space !! In SEM, the solution fields are represented as a linear combination of !! basis functions for a particular function space. Thus, the properties of !! the space define that of the solution. The global SEM basis is never build, - !! but is implictly defined by the local basis for each element. + !! but is implictly defined by the local basis for each element. !! The latter is polynomial, and is therefore defined by the order of the !! polys and the selected locations of the nodes for storing the solution. !! In SEM, the latter coincides with a Gaussian quadrature (GL, GLL, etc.) @@ -68,9 +68,9 @@ module space integer :: lyz !< Number of points in yz-plane integer :: lxz !< Number of points in xz-plane integer :: lxyz !< Number of points in xyz-block - + real(kind=rp), allocatable :: zg(:,:) !< Quadrature points - + real(kind=rp), allocatable :: dr_inv(:) !< 1/dist quadrature points real(kind=rp), allocatable :: ds_inv(:) !< 1/dist quadrature points real(kind=rp), allocatable :: dt_inv(:) !< 1/dist quadrature points @@ -94,7 +94,7 @@ module space real(kind=rp), allocatable :: dyt(:,:) !> Transposed derivative operator \f$ D_3^T \f$ real(kind=rp), allocatable :: dzt(:,:) - + !> Legendre transformation matrices real(kind=rp), allocatable :: v(:,:) !< legendre to physical real(kind=rp), allocatable :: vt(:,:) !< legendre to physical t @@ -140,7 +140,7 @@ module space end interface operator(.ne.) public :: operator(.eq.), operator(.ne.) - + contains !> Initialize a function space @a s with given polynomial dimensions @@ -151,7 +151,7 @@ subroutine space_init(s, t, lx, ly, lz) integer, intent(in) :: ly !< Polynomial dimension in y-direction integer, optional, intent(in) :: lz !< Polynomial dimension in z-direction integer :: ix, iy, iz - + call space_free(s) s%lx = lx @@ -180,7 +180,7 @@ subroutine space_init(s, t, lx, ly, lz) allocate(s%wx(s%lx)) allocate(s%wy(s%ly)) allocate(s%wz(s%lz)) - + allocate(s%dr_inv(s%lx)) allocate(s%ds_inv(s%ly)) allocate(s%dt_inv(s%lz)) @@ -194,13 +194,13 @@ subroutine space_init(s, t, lx, ly, lz) allocate(s%dxt(s%lx, s%lx)) allocate(s%dyt(s%ly, s%ly)) allocate(s%dzt(s%lz, s%lz)) - + allocate(s%v(s%lx, s%lx)) allocate(s%vt(s%lx, s%lx)) allocate(s%vinv(s%lx, s%lx)) allocate(s%vinvt(s%lx, s%lx)) allocate(s%w(s%lx, s%lx)) - + ! Call low-level routines to compute nodes and quadrature weights if (t .eq. GLL) then call zwgll(s%zg(1,1), s%wx, s%lx) @@ -233,27 +233,27 @@ subroutine space_init(s, t, lx, ly, lz) end do !> Setup derivative matrices if (t .eq. GLL) then - call dgll(s%dx, s%dxt, s%zg(1,1), s%lx, s%lx) - call dgll(s%dy, s%dyt, s%zg(1,2), s%ly, s%ly) - if (s%lz .gt. 1) then - call dgll(s%dz, s%dzt, s%zg(1,3), s%lz, s%lz) - else - s%dz = 0d0 - s%dzt = 0d0 - end if + call dgll(s%dx, s%dxt, s%zg(1,1), s%lx, s%lx) + call dgll(s%dy, s%dyt, s%zg(1,2), s%ly, s%ly) + if (s%lz .gt. 1) then + call dgll(s%dz, s%dzt, s%zg(1,3), s%lz, s%lz) + else + s%dz = 0d0 + s%dzt = 0d0 + end if else if (t .eq. GL) then call setup_intp(s%dx, s%dxt, s%zg(1,1), s%zg(1,1), s%lx, s%lx,1) call setup_intp(s%dy, s%dyt, s%zg(1,2), s%zg(1,2), s%ly, s%ly,1) - if (s%lz .gt. 1) then - call setup_intp(s%dz, s%dzt, s%zg(1,3), s%zg(1,3), s%lz, s%lz, 1) - else - s%dz = 0d0 - s%dzt = 0d0 - end if - else - call neko_error("Invalid quadrature rule") - end if - + if (s%lz .gt. 1) then + call setup_intp(s%dz, s%dzt, s%zg(1,3), s%zg(1,3), s%lz, s%lz, 1) + else + s%dz = 0d0 + s%dzt = 0d0 + end if + else + call neko_error("Invalid quadrature rule") + end if + call space_compute_dist(s%dr_inv, s%zg(1,1), s%lx) call space_compute_dist(s%ds_inv, s%zg(1,2), s%ly) if (s%lz .gt. 1) then @@ -282,29 +282,29 @@ subroutine space_init(s, t, lx, ly, lz) call device_map(s%vinvt, s%vinvt_d, s%lxy) call device_map(s%w, s%w_d, s%lxy) - call device_memcpy(s%dr_inv, s%dr_inv_d, s%lx, HOST_TO_DEVICE) - call device_memcpy(s%ds_inv, s%ds_inv_d, s%lx, HOST_TO_DEVICE) - call device_memcpy(s%dt_inv, s%dt_inv_d, s%lx, HOST_TO_DEVICE) - call device_memcpy(s%wx, s%wx_d, s%lx, HOST_TO_DEVICE) - call device_memcpy(s%wy, s%wy_d, s%lx, HOST_TO_DEVICE) - call device_memcpy(s%wz, s%wz_d, s%lx, HOST_TO_DEVICE) - call device_memcpy(s%dx, s%dx_d, s%lxy, HOST_TO_DEVICE) - call device_memcpy(s%dy, s%dy_d, s%lxy, HOST_TO_DEVICE) - call device_memcpy(s%dz, s%dz_d, s%lxy, HOST_TO_DEVICE) - call device_memcpy(s%dxt, s%dxt_d, s%lxy, HOST_TO_DEVICE) - call device_memcpy(s%dyt, s%dyt_d, s%lxy, HOST_TO_DEVICE) - call device_memcpy(s%dzt, s%dzt_d, s%lxy, HOST_TO_DEVICE) - call device_memcpy(s%w3, s%w3_d, s%lxyz, HOST_TO_DEVICE) + call device_memcpy(s%dr_inv, s%dr_inv_d, s%lx, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(s%ds_inv, s%ds_inv_d, s%lx, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(s%dt_inv, s%dt_inv_d, s%lx, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(s%wx, s%wx_d, s%lx, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(s%wy, s%wy_d, s%lx, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(s%wz, s%wz_d, s%lx, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(s%dx, s%dx_d, s%lxy, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(s%dy, s%dy_d, s%lxy, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(s%dz, s%dz_d, s%lxy, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(s%dxt, s%dxt_d, s%lxy, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(s%dyt, s%dyt_d, s%lxy, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(s%dzt, s%dzt_d, s%lxy, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(s%w3, s%w3_d, s%lxyz, HOST_TO_DEVICE, sync=.false.) ix = s%lx * 3 call device_map(s%zg, s%zg_d, ix) - call device_memcpy(s%zg, s%zg_d, ix, HOST_TO_DEVICE) + call device_memcpy(s%zg, s%zg_d, ix, HOST_TO_DEVICE, sync=.false.) end if - + call space_generate_transformation_matrices(s) end subroutine space_init - + !> Deallocate a space @a s subroutine space_free(s) class(space_t), intent(inout) :: s @@ -352,19 +352,19 @@ subroutine space_free(s) if (allocated(s%dzt)) then deallocate(s%dzt) end if - + if (allocated(s%dr_inv)) then deallocate(s%dr_inv) end if - + if (allocated(s%ds_inv)) then deallocate(s%ds_inv) end if - + if (allocated(s%dt_inv)) then deallocate(s%dt_inv) end if - + if(allocated(s%v)) then deallocate(s%v) end if @@ -388,7 +388,7 @@ subroutine space_free(s) ! ! Cleanup the device (if present) ! - + if (c_associated(s%dr_inv_d)) then call device_free(s%dr_inv_d) end if @@ -412,7 +412,7 @@ subroutine space_free(s) if (c_associated(s%dzt_d)) then call device_free(s%dzt_d) end if - + if (c_associated(s%dx_d)) then call device_free(s%dx_d) end if @@ -444,7 +444,7 @@ subroutine space_free(s) if (c_associated(s%zg_d)) then call device_free(s%zg_d) end if - + if (c_associated(s%v_d)) then call device_free(s%v_d) end if @@ -481,7 +481,7 @@ pure function space_eq(Xh, Yh) result(res) else res = .false. end if - + end function space_eq !> Check if \f$ X_h \ne Y_H \f$ @@ -498,9 +498,9 @@ pure function space_ne(Xh, Yh) result(res) else res = .true. end if - + end function space_ne - + subroutine space_compute_dist(dx, x, lx) integer, intent(in) :: lx real(kind=rp), intent(inout) :: dx(lx), x(lx) @@ -514,13 +514,13 @@ subroutine space_compute_dist(dx, x, lx) dx(i) = 1.0_rp / dx(i) end do end subroutine space_compute_dist - - + + !> Generate spectral tranform matrices !! @param Xh SEM function space. subroutine space_generate_transformation_matrices(Xh) type(space_t), intent(inout) :: Xh - + real(kind=rp) :: L(0:Xh%lx-1) real(kind=rp) :: delta(Xh%lx) integer :: i, kj, j, j2, kk @@ -535,7 +535,7 @@ subroutine space_generate_transformation_matrices(Xh) L(1) = Xh%zg(j,1) do j2 = 2, Xh%lx-1 L(j2) = ( (2*j2-1) * Xh%zg(j,1) * L(j2-1) & - - (j2-1) * L(j2-2) ) / j2 + - (j2-1) * L(j2-2) ) / j2 end do do kk = 1, Xh%lx kj = kj+1 @@ -550,14 +550,14 @@ subroutine space_generate_transformation_matrices(Xh) do i = 1, Xh%lx delta(i) = 2.0_rp / (2*(i-1)+1) end do - ! modify last entry + ! modify last entry delta(Xh%lx) = 2.0_rp / (Xh%lx-1) ! calculate the inverse to multiply the matrix do i = 1, Xh%lx delta(i) = sqrt(1.0_rp / delta(i)) end do - ! scale the matrix + ! scale the matrix do i = 1, Xh%lx do j = 1, Xh%lx v(i,j) = v(i,j) * delta(j) ! orthogonal wrt weights @@ -590,20 +590,20 @@ subroutine space_generate_transformation_matrices(Xh) end associate ! Copy the data to the GPU - ! Move all this to space.f90 to for next version + ! Move all this to space.f90 to for next version if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. & - (NEKO_BCKND_OPENCL .eq. 1)) then + (NEKO_BCKND_OPENCL .eq. 1)) then call device_memcpy(Xh%v, Xh%v_d, Xh%lxy, & - HOST_TO_DEVICE) + HOST_TO_DEVICE, sync=.false.) call device_memcpy(Xh%vt, Xh%vt_d, Xh%lxy, & - HOST_TO_DEVICE) + HOST_TO_DEVICE, sync=.false.) call device_memcpy(Xh%vinv, Xh%vinv_d, Xh%lxy, & - HOST_TO_DEVICE) + HOST_TO_DEVICE, sync=.false.) call device_memcpy(Xh%vinvt, Xh%vinvt_d, Xh%lxy, & - HOST_TO_DEVICE) + HOST_TO_DEVICE, sync=.false.) call device_memcpy(Xh%w, Xh%w_d, Xh%lxy, & - HOST_TO_DEVICE) + HOST_TO_DEVICE, sync=.false.) end if diff --git a/src/sem/speclib.f90 b/src/sem/speclib.f90 index bcf054be8f4..6c0d136b082 100644 --- a/src/sem/speclib.f90 +++ b/src/sem/speclib.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! !============================================================================== @@ -150,21 +150,21 @@ module speclib contains - !> Generate `NP` Gauss Legendre points `Z` and weights `W` - !! associated with Jacobi polynomial \f$ P(N)(\alpha=0, \beta=0) \f$. - !! The polynomial degree `N = NP-1`. - !! @param Z Quadrature points. - !! @param W Quadrature weights. - !! @param NP Number of quadrature points. - SUBROUTINE ZWGL (Z,W,NP) - REAL(KIND=RP) Z(1),W(1), ALPHA, BETA - ALPHA = 0. - BETA = 0. - CALL ZWGJ (Z,W,NP,ALPHA,BETA) - RETURN - END SUBROUTINE + !> Generate `NP` Gauss Legendre points `Z` and weights `W` + !! associated with Jacobi polynomial \f$ P(N)(\alpha=0, \beta=0) \f$. + !! The polynomial degree `N = NP-1`. + !! @param Z Quadrature points. + !! @param W Quadrature weights. + !! @param NP Number of quadrature points. + SUBROUTINE ZWGL (Z,W,NP) + REAL(KIND=RP) Z(1),W(1), ALPHA, BETA + ALPHA = 0. + BETA = 0. + CALL ZWGJ (Z,W,NP,ALPHA,BETA) + RETURN + end subroutine ZWGL - SUBROUTINE ZWGLL (Z,W,NP) + SUBROUTINE ZWGLL (Z,W,NP) !-------------------------------------------------------------------- ! ! Generate NP Gauss-Lobatto Legendre points (Z) and weights (W) @@ -174,14 +174,14 @@ SUBROUTINE ZWGLL (Z,W,NP) ! operations are done in double precision. ! !-------------------------------------------------------------------- - REAL(KIND=RP) Z(1),W(1), ALPHA, BETA - ALPHA = 0. - BETA = 0. - CALL ZWGLJ (Z,W,NP,ALPHA,BETA) - RETURN - END SUBROUTINE + REAL(KIND=RP) Z(1),W(1), ALPHA, BETA + ALPHA = 0. + BETA = 0. + CALL ZWGLJ (Z,W,NP,ALPHA,BETA) + RETURN + end subroutine ZWGLL - SUBROUTINE ZWGJ (Z,W,NP,ALPHA,BETA) + SUBROUTINE ZWGJ (Z,W,NP,ALPHA,BETA) !-------------------------------------------------------------------- ! ! Generate NP GAUSS JACOBI points (Z) and weights (W) @@ -190,29 +190,29 @@ SUBROUTINE ZWGJ (Z,W,NP,ALPHA,BETA) ! Single precision version. ! !-------------------------------------------------------------------- - PARAMETER (NMAX=84) - PARAMETER (NZD = NMAX) - REAL(KIND=RP) ZD(NZD),WD(NZD),ALPHAD,BETAD - REAL(KIND=RP) Z(1),W(1),ALPHA,BETA + PARAMETER (NMAX=84) + PARAMETER (NZD = NMAX) + REAL(KIND=RP) ZD(NZD),WD(NZD),ALPHAD,BETAD + REAL(KIND=RP) Z(1),W(1),ALPHA,BETA - NPMAX = NZD - IF (NP.GT.NPMAX) THEN - WRITE (6,*) 'Too large polynomial degree in ZWGJ' - WRITE (6,*) 'Maximum polynomial degree is',NMAX - WRITE (6,*) 'Here NP=',NP - call neko_error - ENDIF - ALPHAD = ALPHA - BETAD = BETA - CALL ZWGJD (ZD,WD,NP,ALPHAD,BETAD) - DO 100 I=1,NP - Z(I) = ZD(I) - W(I) = WD(I) - 100 CONTINUE - RETURN - END SUBROUTINE + NPMAX = NZD + IF (NP.GT.NPMAX) THEN + WRITE (6,*) 'Too large polynomial degree in ZWGJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NP=',NP + call neko_error + ENDIF + ALPHAD = ALPHA + BETAD = BETA + CALL ZWGJD (ZD,WD,NP,ALPHAD,BETAD) + DO 100 I=1,NP + Z(I) = ZD(I) + W(I) = WD(I) +100 CONTINUE + RETURN + end subroutine ZWGJ - SUBROUTINE ZWGJD (Z,W,NP,ALPHA,BETA) + SUBROUTINE ZWGJD (Z,W,NP,ALPHA,BETA) !-------------------------------------------------------------------- ! ! Generate NP GAUSS JACOBI points (Z) and weights (W) @@ -221,50 +221,50 @@ SUBROUTINE ZWGJD (Z,W,NP,ALPHA,BETA) ! Double precision version. ! !-------------------------------------------------------------------- - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) Z(1),W(1),ALPHA,BETA + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) Z(1),W(1),ALPHA,BETA - N = NP-1 - DN = ((N)) - ONE = 1. - TWO = 2. - APB = ALPHA+BETA + N = NP-1 + DN = ((N)) + ONE = 1. + TWO = 2. + APB = ALPHA+BETA - IF (NP.LE.0) THEN - WRITE (6,*) 'ZWGJD: Minimum number of Gauss points is 1',np - call neko_error - ENDIF - IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN - WRITE (6,*) 'ZWGJD: Alpha and Beta must be greater than -1' - call neko_error - ENDIF + IF (NP.LE.0) THEN + WRITE (6,*) 'ZWGJD: Minimum number of Gauss points is 1',np + call neko_error + ENDIF + IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN + WRITE (6,*) 'ZWGJD: Alpha and Beta must be greater than -1' + call neko_error + ENDIF - IF (NP.EQ.1) THEN - Z(1) = (BETA-ALPHA)/(APB+TWO) - W(1) = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE)/GAMMAF(APB+TWO) & + IF (NP.EQ.1) THEN + Z(1) = (BETA-ALPHA)/(APB+TWO) + W(1) = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE)/GAMMAF(APB+TWO) & * TWO**(APB+ONE) - RETURN - ENDIF + RETURN + ENDIF - CALL JACG (Z,NP,ALPHA,BETA) + CALL JACG (Z,NP,ALPHA,BETA) - NP1 = N+1 - NP2 = N+2 - DNP1 = ((NP1)) - DNP2 = ((NP2)) - FAC1 = DNP1+ALPHA+BETA+ONE - FAC2 = FAC1+DNP1 - FAC3 = FAC2+ONE - FNORM = PNORMJ(NP1,ALPHA,BETA) - RCOEF = (FNORM*FAC2*FAC3)/(TWO*FAC1*DNP2) - DO 100 I=1,NP - CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP2,ALPHA,BETA,Z(I)) - W(I) = -RCOEF/(P*PDM1) - 100 CONTINUE - RETURN - END SUBROUTINE + NP1 = N+1 + NP2 = N+2 + DNP1 = ((NP1)) + DNP2 = ((NP2)) + FAC1 = DNP1+ALPHA+BETA+ONE + FAC2 = FAC1+DNP1 + FAC3 = FAC2+ONE + FNORM = PNORMJ(NP1,ALPHA,BETA) + RCOEF = (FNORM*FAC2*FAC3)/(TWO*FAC1*DNP2) + DO 100 I=1,NP + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP2,ALPHA,BETA,Z(I)) + W(I) = -RCOEF/(P*PDM1) +100 CONTINUE + RETURN + end subroutine ZWGJD - SUBROUTINE ZWGLJ (Z,W,NP,ALPHA,BETA) + SUBROUTINE ZWGLJ (Z,W,NP,ALPHA,BETA) !-------------------------------------------------------------------- ! ! Generate NP GAUSS LOBATTO JACOBI points (Z) and weights (W) @@ -273,29 +273,29 @@ SUBROUTINE ZWGLJ (Z,W,NP,ALPHA,BETA) ! Single precision version. ! !-------------------------------------------------------------------- - PARAMETER (NMAX=84) - PARAMETER (NZD = NMAX) - REAL(KIND=RP) ZD(NZD),WD(NZD),ALPHAD,BETAD - REAL(KIND=RP) Z(1),W(1),ALPHA,BETA + PARAMETER (NMAX=84) + PARAMETER (NZD = NMAX) + REAL(KIND=RP) ZD(NZD),WD(NZD),ALPHAD,BETAD + REAL(KIND=RP) Z(1),W(1),ALPHA,BETA - NPMAX = NZD - IF (NP.GT.NPMAX) THEN - WRITE (6,*) 'Too large polynomial degree in ZWGLJ' - WRITE (6,*) 'Maximum polynomial degree is',NMAX - WRITE (6,*) 'Here NP=',NP - call neko_error - ENDIF - ALPHAD = ALPHA - BETAD = BETA - CALL ZWGLJD (ZD,WD,NP,ALPHAD,BETAD) - DO 100 I=1,NP - Z(I) = ZD(I) - W(I) = WD(I) - 100 CONTINUE - RETURN - END SUBROUTINE + NPMAX = NZD + IF (NP.GT.NPMAX) THEN + WRITE (6,*) 'Too large polynomial degree in ZWGLJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NP=',NP + call neko_error + ENDIF + ALPHAD = ALPHA + BETAD = BETA + CALL ZWGLJD (ZD,WD,NP,ALPHAD,BETAD) + DO 100 I=1,NP + Z(I) = ZD(I) + W(I) = WD(I) +100 CONTINUE + RETURN + end subroutine ZWGLJ - SUBROUTINE ZWGLJD (Z,W,NP,ALPHA,BETA) + SUBROUTINE ZWGLJD (Z,W,NP,ALPHA,BETA) !-------------------------------------------------------------------- ! ! Generate NP GAUSS LOBATTO JACOBI points (Z) and weights (W) @@ -304,181 +304,181 @@ SUBROUTINE ZWGLJD (Z,W,NP,ALPHA,BETA) ! Double precision version. ! !-------------------------------------------------------------------- - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) Z(NP),W(NP),ALPHA,BETA + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) Z(NP),W(NP),ALPHA,BETA - N = NP-1 - NM1 = N-1 - ONE = 1. - TWO = 2. + N = NP-1 + NM1 = N-1 + ONE = 1. + TWO = 2. - IF (NP.LE.1) THEN + IF (NP.LE.1) THEN WRITE (6,*) 'ZWGLJD: Minimum number of Gauss-Lobatto points is 2' WRITE (6,*) 'ZWGLJD: alpha,beta:',alpha,beta,np call neko_error - ENDIF - IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN - WRITE (6,*) 'ZWGLJD: Alpha and Beta must be greater than -1' - call neko_error - ENDIF + ENDIF + IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN + WRITE (6,*) 'ZWGLJD: Alpha and Beta must be greater than -1' + call neko_error + ENDIF - IF (NM1.GT.0) THEN - ALPG = ALPHA+ONE - BETG = BETA+ONE - CALL ZWGJD (Z(2),W(2),NM1,ALPG,BETG) - ENDIF - Z(1) = -ONE - Z(NP) = ONE - DO 100 I=2,NP-1 - W(I) = W(I)/(ONE-Z(I)**2) - 100 CONTINUE - CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(1)) - W(1) = ENDW1 (N,ALPHA,BETA)/(TWO*PD) - CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(NP)) - W(NP) = ENDW2 (N,ALPHA,BETA)/(TWO*PD) + IF (NM1.GT.0) THEN + ALPG = ALPHA+ONE + BETG = BETA+ONE + CALL ZWGJD (Z(2),W(2),NM1,ALPG,BETG) + ENDIF + Z(1) = -ONE + Z(NP) = ONE + DO 100 I=2,NP-1 + W(I) = W(I)/(ONE-Z(I)**2) +100 CONTINUE + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(1)) + W(1) = ENDW1 (N,ALPHA,BETA)/(TWO*PD) + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(NP)) + W(NP) = ENDW2 (N,ALPHA,BETA)/(TWO*PD) ! RETURN - END SUBROUTINE + end subroutine ZWGLJD - REAL(KIND=RP) FUNCTION ENDW1 (N,ALPHA,BETA) - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) ALPHA,BETA - ZERO = 0. - ONE = 1. - TWO = 2. - THREE = 3. - FOUR = 4. - APB = ALPHA+BETA - IF (N.EQ.0) THEN - ENDW1 = ZERO - RETURN - ENDIF - F1 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE) - F1 = F1*(APB+TWO)*TWO**(APB+TWO)/TWO - IF (N.EQ.1) THEN - ENDW1 = F1 - RETURN - ENDIF - FINT1 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE) - FINT1 = FINT1*TWO**(APB+TWO) - FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR) - FINT2 = FINT2*TWO**(APB+THREE) - F2 = (-TWO*(BETA+TWO)*FINT1 + (APB+FOUR)*FINT2) & + REAL(KIND=RP) FUNCTION ENDW1 (N,ALPHA,BETA) + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) ALPHA,BETA + ZERO = 0. + ONE = 1. + TWO = 2. + THREE = 3. + FOUR = 4. + APB = ALPHA+BETA + IF (N.EQ.0) THEN + ENDW1 = ZERO + RETURN + ENDIF + F1 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE) + F1 = F1*(APB+TWO)*TWO**(APB+TWO)/TWO + IF (N.EQ.1) THEN + ENDW1 = F1 + RETURN + ENDIF + FINT1 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+ONE)/GAMMAF(APB+THREE) + FINT1 = FINT1*TWO**(APB+TWO) + FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR) + FINT2 = FINT2*TWO**(APB+THREE) + F2 = (-TWO*(BETA+TWO)*FINT1 + (APB+FOUR)*FINT2) & * (APB+THREE)/FOUR - IF (N.EQ.2) THEN - ENDW1 = F2 - RETURN - ENDIF - DO 100 I=3,N - DI = ((I-1)) - ABN = ALPHA+BETA+DI - ABNN = ABN+DI - A1 = -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE)) - A2 = (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO)) - A3 = (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE)) - F3 = -(A2*F2+A1*F1)/A3 - F1 = F2 - F2 = F3 - 100 CONTINUE - ENDW1 = F3 - RETURN - END FUNCTION + IF (N.EQ.2) THEN + ENDW1 = F2 + RETURN + ENDIF + DO 100 I=3,N + DI = ((I-1)) + ABN = ALPHA+BETA+DI + ABNN = ABN+DI + A1 = -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE)) + A2 = (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO)) + A3 = (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE)) + F3 = -(A2*F2+A1*F1)/A3 + F1 = F2 + F2 = F3 +100 CONTINUE + ENDW1 = F3 + RETURN + end function ENDW1 - REAL(KIND=RP) FUNCTION ENDW2 (N,ALPHA,BETA) - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) ALPHA,BETA - ZERO = 0. - ONE = 1. - TWO = 2. - THREE = 3. - FOUR = 4. - APB = ALPHA+BETA - IF (N.EQ.0) THEN - ENDW2 = ZERO - RETURN - ENDIF - F1 = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE) - F1 = F1*(APB+TWO)*TWO**(APB+TWO)/TWO - IF (N.EQ.1) THEN - ENDW2 = F1 - RETURN - ENDIF - FINT1 = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE) - FINT1 = FINT1*TWO**(APB+TWO) - FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR) - FINT2 = FINT2*TWO**(APB+THREE) - F2 = (TWO*(ALPHA+TWO)*FINT1 - (APB+FOUR)*FINT2) & + REAL(KIND=RP) FUNCTION ENDW2 (N,ALPHA,BETA) + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) ALPHA,BETA + ZERO = 0. + ONE = 1. + TWO = 2. + THREE = 3. + FOUR = 4. + APB = ALPHA+BETA + IF (N.EQ.0) THEN + ENDW2 = ZERO + RETURN + ENDIF + F1 = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE) + F1 = F1*(APB+TWO)*TWO**(APB+TWO)/TWO + IF (N.EQ.1) THEN + ENDW2 = F1 + RETURN + ENDIF + FINT1 = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+TWO)/GAMMAF(APB+THREE) + FINT1 = FINT1*TWO**(APB+TWO) + FINT2 = GAMMAF(ALPHA+TWO)*GAMMAF(BETA+TWO)/GAMMAF(APB+FOUR) + FINT2 = FINT2*TWO**(APB+THREE) + F2 = (TWO*(ALPHA+TWO)*FINT1 - (APB+FOUR)*FINT2) & * (APB+THREE)/FOUR - IF (N.EQ.2) THEN - ENDW2 = F2 - RETURN - ENDIF - DO 100 I=3,N - DI = ((I-1)) - ABN = ALPHA+BETA+DI - ABNN = ABN+DI - A1 = -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE)) - A2 = (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO)) - A3 = (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE)) - F3 = -(A2*F2+A1*F1)/A3 - F1 = F2 - F2 = F3 - 100 CONTINUE - ENDW2 = F3 - RETURN - END FUNCTION + IF (N.EQ.2) THEN + ENDW2 = F2 + RETURN + ENDIF + DO 100 I=3,N + DI = ((I-1)) + ABN = ALPHA+BETA+DI + ABNN = ABN+DI + A1 = -(TWO*(DI+ALPHA)*(DI+BETA))/(ABN*ABNN*(ABNN+ONE)) + A2 = (TWO*(ALPHA-BETA))/(ABNN*(ABNN+TWO)) + A3 = (TWO*(ABN+ONE))/((ABNN+TWO)*(ABNN+ONE)) + F3 = -(A2*F2+A1*F1)/A3 + F1 = F2 + F2 = F3 +100 CONTINUE + ENDW2 = F3 + RETURN + end function ENDW2 - REAL(KIND=RP) FUNCTION GAMMAF (X) - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) X - ZERO = 0.0 - HALF = 0.5 - ONE = 1.0 - TWO = 2.0 - FOUR = 4.0 - PI = FOUR*ATAN(ONE) - GAMMAF = ONE - IF (X.EQ.-HALF) GAMMAF = -TWO*SQRT(PI) - IF (X.EQ. HALF) GAMMAF = SQRT(PI) - IF (X.EQ. ONE ) GAMMAF = ONE - IF (X.EQ. TWO ) GAMMAF = ONE - IF (X.EQ. 1.5 ) GAMMAF = SQRT(PI)/2. - IF (X.EQ. 2.5) GAMMAF = 1.5*SQRT(PI)/2. - IF (X.EQ. 3.5) GAMMAF = 0.5*(2.5*(1.5*SQRT(PI))) - IF (X.EQ. 3. ) GAMMAF = 2. - IF (X.EQ. 4. ) GAMMAF = 6. - IF (X.EQ. 5. ) GAMMAF = 24. - IF (X.EQ. 6. ) GAMMAF = 120. - RETURN - END FUNCTION + REAL(KIND=RP) FUNCTION GAMMAF (X) + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) X + ZERO = 0.0 + HALF = 0.5 + ONE = 1.0 + TWO = 2.0 + FOUR = 4.0 + PI = FOUR*ATAN(ONE) + GAMMAF = ONE + IF (X.EQ.-HALF) GAMMAF = -TWO*SQRT(PI) + IF (X.EQ. HALF) GAMMAF = SQRT(PI) + IF (X.EQ. ONE ) GAMMAF = ONE + IF (X.EQ. TWO ) GAMMAF = ONE + IF (X.EQ. 1.5 ) GAMMAF = SQRT(PI)/2. + IF (X.EQ. 2.5) GAMMAF = 1.5*SQRT(PI)/2. + IF (X.EQ. 3.5) GAMMAF = 0.5*(2.5*(1.5*SQRT(PI))) + IF (X.EQ. 3. ) GAMMAF = 2. + IF (X.EQ. 4. ) GAMMAF = 6. + IF (X.EQ. 5. ) GAMMAF = 24. + IF (X.EQ. 6. ) GAMMAF = 120. + RETURN + end function GAMMAF - REAL(KIND=RP) FUNCTION PNORMJ (N,ALPHA,BETA) - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) ALPHA,BETA - ONE = 1. - TWO = 2. - DN = ((N)) - CONST = ALPHA+BETA+ONE - IF (N.LE.1) THEN - PROD = GAMMAF(DN+ALPHA)*GAMMAF(DN+BETA) - PROD = PROD/(GAMMAF(DN)*GAMMAF(DN+ALPHA+BETA)) - PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST) - RETURN - ENDIF - PROD = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE) - PROD = PROD/(TWO*(ONE+CONST)*GAMMAF(CONST+ONE)) - PROD = PROD*(ONE+ALPHA)*(TWO+ALPHA) - PROD = PROD*(ONE+BETA)*(TWO+BETA) - DO 100 I=3,N - DINDX = ((I)) - FRAC = (DINDX+ALPHA)*(DINDX+BETA)/(DINDX*(DINDX+ALPHA+BETA)) - PROD = PROD*FRAC - 100 CONTINUE - PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST) - RETURN - END FUNCTION + REAL(KIND=RP) FUNCTION PNORMJ (N,ALPHA,BETA) + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) ALPHA,BETA + ONE = 1. + TWO = 2. + DN = ((N)) + CONST = ALPHA+BETA+ONE + IF (N.LE.1) THEN + PROD = GAMMAF(DN+ALPHA)*GAMMAF(DN+BETA) + PROD = PROD/(GAMMAF(DN)*GAMMAF(DN+ALPHA+BETA)) + PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST) + RETURN + ENDIF + PROD = GAMMAF(ALPHA+ONE)*GAMMAF(BETA+ONE) + PROD = PROD/(TWO*(ONE+CONST)*GAMMAF(CONST+ONE)) + PROD = PROD*(ONE+ALPHA)*(TWO+ALPHA) + PROD = PROD*(ONE+BETA)*(TWO+BETA) + DO 100 I=3,N + DINDX = ((I)) + FRAC = (DINDX+ALPHA)*(DINDX+BETA)/(DINDX*(DINDX+ALPHA+BETA)) + PROD = PROD*FRAC +100 CONTINUE + PNORMJ = PROD * TWO**CONST/(TWO*DN+CONST) + RETURN + end function PNORMJ - SUBROUTINE JACG (XJAC,NP,ALPHA,BETA) + SUBROUTINE JACG (XJAC,NP,ALPHA,BETA) !-------------------------------------------------------------------- ! ! Compute NP Gauss points XJAC, which are the zeros of the @@ -489,94 +489,94 @@ SUBROUTINE JACG (XJAC,NP,ALPHA,BETA) ! ALPHA = BETA = -0.5 -> Chebyshev points ! !-------------------------------------------------------------------- - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) XJAC(1) - DATA KSTOP /10/ - DATA EPS/1.0e-12_RP/ - N = NP-1 - one = 1. - DTH = 4.*ATAN(one)/(2.*((N))+2.) - DO 40 J=1,NP - IF (J.EQ.1) THEN - X = COS((2.*(((J))-1.)+1.)*DTH) - ELSE - X1 = COS((2.*(((J))-1.)+1.)*DTH) - X2 = XLAST - X = (X1+X2)/2. - ENDIF - DO 30 K=1,KSTOP - CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,X) - RECSUM = 0. - JM = J-1 - DO 29 I=1,JM - RECSUM = RECSUM+1./(X-XJAC(NP-I+1)) - 29 CONTINUE - DELX = -P/(PD-RECSUM*P) - X = X+DELX - IF (ABS(DELX) .LT. EPS) GOTO 31 - 30 CONTINUE - 31 CONTINUE - XJAC(NP-J+1) = X - XLAST = X - 40 CONTINUE - DO 200 I=1,NP - XMIN = 2. - DO 100 J=I,NP - IF (XJAC(J).LT.XMIN) THEN - XMIN = XJAC(J) - JMIN = J - ENDIF - 100 CONTINUE - IF (JMIN.NE.I) THEN - SWAP = XJAC(I) - XJAC(I) = XJAC(JMIN) - XJAC(JMIN) = SWAP - ENDIF - 200 CONTINUE - RETURN - END SUBROUTINE + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) XJAC(1) + DATA KSTOP /10/ + DATA EPS/1.0e-12_RP/ + N = NP-1 + one = 1. + DTH = 4.*ATAN(one)/(2.*((N))+2.) + DO 40 J=1,NP + IF (J.EQ.1) THEN + X = COS((2.*(((J))-1.)+1.)*DTH) + ELSE + X1 = COS((2.*(((J))-1.)+1.)*DTH) + X2 = XLAST + X = (X1+X2)/2. + ENDIF + DO 30 K=1,KSTOP + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,X) + RECSUM = 0. + JM = J-1 + DO 29 I=1,JM + RECSUM = RECSUM+1./(X-XJAC(NP-I+1)) +29 CONTINUE + DELX = -P/(PD-RECSUM*P) + X = X+DELX + IF (ABS(DELX) .LT. EPS) GOTO 31 +30 CONTINUE +31 CONTINUE + XJAC(NP-J+1) = X + XLAST = X +40 CONTINUE + DO 200 I=1,NP + XMIN = 2. + DO 100 J=I,NP + IF (XJAC(J).LT.XMIN) THEN + XMIN = XJAC(J) + JMIN = J + ENDIF +100 CONTINUE + IF (JMIN.NE.I) THEN + SWAP = XJAC(I) + XJAC(I) = XJAC(JMIN) + XJAC(JMIN) = SWAP + ENDIF +200 CONTINUE + RETURN + end subroutine JACG - SUBROUTINE JACOBF (POLY,PDER,POLYM1,PDERM1,POLYM2,PDERM2,N,ALP,BET,X) + SUBROUTINE JACOBF (POLY,PDER,POLYM1,PDERM1,POLYM2,PDERM2,N,ALP,BET,X) !-------------------------------------------------------------------- ! ! Computes the Jacobi polynomial (POLY) and its derivative (PDER) ! of degree N at X. ! !-------------------------------------------------------------------- - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - APB = ALP+BET - POLY = 1. - PDER = 0. - IF (N .EQ. 0) RETURN - POLYL = POLY - PDERL = PDER - POLY = (ALP-BET+(APB+2.)*X)/2. - PDER = (APB+2.)/2. - IF (N .EQ. 1) RETURN - DO 20 K=2,N - DK = ((K)) - A1 = 2.*DK*(DK+APB)*(2.*DK+APB-2.) - A2 = (2.*DK+APB-1.)*(ALP**2-BET**2) - B3 = (2.*DK+APB-2.) - A3 = B3*(B3+1.)*(B3+2.) - A4 = 2.*(DK+ALP-1.)*(DK+BET-1.)*(2.*DK+APB) - POLYN = ((A2+A3*X)*POLY-A4*POLYL)/A1 - PDERN = ((A2+A3*X)*PDER-A4*PDERL+A3*POLY)/A1 - PSAVE = POLYL - PDSAVE = PDERL - POLYL = POLY - POLY = POLYN - PDERL = PDER - PDER = PDERN - 20 CONTINUE - POLYM1 = POLYL - PDERM1 = PDERL - POLYM2 = PSAVE - PDERM2 = PDSAVE - RETURN - END SUBROUTINE + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + APB = ALP+BET + POLY = 1. + PDER = 0. + IF (N .EQ. 0) RETURN + POLYL = POLY + PDERL = PDER + POLY = (ALP-BET+(APB+2.)*X)/2. + PDER = (APB+2.)/2. + IF (N .EQ. 1) RETURN + DO 20 K=2,N + DK = ((K)) + A1 = 2.*DK*(DK+APB)*(2.*DK+APB-2.) + A2 = (2.*DK+APB-1.)*(ALP**2-BET**2) + B3 = (2.*DK+APB-2.) + A3 = B3*(B3+1.)*(B3+2.) + A4 = 2.*(DK+ALP-1.)*(DK+BET-1.)*(2.*DK+APB) + POLYN = ((A2+A3*X)*POLY-A4*POLYL)/A1 + PDERN = ((A2+A3*X)*PDER-A4*PDERL+A3*POLY)/A1 + PSAVE = POLYL + PDSAVE = PDERL + POLYL = POLY + POLY = POLYN + PDERL = PDER + PDER = PDERN +20 CONTINUE + POLYM1 = POLYL + PDERM1 = PDERL + POLYM2 = PSAVE + PDERM2 = PDSAVE + RETURN + end subroutine JACOBF - REAL(KIND=RP) FUNCTION HGJ (II,Z,ZGJ,NP,ALPHA,BETA) + REAL(KIND=RP) FUNCTION HGJ (II,Z,ZGJ,NP,ALPHA,BETA) !--------------------------------------------------------------------- ! ! Compute the value of the Lagrangian interpolant HGJ through @@ -584,28 +584,28 @@ REAL(KIND=RP) FUNCTION HGJ (II,Z,ZGJ,NP,ALPHA,BETA) ! Single precision version. ! !--------------------------------------------------------------------- - PARAMETER (NMAX=84) - PARAMETER (NZD = NMAX) - REAL(KIND=RP) ZD,ZGJD(NZD),ALPHAD,BETAD - REAL(KIND=RP) Z,ZGJ(1),ALPHA,BETA - NPMAX = NZD - IF (NP.GT.NPMAX) THEN - WRITE (6,*) 'Too large polynomial degree in HGJ' - WRITE (6,*) 'Maximum polynomial degree is',NMAX - WRITE (6,*) 'Here NP=',NP - call neko_error - ENDIF - ZD = Z - DO 100 I=1,NP - ZGJD(I) = ZGJ(I) - 100 CONTINUE - ALPHAD = ALPHA - BETAD = BETA - HGJ = HGJD (II,ZD,ZGJD,NP,ALPHAD,BETAD) - RETURN - END FUNCTION + PARAMETER (NMAX=84) + PARAMETER (NZD = NMAX) + REAL(KIND=RP) ZD,ZGJD(NZD),ALPHAD,BETAD + REAL(KIND=RP) Z,ZGJ(1),ALPHA,BETA + NPMAX = NZD + IF (NP.GT.NPMAX) THEN + WRITE (6,*) 'Too large polynomial degree in HGJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NP=',NP + call neko_error + ENDIF + ZD = Z + DO 100 I=1,NP + ZGJD(I) = ZGJ(I) +100 CONTINUE + ALPHAD = ALPHA + BETAD = BETA + HGJ = HGJD (II,ZD,ZGJD,NP,ALPHAD,BETAD) + RETURN + end function HGJ - REAL(KIND=RP) FUNCTION HGJD (II,Z,ZGJ,NP,ALPHA,BETA) + REAL(KIND=RP) FUNCTION HGJD (II,Z,ZGJ,NP,ALPHA,BETA) !--------------------------------------------------------------------- ! ! Compute the value of the Lagrangian interpolant HGJD through @@ -613,23 +613,23 @@ REAL(KIND=RP) FUNCTION HGJD (II,Z,ZGJ,NP,ALPHA,BETA) ! Double precision version. ! !--------------------------------------------------------------------- - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) Z,ZGJ(1),ALPHA,BETA - EPS = 1.e-5 - ONE = 1. - ZI = ZGJ(II) - DZ = Z-ZI - IF (ABS(DZ).LT.EPS) THEN - HGJD = ONE - RETURN - ENDIF - CALL JACOBF (PZI,PDZI,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,ZI) - CALL JACOBF (PZ,PDZ,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,Z) - HGJD = PZ/(PDZI*(Z-ZI)) - RETURN - END FUNCTION + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) Z,ZGJ(1),ALPHA,BETA + EPS = 1.e-5 + ONE = 1. + ZI = ZGJ(II) + DZ = Z-ZI + IF (ABS(DZ).LT.EPS) THEN + HGJD = ONE + RETURN + ENDIF + CALL JACOBF (PZI,PDZI,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,ZI) + CALL JACOBF (PZ,PDZ,PM1,PDM1,PM2,PDM2,NP,ALPHA,BETA,Z) + HGJD = PZ/(PDZI*(Z-ZI)) + RETURN + end function HGJD - REAL(KIND=RP) FUNCTION HGLJ (II,Z,ZGLJ,NP,ALPHA,BETA) + REAL(KIND=RP) FUNCTION HGLJ (II,Z,ZGLJ,NP,ALPHA,BETA) !--------------------------------------------------------------------- ! ! Compute the value of the Lagrangian interpolant HGLJ through @@ -637,28 +637,28 @@ REAL(KIND=RP) FUNCTION HGLJ (II,Z,ZGLJ,NP,ALPHA,BETA) ! Single precision version. ! !--------------------------------------------------------------------- - PARAMETER (NMAX=84) - PARAMETER (NZD = NMAX) - REAL(KIND=RP) ZD,ZGLJD(NZD),ALPHAD,BETAD - REAL(KIND=RP) Z,ZGLJ(1),ALPHA,BETA - NPMAX = NZD - IF (NP.GT.NPMAX) THEN - WRITE (6,*) 'Too large polynomial degree in HGLJ' - WRITE (6,*) 'Maximum polynomial degree is',NMAX - WRITE (6,*) 'Here NP=',NP - call neko_error - ENDIF - ZD = Z - DO 100 I=1,NP - ZGLJD(I) = ZGLJ(I) - 100 CONTINUE - ALPHAD = ALPHA - BETAD = BETA - HGLJ = HGLJD (II,ZD,ZGLJD,NP,ALPHAD,BETAD) - RETURN - END FUNCTION + PARAMETER (NMAX=84) + PARAMETER (NZD = NMAX) + REAL(KIND=RP) ZD,ZGLJD(NZD),ALPHAD,BETAD + REAL(KIND=RP) Z,ZGLJ(1),ALPHA,BETA + NPMAX = NZD + IF (NP.GT.NPMAX) THEN + WRITE (6,*) 'Too large polynomial degree in HGLJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NP=',NP + call neko_error + ENDIF + ZD = Z + DO 100 I=1,NP + ZGLJD(I) = ZGLJ(I) +100 CONTINUE + ALPHAD = ALPHA + BETAD = BETA + HGLJ = HGLJD (II,ZD,ZGLJD,NP,ALPHAD,BETAD) + RETURN + end function HGLJ - REAL(KIND=RP) FUNCTION HGLJD (I,Z,ZGLJ,NP,ALPHA,BETA) + REAL(KIND=RP) FUNCTION HGLJD (I,Z,ZGLJ,NP,ALPHA,BETA) !--------------------------------------------------------------------- ! ! Compute the value of the Lagrangian interpolant HGLJD through @@ -666,27 +666,27 @@ REAL(KIND=RP) FUNCTION HGLJD (I,Z,ZGLJ,NP,ALPHA,BETA) ! Double precision version. ! !--------------------------------------------------------------------- - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) Z,ZGLJ(1),ALPHA,BETA - EPS = 1.e-5 - ONE = 1. - ZI = ZGLJ(I) - DZ = Z-ZI - IF (ABS(DZ).LT.EPS) THEN - HGLJD = ONE - RETURN - ENDIF - N = NP-1 - DN = ((N)) - EIGVAL = -DN*(DN+ALPHA+BETA+ONE) - CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,ZI) - CONST = EIGVAL*PI+ALPHA*(ONE+ZI)*PDI-BETA*(ONE-ZI)*PDI - CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z) - HGLJD = (ONE-Z**2)*PD/(CONST*(Z-ZI)) - RETURN - END FUNCTION + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) Z,ZGLJ(1),ALPHA,BETA + EPS = 1.e-5 + ONE = 1. + ZI = ZGLJ(I) + DZ = Z-ZI + IF (ABS(DZ).LT.EPS) THEN + HGLJD = ONE + RETURN + ENDIF + N = NP-1 + DN = ((N)) + EIGVAL = -DN*(DN+ALPHA+BETA+ONE) + CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,ZI) + CONST = EIGVAL*PI+ALPHA*(ONE+ZI)*PDI-BETA*(ONE-ZI)*PDI + CALL JACOBF (P,PD,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z) + HGLJD = (ONE-Z**2)*PD/(CONST*(Z-ZI)) + RETURN + end function HGLJD - SUBROUTINE DGJ (D,DT,Z,NZ,NZD,ALPHA,BETA) + SUBROUTINE DGJ (D,DT,Z,NZ,NZD,ALPHA,BETA) !----------------------------------------------------------------- ! ! Compute the derivative matrix D and its transpose DT @@ -696,41 +696,41 @@ SUBROUTINE DGJ (D,DT,Z,NZ,NZD,ALPHA,BETA) ! Single precision version. ! !----------------------------------------------------------------- - PARAMETER (NMAX=84) - PARAMETER (NZDD = NMAX) - REAL(KIND=RP) DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD),ALPHAD,BETAD - REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA + PARAMETER (NMAX=84) + PARAMETER (NZDD = NMAX) + REAL(KIND=RP) DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD),ALPHAD,BETAD + REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA - IF (NZ.LE.0) THEN - WRITE (6,*) 'DGJ: Minimum number of Gauss points is 1' - call neko_error - ENDIF - IF (NZ .GT. NMAX) THEN - WRITE (6,*) 'Too large polynomial degree in DGJ' - WRITE (6,*) 'Maximum polynomial degree is',NMAX - WRITE (6,*) 'Here Nz=',Nz - call neko_error - ENDIF - IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN - WRITE (6,*) 'DGJ: Alpha and Beta must be greater than -1' - call neko_error - ENDIF - ALPHAD = ALPHA - BETAD = BETA - DO 100 I=1,NZ - ZD(I) = Z(I) - 100 CONTINUE - CALL DGJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD) - DO I=1,NZ - DO J=1,NZ - D(I,J) = DD(I,J) - DT(I,J) = DTD(I,J) - END DO - END DO - RETURN - END SUBROUTINE + IF (NZ.LE.0) THEN + WRITE (6,*) 'DGJ: Minimum number of Gauss points is 1' + call neko_error + ENDIF + IF (NZ .GT. NMAX) THEN + WRITE (6,*) 'Too large polynomial degree in DGJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here Nz=',Nz + call neko_error + ENDIF + IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN + WRITE (6,*) 'DGJ: Alpha and Beta must be greater than -1' + call neko_error + ENDIF + ALPHAD = ALPHA + BETAD = BETA + DO 100 I=1,NZ + ZD(I) = Z(I) +100 CONTINUE + CALL DGJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD) + DO I=1,NZ + DO J=1,NZ + D(I,J) = DD(I,J) + DT(I,J) = DTD(I,J) + END DO + END DO + RETURN + end subroutine DGJ - SUBROUTINE DGJD (D,DT,Z,NZ,NZD,ALPHA,BETA) + SUBROUTINE DGJD (D,DT,Z,NZ,NZD,ALPHA,BETA) !----------------------------------------------------------------- ! ! Compute the derivative matrix D and its transpose DT @@ -740,36 +740,36 @@ SUBROUTINE DGJD (D,DT,Z,NZ,NZD,ALPHA,BETA) ! Double precision version. ! !----------------------------------------------------------------- - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA - N = NZ-1 - DN = ((N)) - ONE = 1. - TWO = 2. + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA + N = NZ-1 + DN = ((N)) + ONE = 1. + TWO = 2. - IF (NZ.LE.1) THEN + IF (NZ.LE.1) THEN WRITE (6,*) 'DGJD: Minimum number of Gauss-Lobatto points is 2' call neko_error - ENDIF - IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN - WRITE (6,*) 'DGJD: Alpha and Beta must be greater than -1' - call neko_error - ENDIF + ENDIF + IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN + WRITE (6,*) 'DGJD: Alpha and Beta must be greater than -1' + call neko_error + ENDIF - DO I=1,NZ - DO J=1,NZ - CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(I)) - CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(J)) - IF (I.NE.J) D(I,J) = PDI/(PDJ*(Z(I)-Z(J))) - IF (I.EQ.J) D(I,J) = ((ALPHA+BETA+TWO)*Z(I)+ALPHA-BETA)/ & + DO I=1,NZ + DO J=1,NZ + CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(I)) + CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NZ,ALPHA,BETA,Z(J)) + IF (I.NE.J) D(I,J) = PDI/(PDJ*(Z(I)-Z(J))) + IF (I.EQ.J) D(I,J) = ((ALPHA+BETA+TWO)*Z(I)+ALPHA-BETA)/ & (TWO*(ONE-Z(I)**2)) - DT(J,I) = D(I,J) - END DO - END DO - RETURN - END SUBROUTINE + DT(J,I) = D(I,J) + END DO + END DO + RETURN + end subroutine DGJD - SUBROUTINE DGLJ (D,DT,Z,NZ,NZD,ALPHA,BETA) + SUBROUTINE DGLJ (D,DT,Z,NZ,NZD,ALPHA,BETA) !----------------------------------------------------------------- ! ! Compute the derivative matrix D and its transpose DT @@ -779,41 +779,41 @@ SUBROUTINE DGLJ (D,DT,Z,NZ,NZD,ALPHA,BETA) ! Single precision version. ! !----------------------------------------------------------------- - PARAMETER (NMAX=84) - PARAMETER (NZDD = NMAX) - REAL(KIND=RP) DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD),ALPHAD,BETAD - REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA + PARAMETER (NMAX=84) + PARAMETER (NZDD = NMAX) + REAL(KIND=RP) DD(NZDD,NZDD),DTD(NZDD,NZDD),ZD(NZDD),ALPHAD,BETAD + REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA - IF (NZ.LE.1) THEN + IF (NZ.LE.1) THEN WRITE (6,*) 'DGLJ: Minimum number of Gauss-Lobatto points is 2' call neko_error - ENDIF - IF (NZ .GT. NMAX) THEN - WRITE (6,*) 'Too large polynomial degree in DGLJ' - WRITE (6,*) 'Maximum polynomial degree is',NMAX - WRITE (6,*) 'Here NZ=',NZ - call neko_error - ENDIF - IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN - WRITE (6,*) 'DGLJ: Alpha and Beta must be greater than -1' - call neko_error - ENDIF - ALPHAD = ALPHA - BETAD = BETA - DO 100 I=1,NZ - ZD(I) = Z(I) - 100 CONTINUE - CALL DGLJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD) - DO I=1,NZ - DO J=1,NZ - D(I,J) = DD(I,J) - DT(I,J) = DTD(I,J) - END DO - END DO - RETURN - END SUBROUTINE + ENDIF + IF (NZ .GT. NMAX) THEN + WRITE (6,*) 'Too large polynomial degree in DGLJ' + WRITE (6,*) 'Maximum polynomial degree is',NMAX + WRITE (6,*) 'Here NZ=',NZ + call neko_error + ENDIF + IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN + WRITE (6,*) 'DGLJ: Alpha and Beta must be greater than -1' + call neko_error + ENDIF + ALPHAD = ALPHA + BETAD = BETA + DO 100 I=1,NZ + ZD(I) = Z(I) +100 CONTINUE + CALL DGLJD (DD,DTD,ZD,NZ,NZDD,ALPHAD,BETAD) + DO I=1,NZ + DO J=1,NZ + D(I,J) = DD(I,J) + DT(I,J) = DTD(I,J) + END DO + END DO + RETURN + end subroutine DGLJ - SUBROUTINE DGLJD (D,DT,Z,NZ,NZD,ALPHA,BETA) + SUBROUTINE DGLJD (D,DT,Z,NZ,NZD,ALPHA,BETA) !----------------------------------------------------------------- ! ! Compute the derivative matrix D and its transpose DT @@ -823,44 +823,44 @@ SUBROUTINE DGLJD (D,DT,Z,NZ,NZD,ALPHA,BETA) ! Double precision version. ! !----------------------------------------------------------------- - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA - N = NZ-1 - DN = ((N)) - ONE = 1. - TWO = 2. - EIGVAL = -DN*(DN+ALPHA+BETA+ONE) + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1),ALPHA,BETA + N = NZ-1 + DN = ((N)) + ONE = 1. + TWO = 2. + EIGVAL = -DN*(DN+ALPHA+BETA+ONE) - IF (NZ.LE.1) THEN + IF (NZ.LE.1) THEN WRITE (6,*) 'DGLJD: Minimum number of Gauss-Lobatto points is 2' call neko_error - ENDIF - IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN - WRITE (6,*) 'DGLJD: Alpha and Beta must be greater than -1' - call neko_error - ENDIF + ENDIF + IF ((ALPHA.LE.-ONE).OR.(BETA.LE.-ONE)) THEN + WRITE (6,*) 'DGLJD: Alpha and Beta must be greater than -1' + call neko_error + ENDIF - DO I=1,NZ - DO J=1,NZ - CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(I)) - CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(J)) - CI = EIGVAL*PI-(BETA*(ONE-Z(I))-ALPHA*(ONE+Z(I)))*PDI - CJ = EIGVAL*PJ-(BETA*(ONE-Z(J))-ALPHA*(ONE+Z(J)))*PDJ - IF (I.NE.J) D(I,J) = CI/(CJ*(Z(I)-Z(J))) - IF ((I.EQ.J).AND.(I.NE.1).AND.(I.NE.NZ)) & + DO I=1,NZ + DO J=1,NZ + CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(I)) + CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,N,ALPHA,BETA,Z(J)) + CI = EIGVAL*PI-(BETA*(ONE-Z(I))-ALPHA*(ONE+Z(I)))*PDI + CJ = EIGVAL*PJ-(BETA*(ONE-Z(J))-ALPHA*(ONE+Z(J)))*PDJ + IF (I.NE.J) D(I,J) = CI/(CJ*(Z(I)-Z(J))) + IF ((I.EQ.J).AND.(I.NE.1).AND.(I.NE.NZ)) & D(I,J) = (ALPHA*(ONE+Z(I))-BETA*(ONE-Z(I)))/ & (TWO*(ONE-Z(I)**2)) - IF ((I.EQ.J).AND.(I.EQ.1)) & - D(I,J) = (EIGVAL+ALPHA)/(TWO*(BETA+TWO)) - IF ((I.EQ.J).AND.(I.EQ.NZ)) & - D(I,J) = -(EIGVAL+BETA)/(TWO*(ALPHA+TWO)) - DT(J,I) = D(I,J) - END DO - END DO - RETURN - END SUBROUTINE + IF ((I.EQ.J).AND.(I.EQ.1)) & + D(I,J) = (EIGVAL+ALPHA)/(TWO*(BETA+TWO)) + IF ((I.EQ.J).AND.(I.EQ.NZ)) & + D(I,J) = -(EIGVAL+BETA)/(TWO*(ALPHA+TWO)) + DT(J,I) = D(I,J) + END DO + END DO + RETURN + end subroutine DGLJD - SUBROUTINE DGLL (D,DT,Z,NZ,NZD) + SUBROUTINE DGLL (D,DT,Z,NZ,NZD) !----------------------------------------------------------------- ! ! Compute the derivative matrix D and its transpose DT @@ -869,76 +869,76 @@ SUBROUTINE DGLL (D,DT,Z,NZ,NZD) ! Note: D and DT are square matrices. ! !----------------------------------------------------------------- - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - PARAMETER (NMAX=84) - REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1) - N = NZ-1 - IF (NZ .GT. NMAX) THEN - WRITE (6,*) 'Subroutine DGLL' - WRITE (6,*) 'Maximum polynomial degree =',NMAX - WRITE (6,*) 'Polynomial degree =',NZ - ENDIF - IF (NZ .EQ. 1) THEN - D(1,1) = 0. - RETURN - ENDIF - FN = (N) - d0 = FN*(FN+1.)/4. - DO I=1,NZ - DO J=1,NZ - D(I,J) = 0. - IF (I.NE.J) D(I,J) = PNLEG(Z(I),N)/ & + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + PARAMETER (NMAX=84) + REAL(KIND=RP) D(NZD,NZD),DT(NZD,NZD),Z(1) + N = NZ-1 + IF (NZ .GT. NMAX) THEN + WRITE (6,*) 'Subroutine DGLL' + WRITE (6,*) 'Maximum polynomial degree =',NMAX + WRITE (6,*) 'Polynomial degree =',NZ + ENDIF + IF (NZ .EQ. 1) THEN + D(1,1) = 0. + RETURN + ENDIF + FN = (N) + d0 = FN*(FN+1.)/4. + DO I=1,NZ + DO J=1,NZ + D(I,J) = 0. + IF (I.NE.J) D(I,J) = PNLEG(Z(I),N)/ & (PNLEG(Z(J),N)*(Z(I)-Z(J))) - IF ((I.EQ.J).AND.(I.EQ.1)) D(I,J) = -d0 - IF ((I.EQ.J).AND.(I.EQ.NZ)) D(I,J) = d0 - DT(J,I) = D(I,J) - END DO - END DO - RETURN - END SUBROUTINE + IF ((I.EQ.J).AND.(I.EQ.1)) D(I,J) = -d0 + IF ((I.EQ.J).AND.(I.EQ.NZ)) D(I,J) = d0 + DT(J,I) = D(I,J) + END DO + END DO + RETURN + end subroutine DGLL - REAL(KIND=RP) FUNCTION HGLL (I,Z,ZGLL,NZ) + REAL(KIND=RP) FUNCTION HGLL (I,Z,ZGLL,NZ) !--------------------------------------------------------------------- ! ! Compute the value of the Lagrangian interpolant L through ! the NZ Gauss-Lobatto Legendre points ZGLL at the point Z. ! !--------------------------------------------------------------------- - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) ZGLL(1), EPS, DZ, Z - EPS = 1.E-5 - DZ = Z - ZGLL(I) - IF (ABS(DZ) .LT. EPS) THEN - HGLL = 1. - RETURN - ENDIF - N = NZ - 1 - ALFAN = (N)*((N)+1.) - HGLL = - (1.-Z*Z)*PNDLEG(Z,N)/ & + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) ZGLL(1), EPS, DZ, Z + EPS = 1.E-5 + DZ = Z - ZGLL(I) + IF (ABS(DZ) .LT. EPS) THEN + HGLL = 1. + RETURN + ENDIF + N = NZ - 1 + ALFAN = (N)*((N)+1.) + HGLL = - (1.-Z*Z)*PNDLEG(Z,N)/ & (ALFAN*PNLEG(ZGLL(I),N)*(Z-ZGLL(I))) - RETURN - END FUNCTION + RETURN + end function HGLL - REAL(KIND=RP) FUNCTION HGL (I,Z,ZGL,NZ) + REAL(KIND=RP) FUNCTION HGL (I,Z,ZGL,NZ) !--------------------------------------------------------------------- ! ! Compute the value of the Lagrangian interpolant HGL through ! the NZ Gauss Legendre points ZGL at the point Z. ! !--------------------------------------------------------------------- - REAL(KIND=RP) ZGL(1), Z, EPS, DZ - EPS = 1.E-5 - DZ = Z - ZGL(I) - IF (ABS(DZ) .LT. EPS) THEN - HGL = 1. - RETURN - ENDIF - N = NZ-1 - HGL = PNLEG(Z,NZ)/(PNDLEG(ZGL(I),NZ)*(Z-ZGL(I))) - RETURN - END FUNCTION + REAL(KIND=RP) ZGL(1), Z, EPS, DZ + EPS = 1.E-5 + DZ = Z - ZGL(I) + IF (ABS(DZ) .LT. EPS) THEN + HGL = 1. + RETURN + ENDIF + N = NZ-1 + HGL = PNLEG(Z,NZ)/(PNDLEG(ZGL(I),NZ)*(Z-ZGL(I))) + RETURN + end function HGL - REAL(KIND=RP) FUNCTION PNLEG (Z,N) + REAL(KIND=RP) FUNCTION PNLEG (Z,N) !--------------------------------------------------------------------- ! ! Compute the value of the Nth order Legendre polynomial at Z. @@ -947,33 +947,33 @@ REAL(KIND=RP) FUNCTION PNLEG (Z,N) ! !--------------------------------------------------------------------- ! -! This next statement is to overcome the underflow bug in the i860. +! This next statement is to overcome the underflow bug in the i860. ! It can be removed at a later date. 11 Aug 1990 pff. ! - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) Z, P1, P2, P3 - IF(ABS(Z) .LT. 1.0E-25) Z = 0.0 + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) Z, P1, P2, P3 + IF(ABS(Z) .LT. 1.0E-25) Z = 0.0 - P1 = 1. - IF (N.EQ.0) THEN - PNLEG = P1 - RETURN - ENDIF - P2 = Z - P3 = P2 - DO 10 K = 1, N-1 - FK = (K) - P3 = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.) - P1 = P2 - P2 = P3 - 10 CONTINUE - PNLEG = P3 - if (n.eq.0) pnleg = 1. - RETURN - END FUNCTION + P1 = 1. + IF (N.EQ.0) THEN + PNLEG = P1 + RETURN + ENDIF + P2 = Z + P3 = P2 + DO 10 K = 1, N-1 + FK = (K) + P3 = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.) + P1 = P2 + P2 = P3 +10 CONTINUE + PNLEG = P3 + if (n.eq.0) pnleg = 1. + RETURN + end function PNLEG - REAL(KIND=RP) FUNCTION PNDLEG (Z,N) + REAL(KIND=RP) FUNCTION PNDLEG (Z,N) !---------------------------------------------------------------------- ! ! Compute the derivative of the Nth order Legendre polynomial at Z. @@ -981,28 +981,28 @@ REAL(KIND=RP) FUNCTION PNDLEG (Z,N) ! Based on the recursion formula for the Legendre polynomials. ! !---------------------------------------------------------------------- - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) P1, P2, P1D, P2D, P3D, Z - P1 = 1. - P2 = Z - P1D = 0. - P2D = 1. - P3D = 1. - DO 10 K = 1, N-1 - FK = (K) - P3 = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.) - P3D = ((2.*FK+1.)*P2 + (2.*FK+1.)*Z*P2D - FK*P1D)/(FK+1.) - P1 = P2 - P2 = P3 - P1D = P2D - P2D = P3D - 10 CONTINUE - PNDLEG = P3D - IF (N.eq.0) pndleg = 0. - RETURN - END FUNCTION + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) P1, P2, P1D, P2D, P3D, Z + P1 = 1. + P2 = Z + P1D = 0. + P2D = 1. + P3D = 1. + DO 10 K = 1, N-1 + FK = (K) + P3 = ((2.*FK+1.)*Z*P2 - FK*P1)/(FK+1.) + P3D = ((2.*FK+1.)*P2 + (2.*FK+1.)*Z*P2D - FK*P1D)/(FK+1.) + P1 = P2 + P2 = P3 + P1D = P2D + P2D = P3D +10 CONTINUE + PNDLEG = P3D + IF (N.eq.0) pndleg = 0. + RETURN + end function PNDLEG - SUBROUTINE DGLLGL (D,DT,ZM1,ZM2,IM12,NZM1,NZM2,ND1,ND2) + SUBROUTINE DGLLGL (D,DT,ZM1,ZM2,IM12,NZM1,NZM2,ND1,ND2) !----------------------------------------------------------------------- ! ! Compute the (one-dimensional) derivative matrix D and its @@ -1014,32 +1014,32 @@ SUBROUTINE DGLLGL (D,DT,ZM1,ZM2,IM12,NZM1,NZM2,ND1,ND2) ! Note: D and DT are rectangular matrices. ! !----------------------------------------------------------------------- - REAL(KIND=RP) D(ND2,ND1), DT(ND1,ND2), ZM1(ND1), ZM2(ND2), IM12(ND2,ND1) - REAL(KIND=RP) EPS, ZP, ZQ - IF (NZM1.EQ.1) THEN - D (1,1) = 0. - DT(1,1) = 0. - RETURN - ENDIF - EPS = 1.E-6 - NM1 = NZM1-1 - DO IP = 1, NZM2 - DO JQ = 1, NZM1 - ZP = ZM2(IP) - ZQ = ZM1(JQ) - IF ((ABS(ZP) .LT. EPS).AND.(ABS(ZQ) .LT. EPS)) THEN - D(IP,JQ) = 0. - ELSE - D(IP,JQ) = (PNLEG(ZP,NM1)/PNLEG(ZQ,NM1) & + REAL(KIND=RP) D(ND2,ND1), DT(ND1,ND2), ZM1(ND1), ZM2(ND2), IM12(ND2,ND1) + REAL(KIND=RP) EPS, ZP, ZQ + IF (NZM1.EQ.1) THEN + D (1,1) = 0. + DT(1,1) = 0. + RETURN + ENDIF + EPS = 1.E-6 + NM1 = NZM1-1 + DO IP = 1, NZM2 + DO JQ = 1, NZM1 + ZP = ZM2(IP) + ZQ = ZM1(JQ) + IF ((ABS(ZP) .LT. EPS).AND.(ABS(ZQ) .LT. EPS)) THEN + D(IP,JQ) = 0. + ELSE + D(IP,JQ) = (PNLEG(ZP,NM1)/PNLEG(ZQ,NM1) & -IM12(IP,JQ))/(ZP-ZQ) - ENDIF - DT(JQ,IP) = D(IP,JQ) - END DO - END DO - RETURN - END SUBROUTINE + ENDIF + DT(JQ,IP) = D(IP,JQ) + END DO + END DO + RETURN + end subroutine DGLLGL - SUBROUTINE DGLJGJ (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA) + SUBROUTINE DGLJGJ (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA) !----------------------------------------------------------------------- ! ! Compute the (one-dimensional) derivative matrix D and its @@ -1052,50 +1052,50 @@ SUBROUTINE DGLJGJ (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA) ! Single precision version. ! !----------------------------------------------------------------------- - REAL(KIND=RP) D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2), IGLG(ND2,ND1) - PARAMETER (NMAX=84) - PARAMETER (NDD = NMAX) - REAL(KIND=RP) DD(NDD,NDD), DTD(NDD,NDD) - REAL(KIND=RP) ZGD(NDD), ZGLD(NDD), IGLGD(NDD,NDD) - REAL(KIND=RP) ALPHAD, BETAD + REAL(KIND=RP) D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2), IGLG(ND2,ND1) + PARAMETER (NMAX=84) + PARAMETER (NDD = NMAX) + REAL(KIND=RP) DD(NDD,NDD), DTD(NDD,NDD) + REAL(KIND=RP) ZGD(NDD), ZGLD(NDD), IGLGD(NDD,NDD) + REAL(KIND=RP) ALPHAD, BETAD - IF (NPGL.LE.1) THEN + IF (NPGL.LE.1) THEN WRITE(6,*) 'DGLJGJ: Minimum number of Gauss-Lobatto points is 2' call neko_error - ENDIF - IF (NPGL.GT.NMAX) THEN - WRITE(6,*) 'Polynomial degree too high in DGLJGJ' - WRITE(6,*) 'Maximum polynomial degree is',NMAX - WRITE(6,*) 'Here NPGL=',NPGL - call neko_error - ENDIF - IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN - WRITE(6,*) 'DGLJGJ: Alpha and Beta must be greater than -1' - call neko_error - ENDIF + ENDIF + IF (NPGL.GT.NMAX) THEN + WRITE(6,*) 'Polynomial degree too high in DGLJGJ' + WRITE(6,*) 'Maximum polynomial degree is',NMAX + WRITE(6,*) 'Here NPGL=',NPGL + call neko_error + ENDIF + IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN + WRITE(6,*) 'DGLJGJ: Alpha and Beta must be greater than -1' + call neko_error + ENDIF - ALPHAD = ALPHA - BETAD = BETA - DO I=1,NPG - ZGD(I) = ZG(I) - DO J=1,NPGL - IGLGD(I,J) = IGLG(I,J) - END DO - END DO - DO 200 I=1,NPGL - ZGLD(I) = ZGL(I) - 200 CONTINUE - CALL DGLJGJD (DD,DTD,ZGLD,ZGD,IGLGD,NPGL,NPG,NDD,NDD,ALPHAD,BETAD) - DO I=1,NPG - DO J=1,NPGL - D(I,J) = DD(I,J) - DT(J,I) = DTD(J,I) - END DO - END DO - RETURN - END SUBROUTINE + ALPHAD = ALPHA + BETAD = BETA + DO I=1,NPG + ZGD(I) = ZG(I) + DO J=1,NPGL + IGLGD(I,J) = IGLG(I,J) + END DO + END DO + DO 200 I=1,NPGL + ZGLD(I) = ZGL(I) +200 CONTINUE + CALL DGLJGJD (DD,DTD,ZGLD,ZGD,IGLGD,NPGL,NPG,NDD,NDD,ALPHAD,BETAD) + DO I=1,NPG + DO J=1,NPGL + D(I,J) = DD(I,J) + DT(J,I) = DTD(J,I) + END DO + END DO + RETURN + end subroutine DGLJGJ - SUBROUTINE DGLJGJD (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA) + SUBROUTINE DGLJGJD (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA) !----------------------------------------------------------------------- ! ! Compute the (one-dimensional) derivative matrix D and its @@ -1108,48 +1108,48 @@ SUBROUTINE DGLJGJD (D,DT,ZGL,ZG,IGLG,NPGL,NPG,ND1,ND2,ALPHA,BETA) ! Double precision version. ! !----------------------------------------------------------------------- - IMPLICIT REAL(KIND=RP) (A-H,O-Z) - REAL(KIND=RP) D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2) - REAL(KIND=RP) IGLG(ND2,ND1), ALPHA, BETA + IMPLICIT REAL(KIND=RP) (A-H,O-Z) + REAL(KIND=RP) D(ND2,ND1), DT(ND1,ND2), ZGL(ND1), ZG(ND2) + REAL(KIND=RP) IGLG(ND2,ND1), ALPHA, BETA - IF (NPGL.LE.1) THEN + IF (NPGL.LE.1) THEN WRITE(6,*) 'DGLJGJD: Minimum number of Gauss-Lobatto points is 2' call neko_error - ENDIF - IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN - WRITE(6,*) 'DGLJGJD: Alpha and Beta must be greater than -1' - call neko_error - ENDIF + ENDIF + IF ((ALPHA.LE.-1.).OR.(BETA.LE.-1.)) THEN + WRITE(6,*) 'DGLJGJD: Alpha and Beta must be greater than -1' + call neko_error + ENDIF - EPS = 1.e-6 - ONE = 1. - TWO = 2. - NGL = NPGL-1 - DN = ((NGL)) - EIGVAL = -DN*(DN+ALPHA+BETA+ONE) + EPS = 1.e-6 + ONE = 1. + TWO = 2. + NGL = NPGL-1 + DN = ((NGL)) + EIGVAL = -DN*(DN+ALPHA+BETA+ONE) - DO I=1,NPG - DO J=1,NPGL - DZ = ABS(ZG(I)-ZGL(J)) - IF (DZ.LT.EPS) THEN - D(I,J) = (ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I)))/ & + DO I=1,NPG + DO J=1,NPGL + DZ = ABS(ZG(I)-ZGL(J)) + IF (DZ.LT.EPS) THEN + D(I,J) = (ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I)))/ & (TWO*(ONE-ZG(I)**2)) - ELSE - CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZG(I)) - CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZGL(J)) - FACI = ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I)) - FACJ = ALPHA*(ONE+ZGL(J))-BETA*(ONE-ZGL(J)) - CONST = EIGVAL*PJ+FACJ*PDJ - D(I,J) = ((EIGVAL*PI+FACI*PDI)*(ZG(I)-ZGL(J)) & + ELSE + CALL JACOBF (PI,PDI,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZG(I)) + CALL JACOBF (PJ,PDJ,PM1,PDM1,PM2,PDM2,NGL,ALPHA,BETA,ZGL(J)) + FACI = ALPHA*(ONE+ZG(I))-BETA*(ONE-ZG(I)) + FACJ = ALPHA*(ONE+ZGL(J))-BETA*(ONE-ZGL(J)) + CONST = EIGVAL*PJ+FACJ*PDJ + D(I,J) = ((EIGVAL*PI+FACI*PDI)*(ZG(I)-ZGL(J)) & -(ONE-ZG(I)**2)*PDI)/(CONST*(ZG(I)-ZGL(J))**2) - ENDIF - DT(J,I) = D(I,J) - END DO - END DO - RETURN - END SUBROUTINE + ENDIF + DT(J,I) = D(I,J) + END DO + END DO + RETURN + end subroutine DGLJGJD - SUBROUTINE IGLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2) + SUBROUTINE IGLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2) !---------------------------------------------------------------------- ! ! Compute the one-dimensional interpolation operator (matrix) I12 @@ -1159,23 +1159,23 @@ SUBROUTINE IGLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2) ! Z2 : NZ2 points on mesh M. ! !-------------------------------------------------------------------- - REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2), ZI - IF (NZ1 .EQ. 1) THEN - I12 (1,1) = 1. - IT12(1,1) = 1. - RETURN - ENDIF - DO I=1,NZ2 - ZI = Z2(I) - DO J=1,NZ1 - I12 (I,J) = HGL(J,ZI,Z1,NZ1) - IT12(J,I) = I12(I,J) - END DO - END DO - RETURN - END SUBROUTINE + REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2), ZI + IF (NZ1 .EQ. 1) THEN + I12 (1,1) = 1. + IT12(1,1) = 1. + RETURN + ENDIF + DO I=1,NZ2 + ZI = Z2(I) + DO J=1,NZ1 + I12 (I,J) = HGL(J,ZI,Z1,NZ1) + IT12(J,I) = I12(I,J) + END DO + END DO + RETURN + end subroutine IGLM - SUBROUTINE IGLLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2) + SUBROUTINE IGLLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2) !---------------------------------------------------------------------- ! ! Compute the one-dimensional interpolation operator (matrix) I12 @@ -1185,23 +1185,23 @@ SUBROUTINE IGLLM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2) ! Z2 : NZ2 points on mesh M. ! !-------------------------------------------------------------------- - REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2),ZI - IF (NZ1 .EQ. 1) THEN - I12 (1,1) = 1. - IT12(1,1) = 1. - RETURN - ENDIF - DO I=1,NZ2 - ZI = Z2(I) - DO J=1,NZ1 - I12 (I,J) = HGLL(J,ZI,Z1,NZ1) - IT12(J,I) = I12(I,J) - END DO - END DO - RETURN - END SUBROUTINE + REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2),ZI + IF (NZ1 .EQ. 1) THEN + I12 (1,1) = 1. + IT12(1,1) = 1. + RETURN + ENDIF + DO I=1,NZ2 + ZI = Z2(I) + DO J=1,NZ1 + I12 (I,J) = HGLL(J,ZI,Z1,NZ1) + IT12(J,I) = I12(I,J) + END DO + END DO + RETURN + end subroutine IGLLM - SUBROUTINE IGJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA) + SUBROUTINE IGJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA) !---------------------------------------------------------------------- ! ! Compute the one-dimensional interpolation operator (matrix) I12 @@ -1212,23 +1212,23 @@ SUBROUTINE IGJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA) ! Single precision version. ! !-------------------------------------------------------------------- - REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2),ZI,ALPHA,BETA - IF (NZ1 .EQ. 1) THEN - I12 (1,1) = 1. - IT12(1,1) = 1. - RETURN - ENDIF - DO I=1,NZ2 - ZI = Z2(I) - DO J=1,NZ1 - I12 (I,J) = HGJ(J,ZI,Z1,NZ1,ALPHA,BETA) - IT12(J,I) = I12(I,J) - END DO - END DO - RETURN - END SUBROUTINE + REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2),ZI,ALPHA,BETA + IF (NZ1 .EQ. 1) THEN + I12 (1,1) = 1. + IT12(1,1) = 1. + RETURN + ENDIF + DO I=1,NZ2 + ZI = Z2(I) + DO J=1,NZ1 + I12 (I,J) = HGJ(J,ZI,Z1,NZ1,ALPHA,BETA) + IT12(J,I) = I12(I,J) + END DO + END DO + RETURN + end subroutine IGJM - SUBROUTINE IGLJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA) + SUBROUTINE IGLJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA) !---------------------------------------------------------------------- ! ! Compute the one-dimensional interpolation operator (matrix) I12 @@ -1239,19 +1239,19 @@ SUBROUTINE IGLJM (I12,IT12,Z1,Z2,NZ1,NZ2,ND1,ND2,ALPHA,BETA) ! Single precision version. ! !-------------------------------------------------------------------- - REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2),ZI,ALPHA,BETA - IF (NZ1 .EQ. 1) THEN - I12 (1,1) = 1. - IT12(1,1) = 1. - RETURN - ENDIF - DO I=1,NZ2 - ZI = Z2(I) - DO J=1,NZ1 - I12 (I,J) = HGLJ(J,ZI,Z1,NZ1,ALPHA,BETA) - IT12(J,I) = I12(I,J) - END DO - END DO - RETURN - END SUBROUTINE -end module + REAL(KIND=RP) I12(ND2,ND1),IT12(ND1,ND2),Z1(ND1),Z2(ND2),ZI,ALPHA,BETA + IF (NZ1 .EQ. 1) THEN + I12 (1,1) = 1. + IT12(1,1) = 1. + RETURN + ENDIF + DO I=1,NZ2 + ZI = Z2(I) + DO J=1,NZ1 + I12 (I,J) = HGLJ(J,ZI,Z1,NZ1,ALPHA,BETA) + IT12(J,I) = I12(I,J) + END DO + END DO + RETURN + end subroutine IGLJM +end module speclib diff --git a/src/sem/spectral_error_indicator.f90 b/src/sem/spectral_error_indicator.f90 index 637e11e270e..f86a3dafedd 100644 --- a/src/sem/spectral_error_indicator.f90 +++ b/src/sem/spectral_error_indicator.f90 @@ -33,7 +33,6 @@ !> Implements type spectral_error_indicator_t. module spectral_error_indicator use num_types, only: rp - use logger, only: neko_log, LOG_SIZE use field, only: field_t use coefs, only: coef_t use field_list, only: field_list_t @@ -52,11 +51,11 @@ module spectral_error_indicator !> Provides tools to calculate the spectral error indicator !! @details - !! This is a posteriori error measure, based on the local properties of + !! This is a posteriori error measure, based on the local properties of !! the spectral solution, which was developed by Mavriplis. This method - !! formally only gives an indication of the error. + !! formally only gives an indication of the error. type, public :: spectral_error_indicator_t - !> Pointers to main fields + !> Pointers to main fields type(field_t), pointer :: u => null() type(field_t), pointer :: v => null() type(field_t), pointer :: w => null() @@ -140,22 +139,22 @@ subroutine spec_err_ind_init(this, u,v,w,coef) LZ1 => coef%Xh%lz, & SERI_SMALL => this%SERI_SMALL, & SERI_SMALLR => this%SERI_SMALLR, & - SERI_SMALLG => this%SERI_SMALLG, & - SERI_SMALLS => this%SERI_SMALLS, & + SERI_SMALLG => this%SERI_SMALLG, & + SERI_SMALLS => this%SERI_SMALLS, & SERI_NP => this%SERI_NP, & SERI_NP_MAX => this%SERI_NP_MAX, & SERI_ELR => this%SERI_ELR & - ) + ) ! correctness check if (SERI_NP.gt.SERI_NP_MAX) then - if (pe_rank.eq.0) write(*,*) 'SETI_NP greater than SERI_NP_MAX' - endif - il = SERI_NP+SERI_ELR - jl = min(LX1,LY1) - jl = min(jl,LZ1) - if (il.gt.jl) then - if (pe_rank.eq.0) write(*,*) 'SERI_NP+SERI_ELR greater than L?1' - endif + if (pe_rank.eq.0) write(*,*) 'SETI_NP greater than SERI_NP_MAX' + endif + il = SERI_NP+SERI_ELR + jl = min(LX1,LY1) + jl = min(jl,LZ1) + if (il.gt.jl) then + if (pe_rank.eq.0) write(*,*) 'SERI_NP+SERI_ELR greater than L?1' + endif end associate !> Initialize the list that holds the fields to write @@ -173,27 +172,27 @@ subroutine spec_err_ind_free(this) class(spectral_error_indicator_t), intent(inout) :: this if(allocated(this%eind_u)) then - deallocate(this%eind_u) + deallocate(this%eind_u) end if if(allocated(this%eind_v)) then - deallocate(this%eind_v) + deallocate(this%eind_v) end if if(allocated(this%eind_w)) then - deallocate(this%eind_w) + deallocate(this%eind_w) end if if(allocated(this%sig_u)) then - deallocate(this%sig_u) + deallocate(this%sig_u) end if if(allocated(this%sig_w)) then - deallocate(this%sig_w) + deallocate(this%sig_w) end if if(allocated(this%sig_w)) then - deallocate(this%sig_w) + deallocate(this%sig_w) end if call this%u_hat%free() @@ -204,11 +203,11 @@ subroutine spec_err_ind_free(this) nullify(this%u) nullify(this%v) nullify(this%w) - - !> finalize data related to writing + + !> finalize data related to writing call list_final3(this%speri_l) call file_free(this%mf_speri) - + end subroutine spec_err_ind_free !> Transform a field u > u_hat into physical or spectral space @@ -223,9 +222,8 @@ subroutine transform_to_spec_or_phys(u_hat, u, wk, coef, space) type(field_t), intent(inout) :: u type(field_t), intent(inout) :: wk type(coef_t), intent(inout) :: coef - character(len=4), intent(in) :: space + character(len=4), intent(in) :: space integer :: i, j, k, e, nxyz, nelv, n - character(len=LOG_SIZE) :: log_buf !> Define some constants nxyz = coef%Xh%lx*coef%Xh%lx*coef%Xh%lx @@ -234,28 +232,28 @@ subroutine transform_to_spec_or_phys(u_hat, u, wk, coef, space) !> Copy field to working array if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. & - (NEKO_BCKND_OPENCL .eq. 1)) then - call device_copy(wk%x_d, u%x_d, n) + (NEKO_BCKND_OPENCL .eq. 1)) then + call device_copy(wk%x_d, u%x_d, n) else - call copy(wk%x,u%x,n) + call copy(wk%x,u%x,n) end if select case(space) - case('spec') - call tnsr3d(u_hat%x, coef%Xh%lx, wk%x, & + case('spec') + call tnsr3d(u_hat%x, coef%Xh%lx, wk%x, & coef%Xh%lx,coef%Xh%vinv, & coef%Xh%vinvt, coef%Xh%vinvt, nelv) - case('phys') - call tnsr3d(u_hat%x, coef%Xh%lx, wk%x, & + case('phys') + call tnsr3d(u_hat%x, coef%Xh%lx, wk%x, & coef%Xh%lx,coef%Xh%v, & coef%Xh%vt, coef%Xh%vt, nelv) end select ! Synchronize if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. & - (NEKO_BCKND_OPENCL .eq. 1)) then + (NEKO_BCKND_OPENCL .eq. 1)) then - call device_memcpy(u_hat%x,u_hat%x_d, n, & + call device_memcpy(u_hat%x,u_hat%x_d, n, & DEVICE_TO_HOST, sync=.true.) end if @@ -272,8 +270,8 @@ subroutine spec_err_ind_get(this,coef) call transform_to_spec_or_phys(this%u_hat, this%u, this%wk, coef, 'spec') call transform_to_spec_or_phys(this%v_hat, this%v, this%wk, coef, 'spec') call transform_to_spec_or_phys(this%w_hat, this%w, this%wk, coef, 'spec') - - + + ! Get the spectral error indicator call calculate_indicators(this, coef, this%eind_u, this%sig_u, coef%msh%nelv, & coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & @@ -284,7 +282,7 @@ subroutine spec_err_ind_get(this,coef) call calculate_indicators(this, coef, this%eind_w, this%sig_w, coef%msh%nelv, & coef%Xh%lx, coef%Xh%ly, coef%Xh%lz, & this%w_hat%x) - + end subroutine spec_err_ind_get !> Write error indicators in a field file. @@ -292,15 +290,15 @@ end subroutine spec_err_ind_get subroutine spec_err_ind_write(this, t) class(spectral_error_indicator_t), intent(inout) :: this real(kind=rp), intent(in) :: t - + integer i, e integer lx, ly, lz, nelv - + lx = this%u_hat%Xh%lx ly = this%u_hat%Xh%ly lz = this%u_hat%Xh%lz nelv = this%u_hat%msh%nelv - + !> Copy the element indicator into all points of the field do e = 1,nelv do i = 1,lx*ly*lx @@ -313,7 +311,7 @@ subroutine spec_err_ind_write(this, t) !> Write the file !! Remember that the list is already ponting to the fields !! that were just modified. - call this%mf_speri%write(this%speri_l,t) + call this%mf_speri%write(this%speri_l,t) end subroutine spec_err_ind_write @@ -336,19 +334,19 @@ subroutine calculate_indicators(this, coef, eind, sig, lnelt, LX1, LY1, LZ1, var real(kind=rp) :: eind(lnelt) real(kind=rp) :: sig(lnelt) real(kind=rp) :: var(LX1,LY1,LZ1,lnelt) - + real(kind=rp) :: xa(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz) real(kind=rp) :: xb(coef%Xh%lx,coef%Xh%ly,coef%Xh%lz) integer :: i, e - + ! zero arrays call rzero(eind,lnelt) call rzero(sig,lnelt) - + ! Get the indicator call speri_var(this, eind,sig,var,lnelt,xa,xb, LX1, LY1, LZ1) - end subroutine calculate_indicators + end subroutine calculate_indicators !> Calculate the indicator in a specified variable @@ -372,7 +370,7 @@ subroutine speri_var(this, est,sig,var,nell,xa,xb,LX1,LY1,LZ1) real(kind=rp) :: var(LX1,LY1,LZ1,nell) real(kind=rp) :: xa(LX1,LY1,LZ1) real(kind=rp) :: xb(LX1,LY1,LZ1) - + !> local variables integer :: il, jl, kl, ll, j_st, j_en, ii !> polynomial coefficients @@ -380,7 +378,7 @@ subroutine speri_var(this, est,sig,var,nell,xa,xb,LX1,LY1,LZ1) !> Legendre coefficients; first value coeff(1,1,1) real(kind=rp) :: coef11 !> copy of last SERI_NP columns of coefficients - real(kind=rp) :: coefx(this%SERI_NP_MAX,LY1,LZ1), & + real(kind=rp) :: coefx(this%SERI_NP_MAX,LY1,LZ1), & coefy(this%SERI_NP_MAX,LX1,LZ1), & coefz(this%SERI_NP_MAX,LX1,LY1) !> estimated error @@ -392,83 +390,83 @@ subroutine speri_var(this, est,sig,var,nell,xa,xb,LX1,LY1,LZ1) !> loop over elements do il = 1,nell - !> go to Legendre space (done in two operations) - !! and square the coefficient - do ii = 1, LX1*LY1*LZ1 - coeff(ii,1,1) = var(ii,1,1,il) * var(ii,1,1,il) - end do - - !> lower left corner - coef11 = coeff(1,1,1) - - !> small value; nothing to od - if (coef11.ge.this%SERI_SMALL) then - !> extrapolate coefficients - !> X - direction - !> copy last SERI_NP collumns (or less if NX1 is smaller) - !> SERI_ELR allows to exclude last row - j_st = max(1,LX1-this%SERI_NP+1-this%SERI_ELR) - j_en = max(1,LX1-this%SERI_ELR) - do ll = 1,LZ1 - do kl = 1,LY1 - do jl = j_st,j_en - coefx(j_en-jl+1,kl,ll) = coeff(jl,kl,ll) - enddo + !> go to Legendre space (done in two operations) + !! and square the coefficient + do ii = 1, LX1*LY1*LZ1 + coeff(ii,1,1) = var(ii,1,1,il) * var(ii,1,1,il) + end do + + !> lower left corner + coef11 = coeff(1,1,1) + + !> small value; nothing to od + if (coef11.ge.this%SERI_SMALL) then + !> extrapolate coefficients + !> X - direction + !> copy last SERI_NP collumns (or less if NX1 is smaller) + !> SERI_ELR allows to exclude last row + j_st = max(1,LX1-this%SERI_NP+1-this%SERI_ELR) + j_en = max(1,LX1-this%SERI_ELR) + do ll = 1,LZ1 + do kl = 1,LY1 + do jl = j_st,j_en + coefx(j_en-jl+1,kl,ll) = coeff(jl,kl,ll) + enddo + enddo enddo - enddo - !> get extrapolated values - call speri_extrap(this,estx,sigx,coef11,coefx, & + !> get extrapolated values + call speri_extrap(this,estx,sigx,coef11,coefx, & j_st,j_en,LY1,LZ1) - - !> Y - direction - !> copy last SERI_NP collumns (or less if NY1 is smaller) - !> SERI_ELR allows to exclude last row - j_st = max(1,LY1-this%SERI_NP+1-this%SERI_ELR) - j_en = max(1,LY1-this%SERI_ELR) - do ll = 1,LZ1 - do kl = j_st,j_en - do jl = 1,LX1 - coefy(j_en-kl+1,jl,ll) = coeff(jl,kl,ll) - enddo + + !> Y - direction + !> copy last SERI_NP collumns (or less if NY1 is smaller) + !> SERI_ELR allows to exclude last row + j_st = max(1,LY1-this%SERI_NP+1-this%SERI_ELR) + j_en = max(1,LY1-this%SERI_ELR) + do ll = 1,LZ1 + do kl = j_st,j_en + do jl = 1,LX1 + coefy(j_en-kl+1,jl,ll) = coeff(jl,kl,ll) + enddo + enddo enddo - enddo - !> get extrapolated values - call speri_extrap(this, esty,sigy,coef11,coefy, & + !> get extrapolated values + call speri_extrap(this, esty,sigy,coef11,coefy, & j_st,j_en,LX1,LZ1) - - !> Z - direction - !> copy last SERI_NP collumns (or less if NZ1 is smaller) - !> SERI_ELR allows to exclude last row - j_st = max(1,LZ1-this%SERI_NP+1-this%SERI_ELR) - j_en = max(1,LZ1-this%SERI_ELR) - do ll = j_st,j_en - do kl = 1,LY1 - do jl = 1,LX1 - coefz(j_en-ll+1,jl,kl) = coeff(jl,kl,ll) - enddo + + !> Z - direction + !> copy last SERI_NP collumns (or less if NZ1 is smaller) + !> SERI_ELR allows to exclude last row + j_st = max(1,LZ1-this%SERI_NP+1-this%SERI_ELR) + j_en = max(1,LZ1-this%SERI_ELR) + do ll = j_st,j_en + do kl = 1,LY1 + do jl = 1,LX1 + coefz(j_en-ll+1,jl,kl) = coeff(jl,kl,ll) + enddo + enddo enddo - enddo - !> get extrapolated values - call speri_extrap(this, estz,sigz,coef11,coefz, & + !> get extrapolated values + call speri_extrap(this, estz,sigz,coef11,coefz, & j_st,j_en,LX1,LY1) - !> average - est(il) = sqrt(estx + esty + estz) - sig(il) = third*(sigx + sigy + sigz) - - else - !> for testing - estx = 0.0 - esty = 0.0 - estz = 0.0 - sigx = -1.0 - sigy = -1.0 - sigz = -1.0 - !> for testing; end - - est(il) = 0.0 - sig(il) = -1.0 - endif + !> average + est(il) = sqrt(estx + esty + estz) + sig(il) = third*(sigx + sigy + sigz) + + else + !> for testing + estx = 0.0 + esty = 0.0 + estz = 0.0 + sigx = -1.0 + sigy = -1.0 + sigz = -1.0 + !> for testing; end + + est(il) = 0.0 + sig(il) = -1.0 + endif end do @@ -504,15 +502,15 @@ subroutine speri_extrap(this,estx,sigx,coef11,coef, & real(kind=rp) :: sumtmp(4), cffl(this%SERI_NP_MAX) real(kind=rp) :: stmp, estt, clog, ctmp, cave, erlog logical :: cuse(this%SERI_NP_MAX) - + associate(SERI_SMALL => this%SERI_SMALL, & SERI_SMALLR => this%SERI_SMALLR, & - SERI_SMALLG => this%SERI_SMALLG, & - SERI_SMALLS => this%SERI_SMALLS, & + SERI_SMALLG => this%SERI_SMALLG, & + SERI_SMALLS => this%SERI_SMALLS, & SERI_NP => this%SERI_NP, & SERI_NP_MAX => this%SERI_NP_MAX, & SERI_ELR => this%SERI_ELR & - ) + ) ! initial values estx = 0.0 sigx = -1.0 @@ -533,121 +531,121 @@ subroutine speri_extrap(this,estx,sigx,coef11,coef, & ! loop over all face points nzlt = max(1,nzl - SERI_ELR) ! for 2D runs do il=1,nzlt - ! weight - rtmp3 = 1.0/(2.0*(il-1)+1.0) - do jl=1,nyl - SERI_ELR - - ! find min and max coef along single row - cffl(1) = coef(1,jl,il) - cmin = cffl(1) - cmax = cmin - do kl =2,pnr - cffl(kl) = coef(kl,jl,il) - cmin = min(cmin,cffl(kl)) - cmax = max(cmax,cffl(kl)) - enddo - - ! are coefficients sufficiently big - if((cmin.gt.0.0).and.(cmax.gt.smallr)) then - ! mark array position we use in iterpolation - do kl =1,pnr - cuse(kl) = .TRUE. + ! weight + rtmp3 = 1.0/(2.0*(il-1)+1.0) + do jl=1,nyl - SERI_ELR + + ! find min and max coef along single row + cffl(1) = coef(1,jl,il) + cmin = cffl(1) + cmax = cmin + do kl =2,pnr + cffl(kl) = coef(kl,jl,il) + cmin = min(cmin,cffl(kl)) + cmax = max(cmax,cffl(kl)) enddo - ! max n for polynomial order - cnm = real(ix_en) - - ! check if all the points should be taken into account - ! in original code by Catherine Mavriplis this part is written - ! for 4 points, so I place if statement first - if (pnr.eq.4) then - ! should we neglect last values - if ((cffl(1).lt.smallr).and. & + + ! are coefficients sufficiently big + if((cmin.gt.0.0).and.(cmax.gt.smallr)) then + ! mark array position we use in iterpolation + do kl =1,pnr + cuse(kl) = .TRUE. + enddo + ! max n for polynomial order + cnm = real(ix_en) + + ! check if all the points should be taken into account + ! in original code by Catherine Mavriplis this part is written + ! for 4 points, so I place if statement first + if (pnr.eq.4) then + ! should we neglect last values + if ((cffl(1).lt.smallr).and. & (cffl(2).lt.smallr)) then - if (cffl(3).lt.smallr) then - cuse(1) = .FALSE. - cuse(2) = .FALSE. - cnm = real(ix_en-2) - else - cuse(1) = .FALSE. - cnm = real(ix_en-1) - endif - else - ! should we take stronger gradient - if ((cffl(1)/cffl(2).lt.SERI_SMALLG).and. & + if (cffl(3).lt.smallr) then + cuse(1) = .FALSE. + cuse(2) = .FALSE. + cnm = real(ix_en-2) + else + cuse(1) = .FALSE. + cnm = real(ix_en-1) + endif + else + ! should we take stronger gradient + if ((cffl(1)/cffl(2).lt.SERI_SMALLG).and. & (cffl(3)/cffl(4).lt.SERI_SMALLG)) then - cuse(1) = .FALSE. - cuse(3) = .FALSE. - cnm = real(ix_en-1) - elseif ((cffl(2)/cffl(1).lt.SERI_SMALLG).and. & + cuse(1) = .FALSE. + cuse(3) = .FALSE. + cnm = real(ix_en-1) + elseif ((cffl(2)/cffl(1).lt.SERI_SMALLG).and. & (cffl(4)/cffl(3).lt.SERI_SMALLG)) then - cuse(2) = .FALSE. - cuse(4) = .FALSE. - endif - endif - endif - - ! get sigma for given face point - do kl =1,4 - sumtmp(kl) = 0.0 - enddo - ! find new min and count number of points - cmin = cmax - cmax = 0.0 - do kl =1,pnr - if(cuse(kl)) then - rtmp = real(ix_en-kl) - rtmp2 = log(cffl(kl)) - sumtmp(1) = sumtmp(1) +rtmp2 - sumtmp(2) = sumtmp(2) +rtmp - sumtmp(3) = sumtmp(3) +rtmp*rtmp - sumtmp(4) = sumtmp(4) +rtmp2*rtmp - ! find new min and count used points - cmin = min(cffl(kl),cmin) - cmax = cmax + 1.0 - endif - enddo - ! decay rate along single row - stmp = (sumtmp(1)*sumtmp(2) - sumtmp(4)*cmax)/ & + cuse(2) = .FALSE. + cuse(4) = .FALSE. + endif + endif + endif + + ! get sigma for given face point + do kl =1,4 + sumtmp(kl) = 0.0 + enddo + ! find new min and count number of points + cmin = cmax + cmax = 0.0 + do kl =1,pnr + if(cuse(kl)) then + rtmp = real(ix_en-kl) + rtmp2 = log(cffl(kl)) + sumtmp(1) = sumtmp(1) +rtmp2 + sumtmp(2) = sumtmp(2) +rtmp + sumtmp(3) = sumtmp(3) +rtmp*rtmp + sumtmp(4) = sumtmp(4) +rtmp2*rtmp + ! find new min and count used points + cmin = min(cffl(kl),cmin) + cmax = cmax + 1.0 + endif + enddo + ! decay rate along single row + stmp = (sumtmp(1)*sumtmp(2) - sumtmp(4)*cmax)/ & (sumtmp(3)*cmax - sumtmp(2)*sumtmp(2)) - ! for averaging - sigt = sigt + stmp - nsigt = nsigt + 1 - - ! get error estimator depending on calculated decay rate - estt = 0.0 - if (stmp.lt.SERI_SMALLS) then - estt = cmin - else - ! get averaged constant in front of c*exp(-sig*n) - clog = (sumtmp(1)+stmp*sumtmp(2))/cmax - ctmp = exp(clog) - ! average exponent - cave = sumtmp(1)/cmax - ! check quality of approximation comparing is to the constant cave - do kl =1,2 - sumtmp(kl) = 0.0 - enddo - do kl =1,pnr - if(cuse(kl)) then - erlog = clog - stmp*real(ix_en-kl) - sumtmp(1) = sumtmp(1)+ & + ! for averaging + sigt = sigt + stmp + nsigt = nsigt + 1 + + ! get error estimator depending on calculated decay rate + estt = 0.0 + if (stmp.lt.SERI_SMALLS) then + estt = cmin + else + ! get averaged constant in front of c*exp(-sig*n) + clog = (sumtmp(1)+stmp*sumtmp(2))/cmax + ctmp = exp(clog) + ! average exponent + cave = sumtmp(1)/cmax + ! check quality of approximation comparing is to the constant cave + do kl =1,2 + sumtmp(kl) = 0.0 + enddo + do kl =1,pnr + if(cuse(kl)) then + erlog = clog - stmp*real(ix_en-kl) + sumtmp(1) = sumtmp(1)+ & (erlog-log(cffl(kl)))**2 - sumtmp(2) = sumtmp(2)+ & + sumtmp(2) = sumtmp(2)+ & (erlog-cave)**2 - endif - enddo - rtmp = 1.0 - sumtmp(1)/sumtmp(2) - if (rtmp.lt.SERI_SMALLS) then - estt = cmin - else - ! last coefficient is not included in error estimator - estt = ctmp/stmp*exp(-stmp*cnm) - endif - endif - ! add contribution to error estimator; variable weight - estx = estx + estt/(2.0*(jl-1)+1.0)*rtmp3 - endif ! if((cmin.gt.0.0).and.(cmax.gt.smallr)) - enddo + endif + enddo + rtmp = 1.0 - sumtmp(1)/sumtmp(2) + if (rtmp.lt.SERI_SMALLS) then + estt = cmin + else + ! last coefficient is not included in error estimator + estt = ctmp/stmp*exp(-stmp*cnm) + endif + endif + ! add contribution to error estimator; variable weight + estx = estx + estt/(2.0*(jl-1)+1.0)*rtmp3 + endif ! if((cmin.gt.0.0).and.(cmax.gt.smallr)) + enddo enddo ! constant weight ! Multiplication by 4 in 2D / 8 in 3D @@ -659,7 +657,7 @@ subroutine speri_extrap(this,estx,sigx,coef11,coef, & ! final everaging ! sigt = 2*sigma so we divide by 2 if (nsigt.gt.0) then - sigx = 0.5*sigt/nsigt + sigx = 0.5*sigt/nsigt endif end associate diff --git a/src/simulation.f90 b/src/simulation.f90 index b57f744a6bb..db36125d75e 100644 --- a/src/simulation.f90 +++ b/src/simulation.f90 @@ -44,34 +44,42 @@ module simulation use field, only : field_t use profiler use math, only : col2 - use simulation_component_global, only : neko_simcomps - use json_module, only : json_file_t => json_file + use simcomp_executor, only : neko_simcomps use json_utils, only : json_get_or_default + use time_step_controller implicit none private public :: neko_solve - + contains !> Main driver to solve a case @a C subroutine neko_solve(C) - implicit none - type(case_t), intent(inout) :: C + type(case_t), target, intent(inout) :: C real(kind=rp) :: t, cfl real(kind=dp) :: start_time_org, start_time, end_time - character(len=LOG_SIZE) :: log_buf + character(len=LOG_SIZE) :: log_buf integer :: tstep, i character(len=:), allocatable :: restart_file logical :: output_at_end, found + ! for variable_tsteping + real(kind=rp) :: cfl_avrg = 0.0_rp + type(time_step_controller_t) :: dt_controller t = 0d0 tstep = 0 call neko_log%section('Starting simulation') write(log_buf,'(A, E15.7,A,E15.7,A)') 'T : [', 0d0,',',C%end_time,')' call neko_log%message(log_buf) - write(log_buf,'(A, E15.7)') 'dt : ', C%dt - call neko_log%message(log_buf) + call dt_controller%init(C%params) + if (.not. dt_controller%if_variable_dt) then + write(log_buf,'(A, E15.7)') 'dt : ', C%dt + call neko_log%message(log_buf) + else + write(log_buf,'(A, E15.7)') 'CFL : ', dt_controller%set_cfl + call neko_log%message(log_buf) + end if call C%params%get('case.restart_file', restart_file, found) if (found .and. len_trim(restart_file) .gt. 0) then @@ -79,15 +87,11 @@ subroutine neko_solve(C) call simulation_restart(C, t) ! Restart the simulation components - if (allocated(neko_simcomps)) then - do i=1, size(neko_simcomps) - call neko_simcomps(i)%simcomp%restart(t) - end do - end if + call neko_simcomps%restart(t) end if !> Call stats, samplers and user-init before time loop - call neko_log%section('Postprocessing') + call neko_log%section('Postprocessing') call C%q%eval(t, C%dt, tstep) call C%s%sample(t, tstep) @@ -97,13 +101,20 @@ subroutine neko_solve(C) call neko_log%newline() call profiler_start - + cfl = C%fluid%compute_cfl(C%dt) start_time_org = MPI_WTIME() + do while (t .lt. C%end_time .and. (.not. jobctrl_time_limit())) call profiler_start_region('Time-Step') tstep = tstep + 1 start_time = MPI_WTIME() + if (dt_controller%dt_last_change .eq. 0) then + cfl_avrg = cfl + end if + call dt_controller%set_dt(C%dt, cfl, cfl_avrg, tstep) + !calculate the cfl after the possibly varied dt cfl = C%fluid%compute_cfl(C%dt) + call neko_log%status(t, C%end_time) write(log_buf, '(A,I6)') 'Time-step: ', tstep call neko_log%message(log_buf) @@ -111,12 +122,10 @@ subroutine neko_solve(C) write(log_buf, '(A,E15.7,1x,A,E15.7)') 'CFL:', cfl, 'dt:', C%dt call neko_log%message(log_buf) - - ! Fluid step call simulation_settime(t, C%dt, C%ext_bdf, C%tlag, C%dtlag, tstep) - call neko_log%section('Fluid') - call C%fluid%step(t, tstep, C%dt, C%ext_bdf) + call neko_log%section('Fluid') + call C%fluid%step(t, tstep, C%dt, C%ext_bdf, dt_controller) end_time = MPI_WTIME() write(log_buf, '(A,E15.7,A,E15.7)') & 'Elapsed time (s):', end_time-start_time_org, ' Step time:', & @@ -126,22 +135,18 @@ subroutine neko_solve(C) ! Scalar step if (allocated(C%scalar)) then start_time = MPI_WTIME() - call neko_log%section('Scalar') - call C%scalar%step(t, tstep, C%dt, C%ext_bdf) + call neko_log%section('Scalar') + call C%scalar%step(t, tstep, C%dt, C%ext_bdf, dt_controller) end_time = MPI_WTIME() write(log_buf, '(A,E15.7,A,E15.7)') & 'Elapsed time (s):', end_time-start_time_org, ' Step time:', & end_time-start_time call neko_log%end_section(log_buf) - end if + end if - call neko_log%section('Postprocessing') + call neko_log%section('Postprocessing') ! Execute all simulation components - if (allocated(neko_simcomps)) then - do i=1, size(neko_simcomps) - call neko_simcomps(i)%simcomp%compute(t, tstep) - end do - end if + call neko_simcomps%compute(t, tstep) call C%q%eval(t, C%dt, tstep) call C%s%sample(t, tstep) @@ -152,22 +157,21 @@ subroutine neko_solve(C) C%material_properties%cp, & C%material_properties%lambda, & C%params) - + call C%usr%user_check(t, tstep,& C%fluid%u, C%fluid%v, C%fluid%w, C%fluid%p, C%fluid%c_Xh, C%params) call neko_log%end_section() - + call neko_log%end() call profiler_end_region end do - call profiler_stop call json_get_or_default(C%params, 'case.output_at_end',& output_at_end, .true.) call C%s%sample(t, tstep, output_at_end) - + if (.not. (output_at_end) .and. t .lt. C%end_time) then call simulation_joblimit_chkp(C, t) end if @@ -175,7 +179,7 @@ subroutine neko_solve(C) call C%usr%user_finalize_modules(t, C%params) call neko_log%end_section('Normal end.') - + end subroutine neko_solve subroutine simulation_settime(t, dt, ext_bdf, tlag, dtlag, step) @@ -186,7 +190,7 @@ subroutine simulation_settime(t, dt, ext_bdf, tlag, dtlag, step) real(kind=rp), dimension(10) :: dtlag integer, intent(in) :: step integer :: i - + do i = 10, 2, -1 tlag(i) = tlag(i-1) @@ -203,7 +207,7 @@ subroutine simulation_settime(t, dt, ext_bdf, tlag, dtlag, step) t = t + dt call ext_bdf%set_coeffs(dtlag) - + end subroutine simulation_settime !> Restart a case @a C from a given checkpoint @@ -213,7 +217,7 @@ subroutine simulation_restart(C, t) real(kind=rp), intent(inout) :: t integer :: i type(file_t) :: chkpf, previous_meshf - character(len=LOG_SIZE) :: log_buf + character(len=LOG_SIZE) :: log_buf character(len=:), allocatable :: restart_file character(len=:), allocatable :: restart_mesh_file real(kind=rp) :: tol @@ -233,20 +237,17 @@ subroutine simulation_restart(C, t) if (found) C%fluid%chkp%mesh2mesh_tol = tol - C%dtlag(:) = C%dt - C%tlag(:) = t - do i = 1, size(C%tlag) - C%tlag(i) = t - i*C%dtlag(i) - end do - chkpf = file_t(trim(restart_file)) call chkpf%read(C%fluid%chkp) + C%dtlag = C%fluid%chkp%dtlag + C%tlag = C%fluid%chkp%tlag + !Free the previous mesh, dont need it anymore call C%fluid%chkp%previous_mesh%free() do i = 1, size(C%dtlag) call C%ext_bdf%set_coeffs(C%dtlag) end do - + call C%fluid%restart(C%dtlag, C%tlag) if (allocated(C%scalar)) call C%scalar%restart( C%dtlag, C%tlag) @@ -259,7 +260,6 @@ subroutine simulation_restart(C, t) call neko_log%message(log_buf) call neko_log%end_section() - call C%s%set_counter(t) end subroutine simulation_restart @@ -275,8 +275,9 @@ subroutine simulation_joblimit_chkp(C, t) call chkpf%write(C%fluid%chkp, t) write(log_buf, '(A)') '! saving checkpoint >>>' call neko_log%message(log_buf) - + end subroutine simulation_joblimit_chkp end module simulation + diff --git a/src/simulation_components/field_writer.f90 b/src/simulation_components/field_writer.f90 new file mode 100644 index 00000000000..5eb34654fb0 --- /dev/null +++ b/src/simulation_components/field_writer.f90 @@ -0,0 +1,160 @@ +! Copyright (c) 2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +!> Implements the `field_writer_t` type. + +module field_writer + use num_types, only : rp, dp, sp + use json_module, only : json_file + use simulation_component, only : simulation_component_t + use field_registry, only : neko_field_registry + use field, only : field_t + use operators, only : curl + use case, only : case_t + use fld_file_output, only : fld_file_output_t + use json_utils, only : json_get, json_get_or_default + implicit none + private + + !> A simulation component that writes a 3d field to a file. + type, public, extends(simulation_component_t) :: field_writer_t + !> Output writer. + type(fld_file_output_t), private :: output + + contains + !> Constructor from json, wrapping the actual constructor. + procedure, pass(this) :: init => field_writer_init_from_json + !> Actual constructor. + procedure, pass(this) :: init_from_attributes => & + field_writer_init_from_attributes + !> Destructor. + procedure, pass(this) :: free => field_writer_free + !> Here to compy with the interface, does nothing. + procedure, pass(this) :: compute_ => field_writer_compute + end type field_writer_t + +contains + + !> Constructor from json. + !> @param json JSON object with the parameters. + !! @param case The case object. + subroutine field_writer_init_from_json(this, json, case) + class(field_writer_t), intent(inout) :: this + type(json_file), intent(inout) :: json + class(case_t), intent(inout), target :: case + character(len=:), allocatable :: filename + character(len=:), allocatable :: precision + character(len=20), allocatable :: fields(:) + + call this%init_base(json, case) + call json_get(json, "fields", fields) + + if (json%valid_path("output_filename")) then + call json_get(json, "output_filename", filename) + if (json%valid_path("output_precision")) then + call json_get(json, "output_precision", precision) + if (precision == "double") then + call field_writer_init_from_attributes(this, fields, filename, dp) + else + call field_writer_init_from_attributes(this, fields, filename, sp) + end if + else + call field_writer_init_from_attributes(this, fields, filename) + end if + else + call field_writer_init_from_attributes(this, fields) + end if + end subroutine field_writer_init_from_json + + !> Actual constructor. + !! @param fields Array of field names to be sampled. + !! @param filename The name of the file save the fields to. Optional, if not + !! provided, fields are added to the main output file. + !! @param precision The real precision of the output data. Optional, defaults + !! to single precision. + subroutine field_writer_init_from_attributes(this, fields, filename, precision) + class(field_writer_t), intent(inout) :: this + character(len=20), allocatable, intent(in) :: fields(:) + character(len=*), intent(in), optional :: filename + integer, intent(in), optional :: precision + character(len=20) :: fieldi + integer :: i + + ! Regsiter fields if they don't exist. + do i=1, size(fields) + fieldi = trim(fields(i)) + call neko_field_registry%add_field(this%case%fluid%dm_Xh, fieldi,& + ignore_existing=.true.) + end do + + if (present(filename)) then + if (present(precision)) then + call this%output%init(precision, filename, size(fields)) + else + call this%output%init(sp, filename, size(fields)) + end if + do i=1, size(fields) + fieldi = trim(fields(i)) + this%output%fields%fields(i)%f => & + neko_field_registry%get_field(fieldi) + end do + + call this%case%s%add(this%output, & + this%output_controller%control_value, & + this%output_controller%control_mode) + else + do i=1, size(fields) + fieldi = trim(fields(i)) + call this%case%f_out%fluid%append(neko_field_registry%get_field(fieldi)) + end do + end if + + end subroutine field_writer_init_from_attributes + + !> Destructor. + subroutine field_writer_free(this) + class(field_writer_t), intent(inout) :: this + call this%free_base() + end subroutine field_writer_free + + !> Here to comply with the interface, does nothing. + !! @param t The time value. + !! @param tstep The current time-step + subroutine field_writer_compute(this, t, tstep) + class(field_writer_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + + end subroutine field_writer_compute + +end module field_writer diff --git a/src/simulation_components/lambda2.f90 b/src/simulation_components/lambda2.f90 index 8050a9a3c53..349da645b2f 100644 --- a/src/simulation_components/lambda2.f90 +++ b/src/simulation_components/lambda2.f90 @@ -39,10 +39,10 @@ module lambda2 use json_module, only : json_file use simulation_component, only : simulation_component_t use field_registry, only : neko_field_registry - use field, only : field_t, field_ptr_t - use field_list, only : field_list_t + use field, only : field_t use operators, only : lambda2op use case, only : case_t + use field_writer, only : field_writer_t use device implicit none private @@ -62,6 +62,9 @@ module lambda2 type(field_t) :: temp1 type(field_t) :: temp2 + !> Output writer. + type(field_writer_t) :: writer + contains !> Constructor from json. procedure, pass(this) :: init => lambda2_init_from_json @@ -80,9 +83,16 @@ module lambda2 subroutine lambda2_init_from_json(this, json, case) class(lambda2_t), intent(inout) :: this type(json_file), intent(inout) :: json - class(case_t), intent(inout), target ::case + class(case_t), intent(inout), target ::case + character(len=20) :: fields(1) + + ! Add fields keyword to the json so that the field_writer picks it up. + ! Will also add fields to the registry. + fields(1) = "lambda2" + call json%add("fields", fields) call this%init_base(json, case) + call this%writer%init(json, case) call lambda2_init_from_attributes(this) end subroutine lambda2_init_from_json @@ -91,17 +101,10 @@ end subroutine lambda2_init_from_json subroutine lambda2_init_from_attributes(this) class(lambda2_t), intent(inout) :: this - this%u => neko_field_registry%get_field_by_name("u") - this%v => neko_field_registry%get_field_by_name("v") - this%w => neko_field_registry%get_field_by_name("w") - - if (.not. neko_field_registry%field_exists("lambda2")) then - call neko_field_registry%add_field(this%u%dof, "lambda2") - end if - this%lambda2 => neko_field_registry%get_field_by_name("lambda2") - !Store lambda2 in the next free field in the fluid output. - !If running without scalar this means the temperature field. - call this%case%f_out%fluid%append(this%lambda2) + this%u => neko_field_registry%get_field("u") + this%v => neko_field_registry%get_field("v") + this%w => neko_field_registry%get_field("w") + this%lambda2 => neko_field_registry%get_field("lambda2") end subroutine lambda2_init_from_attributes !> Destructor. diff --git a/src/simulation_components/les_simcomp.f90 b/src/simulation_components/les_simcomp.f90 new file mode 100644 index 00000000000..92886be5fe7 --- /dev/null +++ b/src/simulation_components/les_simcomp.f90 @@ -0,0 +1,107 @@ +! Copyright (c) 2023, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +!> Implements the `les_simcomp_t` type. + +module les_simcomp + use num_types, only : rp + use json_module, only : json_file + use simulation_component, only : simulation_component_t + use field_registry, only : neko_field_registry + use field, only : field_t + use operators, only : curl + use case, only : case_t + use les_model, only : les_model_t + use les_model_fctry, only : les_model_factory + use json_utils, only : json_get + implicit none + private + + !> A simulation component that drives the computation of the SGS + !! viscosity. + type, public, extends(simulation_component_t) :: les_simcomp_t + !> The LES model. + class(les_model_t), allocatable :: les_model + contains + !> Constructor from json, wrapping the actual constructor. + procedure, pass(this) :: init => les_simcomp_init_from_json + !> Destructor. + procedure, pass(this) :: free => les_simcomp_free + !> Compute the les_simcomp field. + procedure, pass(this) :: compute_ => les_simcomp_compute + end type les_simcomp_t + +contains + + !> Constructor from json. + subroutine les_simcomp_init_from_json(this, json, case) + class(les_simcomp_t), intent(inout) :: this + type(json_file), intent(inout) :: json + class(case_t), intent(inout), target :: case + character(len=:), allocatable :: name + + call this%free() + + call json_get(json, "model", name) + + call this%init_base(json, case) + + call les_model_factory(this%les_model, name, case%fluid%dm_Xh,& + case%fluid%c_Xh, json) + + end subroutine les_simcomp_init_from_json + + !> Destructor. + subroutine les_simcomp_free(this) + class(les_simcomp_t), intent(inout) :: this + call this%free_base() + + if (allocated(this%les_model)) then + call this%les_model%free() + deallocate(this%les_model) + end if + end subroutine les_simcomp_free + + !> Compute the les_simcomp field. + !! @param t The time value. + !! @param tstep The current time-step. + subroutine les_simcomp_compute(this, t, tstep) + class(les_simcomp_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + + call this%les_model%compute(t, tstep) + + end subroutine les_simcomp_compute + +end module les_simcomp diff --git a/src/simulation_components/probes.F90 b/src/simulation_components/probes.F90 index eb5cbb1c799..ef88980985b 100644 --- a/src/simulation_components/probes.F90 +++ b/src/simulation_components/probes.F90 @@ -38,8 +38,7 @@ module probes use num_types, only: rp use matrix, only: matrix_t use logger, only: neko_log, LOG_SIZE - use utils, only: neko_error, neko_warning - use field, only: field_t + use utils, only: neko_error use field_list, only: field_list_t use simulation_component use field_registry, only : neko_field_registry @@ -47,7 +46,6 @@ module probes use json_module, only : json_file use json_utils, only : json_get use global_interpolation, only: global_interpolation_t - use math, only: rzero, copy use tensor, only: trsp use comm use device @@ -58,7 +56,7 @@ module probes use, intrinsic :: iso_c_binding implicit none private - + type, public, extends(simulation_component_t) :: probes_t !> Number of output fields integer :: n_fields = 0 @@ -86,7 +84,7 @@ module probes real(kind=rp), allocatable :: global_output_values(:,:) !> Output variables type(file_t) :: fout - type(matrix_t) :: mat_out + type(matrix_t) :: mat_out contains !> Initialize from json procedure, pass(this) :: init => probes_init_from_json @@ -99,16 +97,16 @@ module probes procedure, pass(this) :: setup_offset => probes_setup_offset !> Interpolate each probe from its `r,s,t` coordinates. procedure, pass(this) :: compute_ => probes_evaluate_and_write - + end type probes_t - + contains - + !> Constructor from json. subroutine probes_init_from_json(this, json, case) class(probes_t), intent(inout) :: this type(json_file), intent(inout) :: json - class(case_t), intent(inout), target :: case + class(case_t), intent(inout), target :: case real(kind=rp), allocatable :: xyz(:,:) character(len=:), allocatable :: output_file character(len=:), allocatable :: points_file @@ -119,18 +117,18 @@ subroutine probes_init_from_json(this, json, case) !> Read from case file call json%info('fields', n_children=this%n_fields) - call json_get(json, 'fields', this%which_fields) + call json_get(json, 'fields', this%which_fields) !> Should be extended to not only csv !! but also be possible to define in userfile for example call json_get(json, 'points_file', points_file) - call json_get(json, 'output_file', output_file) + call json_get(json, 'output_file', output_file) allocate(this%sampled_fields%fields(this%n_fields)) do i = 1, this%n_fields this%sampled_fields%fields(i)%f => neko_field_registry%get_field(& trim(this%which_fields(i))) end do - !> This is distributed as to make it similar to parallel file + !> This is distributed as to make it similar to parallel file !! formats latera !! Reads all into rank 0 call read_probe_locations(this, this%xyz, this%n_local_probes, & @@ -149,6 +147,7 @@ subroutine probes_init_from_attributes(this, dof, output_file) class(probes_t), intent(inout) :: this type(dofmap_t), intent(in) :: dof character(len=:), allocatable, intent(inout) :: output_file + character(len=1024) :: header_line real(kind=rp), allocatable :: global_output_coords(:,:) integer :: i, ierr type(matrix_t) :: mat_coords @@ -165,7 +164,7 @@ subroutine probes_init_from_attributes(this, dof, output_file) allocate(this%out_values(this%n_local_probes,this%n_fields)) allocate(this%out_values_d(this%n_fields)) allocate(this%out_vals_trsp(this%n_fields,this%n_local_probes)) - + if (NEKO_BCKND_DEVICE .eq. 1) then do i = 1, this%n_fields this%out_values_d(i) = c_null_ptr @@ -180,34 +179,42 @@ subroutine probes_init_from_attributes(this, dof, output_file) this%fout = file_t(trim(output_file)) select type(ft => this%fout%file_type) - type is (csv_file_t) - !> Necessary for not-parallel csv format... - !! offsets and n points per pe - !! Needed at root for sequential csv i/o - allocate(this%n_local_probes_tot(pe_size)) - allocate(this%n_local_probes_tot_offset(pe_size)) - call this%setup_offset() - if (pe_rank .eq. 0) then - allocate(global_output_coords(3,& + type is (csv_file_t) + + ! Build the header + write(header_line, '(I0,A,I0)') this%n_global_probes, ",", this%n_fields + do i = 1, this%n_fields + header_line = trim(header_line) // "," // trim(this%which_fields(i)) + end do + call this%fout%set_header(header_line) + + !> Necessary for not-parallel csv format... + !! offsets and n points per pe + !! Needed at root for sequential csv i/o + allocate(this%n_local_probes_tot(pe_size)) + allocate(this%n_local_probes_tot_offset(pe_size)) + call this%setup_offset() + if (pe_rank .eq. 0) then + allocate(global_output_coords(3,& this%n_global_probes)) - call this%mat_out%init(this%n_global_probes, this%n_fields) - allocate(this%global_output_values(this%n_fields,& + call this%mat_out%init(this%n_global_probes, this%n_fields) + allocate(this%global_output_values(this%n_fields,& this%n_global_probes)) - call mat_coords%init(this%n_global_probes,3) - end if - call MPI_Gatherv(this%xyz, 3*this%n_local_probes,& + call mat_coords%init(this%n_global_probes,3) + end if + call MPI_Gatherv(this%xyz, 3*this%n_local_probes,& MPI_DOUBLE_PRECISION, global_output_coords,& 3*this%n_local_probes_tot,& 3*this%n_local_probes_tot_offset,& - MPI_DOUBLE_PRECISION, 0, NEKO_COMM, ierr) - if (pe_rank .eq. 0) then - call trsp(mat_coords%x, this%n_global_probes,& + MPI_DOUBLE_PRECISION, 0, NEKO_COMM, ierr) + if (pe_rank .eq. 0) then + call trsp(mat_coords%x, this%n_global_probes,& global_output_coords, 3) - !! Write the data to the file - call this%fout%write(mat_coords) - end if + !! Write the data to the file + call this%fout%write(mat_coords) + end if class default - call neko_error("Invalid data. Expected csv_file_t.") + call neko_error("Invalid data. Expected csv_file_t.") end select end subroutine probes_init_from_attributes @@ -223,15 +230,15 @@ subroutine probes_free(this) if (allocated(this%out_values)) then deallocate(this%out_values) end if - + if (allocated(this%out_vals_trsp)) then deallocate(this%out_vals_trsp) end if - + if (allocated(this%sampled_fields%fields)) then deallocate(this%sampled_fields%fields) end if - + if (allocated(this%n_local_probes_tot)) then deallocate(this%n_local_probes_tot) end if @@ -239,7 +246,7 @@ subroutine probes_free(this) if (allocated(this%n_local_probes_tot_offset)) then deallocate(this%n_local_probes_tot_offset) end if - + if (allocated(this%global_output_values)) then deallocate(this%global_output_values) end if @@ -272,7 +279,7 @@ subroutine probes_show(this) end do call neko_log%end_section() call neko_log%newline() - + end subroutine probes_show !> Show the status of processor/element owner and error code for each point @@ -326,7 +333,7 @@ subroutine probes_evaluate_and_write(this, t, tstep) !> Check controller to determine if we must write - + do i = 1,this%n_fields call this%global_interp%evaluate(this%out_values(:,i), & this%sampled_fields%fields(i)%f%x) @@ -349,13 +356,13 @@ subroutine probes_evaluate_and_write(this, t, tstep) MPI_DOUBLE_PRECISION, this%global_output_values,& this%n_fields*this%n_local_probes_tot,& this%n_fields*this%n_local_probes_tot_offset,& - MPI_DOUBLE_PRECISION, 0, NEKO_COMM, ierr) + MPI_DOUBLE_PRECISION, 0, NEKO_COMM, ierr) if (pe_rank .eq. 0) then call trsp(this%mat_out%x, this%n_global_probes, & this%global_output_values, this%n_fields) call this%fout%write(this%mat_out, t) end if - else + else call neko_error('probes sim comp, parallel io need implementation') end if @@ -380,17 +387,17 @@ subroutine read_probe_locations(this, xyz, n_local_probes, n_global_probes, poin file_in = file_t(trim(points_file)) !> Reads on rank 0 and distributes the probes across the different ranks select type(ft => file_in%file_type) - type is (csv_file_t) - call read_xyz_from_csv(this, xyz, n_local_probes, n_global_probes, ft) - this%seq_io = .true. - class default - call neko_error("Invalid data. Expected csv_file_t.") + type is (csv_file_t) + call read_xyz_from_csv(this, xyz, n_local_probes, n_global_probes, ft) + this%seq_io = .true. + class default + call neko_error("Invalid data. Expected csv_file_t.") end select !> Close the file call file_free(file_in) - end subroutine read_probe_locations + end subroutine read_probe_locations !> Read and initialize the number of probes from a `csv` input file !! @param xyz xyz coordinates of the probes @@ -421,7 +428,7 @@ subroutine read_xyz_from_csv(this, xyz, n_local_probes, n_global_probes, f) call mat_in2%init(3,this%n_global_probes) call f%read(mat_in) call trsp(xyz, 3, mat_in%x, this%n_global_probes) - else + else n_local_probes = 0 this%n_local_probes = 0 allocate(xyz(3,this%n_local_probes)) diff --git a/src/simulation_components/simcomp_executor.f90 b/src/simulation_components/simcomp_executor.f90 new file mode 100644 index 00000000000..ea231c2f311 --- /dev/null +++ b/src/simulation_components/simcomp_executor.f90 @@ -0,0 +1,182 @@ +! Copyright (c) 2023, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +!> Contains the `simcomp_executor_t` type. +module simcomp_executor + use num_types, only : rp + use simulation_component, only : simulation_component_wrapper_t + use simulation_component_fctry, only : simulation_component_factory + use json_module, only : json_file, json_core, json_value + use json_utils, only : json_get, json_get_or_default, json_extract_item + use case, only : case_t + implicit none + private + + !> Singleton type that serves as a driver for the simulation components. + !! Stores all the components in the case and provides an interface matching + !! that of a single simcomp, which executes the corresponding routines for + !! each stored simcomp. + !! The execution order is based on the order property of each simcomp. + !! By default, the order is by the order of apparence in the case file. + type, public :: simcomp_executor_t + !> The simcomps. + class(simulation_component_wrapper_t), allocatable :: simcomps(:) + !> Index array defining the order of execution, i.e. simcomps(order(1)) is + !! first to execute, and so on. + integer, allocatable :: order(:) + contains + !> Constructor. + procedure, pass(this) :: init => simcomp_executor_init + !> Destructor. + procedure, pass(this) :: free => simcomp_executor_free + !> Execute compute_ for all simcomps. + procedure, pass(this) :: compute => simcomp_executor_compute + !> Execute restart for all simcomps. + procedure, pass(this) :: restart=> simcomp_executor_restart + + end type simcomp_executor_t + + !> Global variable for the simulation component driver. + type(simcomp_executor_t), public :: neko_simcomps + +contains + + !> Constructor. + subroutine simcomp_executor_init(this, case) + class(simcomp_executor_t), intent(inout) :: this + type(case_t), target, intent(inout) :: case + integer :: n_simcomps, i + type(json_core) :: core + type(json_value), pointer :: simcomp_object + type(json_file) :: comp_subdict + logical :: found + ! Help array for finding minimal values + logical, allocatable :: mask(:) + ! The order value for each simcomp in order of appearance in the case file. + integer, allocatable :: read_order(:) + ! Location of the min value + integer :: loc(1) + + call this%free() + + if (case%params%valid_path('case.simulation_components')) then + + call case%params%info('case.simulation_components', n_children=n_simcomps) + allocate(this%simcomps(n_simcomps)) + allocate(this%order(n_simcomps)) + allocate(read_order(n_simcomps)) + allocate(mask(n_simcomps)) + mask = .true. + + call case%params%get_core(core) + call case%params%get('case.simulation_components', simcomp_object, found) + + ! We need a separate loop to figure out the order, so that we can + ! apply the order to the initialization as well. + do i=1, n_simcomps + ! Create a new json containing just the subdict for this simcomp + call json_extract_item(core, simcomp_object, i, comp_subdict) + call json_get_or_default(comp_subdict, "order", read_order(i), i) + end do + + ! Figure out the execution order using a poor man's argsort. + ! Searches for the location of the min value, each time masking out the + ! found location prior to the next search. + do i= 1, n_simcomps + loc = minloc(read_order, mask=mask) + this%order(i) = loc(1) + mask(loc) = .false. + end do + + ! Init in the determined order. + do i=1, n_simcomps + call json_extract_item(core, simcomp_object, this%order(i),& + comp_subdict) + ! Have to add, the simcomp constructor expects it. + if (.not. comp_subdict%valid_path("order")) then + call comp_subdict%add("order", this%order(i)) + end if + call simulation_component_factory(this%simcomps(i)%simcomp, & + comp_subdict, case) + end do + end if + end subroutine simcomp_executor_init + + !> Destructor. + subroutine simcomp_executor_free(this) + class(simcomp_executor_t), intent(inout) :: this + integer :: i + + if (allocated(this%order)) deallocate(this%order) + + if (allocated(this%simcomps)) then + do i=1, size(this%simcomps) + call this%simcomps(i)%simcomp%free + end do + deallocate(this%simcomps) + end if + end subroutine simcomp_executor_free + + !> Execute compute_ for all simcomps. + !! @param t The time value. + !! @param tstep The timestep number. + subroutine simcomp_executor_compute(this, t, tstep) + class(simcomp_executor_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + integer :: i + + if (allocated(this%simcomps)) then + do i=1, size(this%simcomps) + call this%simcomps(this%order(i))%simcomp%compute(t, tstep) + end do + end if + + end subroutine simcomp_executor_compute + + !> Execute restart for all simcomps. + !! @param t The time value. + subroutine simcomp_executor_restart(this, t) + class(simcomp_executor_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer :: i + + if (allocated(this%simcomps)) then + do i=1, size(this%simcomps) + call this%simcomps(this%order(i))%simcomp%restart(t) + end do + end if + + end subroutine simcomp_executor_restart + +end module simcomp_executor diff --git a/src/simulation_components/simulation_component.f90 b/src/simulation_components/simulation_component.f90 index fe2b0468834..102f14a5b9b 100644 --- a/src/simulation_components/simulation_component.f90 +++ b/src/simulation_components/simulation_component.f90 @@ -40,10 +40,10 @@ module simulation_component use json_module, only : json_file use case, only : case_t use time_based_controller, only : time_based_controller_t - use json_utils, only : json_get_or_default + use json_utils, only : json_get_or_default, json_get implicit none private - + !> Base abstract class for simulation components. type, abstract, public :: simulation_component_t !> Pointer to the simulation case. @@ -52,6 +52,8 @@ module simulation_component type(time_based_controller_t) :: compute_controller !> Controller for when to do output. type(time_based_controller_t) :: output_controller + !> The execution order, lowest excutes first. + integer :: order contains !> Constructor for the simulation_component_t (base) class. procedure, pass(this) :: init_base => simulation_component_init_base @@ -70,58 +72,59 @@ module simulation_component !> The main function to be executed during the run. procedure(simulation_component_compute), pass(this), deferred :: compute_ end type simulation_component_t - + !> A helper type that is needed to have an array of polymorphic objects type, public :: simulation_component_wrapper_t - class(simulation_component_t), allocatable :: simcomp + class(simulation_component_t), allocatable :: simcomp end type simulation_component_wrapper_t - + abstract interface !> The common constructor using a JSON dictionary. !! @param json The JSON with properties. - !! @param case The case_t object. - subroutine simulation_component_init(this, json, case) + !! @param case The case_t object. + subroutine simulation_component_init(this, json, case) import simulation_component_t, json_file, case_t class(simulation_component_t), intent(inout) :: this type(json_file), intent(inout) :: json class(case_t), intent(inout), target :: case - end subroutine + end subroutine simulation_component_init end interface abstract interface !> Destructor. - subroutine simulation_component_free(this) + subroutine simulation_component_free(this) import simulation_component_t class(simulation_component_t), intent(inout) :: this - end subroutine + end subroutine simulation_component_free end interface abstract interface !> The main function to be executed during the run. !! @param t The time value. !! @param tstep The current time-step - subroutine simulation_component_compute(this, t, tstep) + subroutine simulation_component_compute(this, t, tstep) import simulation_component_t, rp class(simulation_component_t), intent(inout) :: this real(kind=rp), intent(in) :: t integer, intent(in) :: tstep - end subroutine + end subroutine simulation_component_compute end interface contains !> Constructor for the `simulation_component_t` (base) class. - subroutine simulation_component_init_base(this, json, case) + subroutine simulation_component_init_base(this, json, case) class(simulation_component_t), intent(inout) :: this type(json_file), intent(inout) :: json class(case_t), intent(inout), target :: case character(len=:), allocatable :: compute_control, output_control real(kind=rp) :: compute_value, output_value + integer :: order this%case => case call json_get_or_default(json, "compute_control", compute_control, & "tsteps") - call json_get_or_default(json, "compute_value", compute_value, 1.0_rp) + call json_get_or_default(json, "compute_value", compute_value, 1.0_rp) ! We default to output whenever we execute call json_get_or_default(json, "output_control", output_control, & @@ -129,6 +132,17 @@ subroutine simulation_component_init_base(this, json, case) call json_get_or_default(json, "output_value", output_value, & compute_value) + + if (output_control == "global") then + call json_get(this%case%params, 'case.fluid.output_control', & + output_control) + call json_get(this%case%params, 'case.fluid.output_value', & + output_value) + end if + + call json_get(json, "order", order) + this%order = order + call this%compute_controller%init(case%end_time, compute_control, & compute_value) call this%output_controller%init(case%end_time, output_control, & @@ -137,7 +151,7 @@ subroutine simulation_component_init_base(this, json, case) end subroutine simulation_component_init_base !> Destructor for the `simulation_component_t` (base) class. - subroutine simulation_component_free_base(this) + subroutine simulation_component_free_base(this) class(simulation_component_t), intent(inout) :: this nullify(this%case) @@ -147,21 +161,21 @@ end subroutine simulation_component_free_base !! Serves as the public interface. !! @param t The time value. !! @param tstep The current time-step - subroutine simulation_component_compute_wrapper(this, t, tstep) + subroutine simulation_component_compute_wrapper(this, t, tstep) class(simulation_component_t), intent(inout) :: this real(kind=rp), intent(in) :: t integer, intent(in) :: tstep if (this%compute_controller%check(t, tstep)) then - call this%compute_(t, tstep) - call this%compute_controller%register_execution() + call this%compute_(t, tstep) + call this%compute_controller%register_execution() end if end subroutine simulation_component_compute_wrapper !> Wrapper for calling `set_counter_` based for the controllers. !! Serves as the public interface. !! @param t The time value. - subroutine simulation_component_restart_wrapper(this, t) + subroutine simulation_component_restart_wrapper(this, t) class(simulation_component_t), intent(inout) :: this real(kind=rp), intent(in) :: t @@ -169,5 +183,5 @@ subroutine simulation_component_restart_wrapper(this, t) call this%output_controller%set_counter(t) end subroutine simulation_component_restart_wrapper - + end module simulation_component diff --git a/src/simulation_components/simulation_component_factory.f90 b/src/simulation_components/simulation_component_fctry.f90 similarity index 67% rename from src/simulation_components/simulation_component_factory.f90 rename to src/simulation_components/simulation_component_fctry.f90 index 2c5028280fc..04f67251fb2 100644 --- a/src/simulation_components/simulation_component_factory.f90 +++ b/src/simulation_components/simulation_component_fctry.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2023, The Neko Authors +! Copyright (c) 2024, The Neko Authors ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -37,38 +37,47 @@ module simulation_component_fctry use vorticity, only : vorticity_t use lambda2, only : lambda2_t use probes, only : probes_t + use les_simcomp, only : les_simcomp_t use json_module, only : json_file use case, only : case_t use json_utils, only : json_get + use logger, only : neko_log + use field_writer, only : field_writer_t implicit none private - + public :: simulation_component_factory - - contains + +contains !> Simulation component factory. Both constructs and initializes the object. !! @param json JSON object initializing the simulation component. subroutine simulation_component_factory(simcomp, json, case) - class(simulation_component_t), allocatable, intent(inout) :: simcomp - type(json_file), intent(inout) :: json - class(case_t), intent(inout), target :: case - character(len=:), allocatable :: simcomp_type - - call json_get(json, "type", simcomp_type) + class(simulation_component_t), allocatable, intent(inout) :: simcomp + type(json_file), intent(inout) :: json + class(case_t), intent(inout), target :: case + character(len=:), allocatable :: simcomp_type + + call json_get(json, "type", simcomp_type) + + if (trim(simcomp_type) .eq. "vorticity") then + allocate(vorticity_t::simcomp) + else if (trim(simcomp_type) .eq. "lambda2") then + allocate(lambda2_t::simcomp) + else if (trim(simcomp_type) .eq. "probes") then + allocate(probes_t::simcomp) + else if (trim(simcomp_type) .eq. "les_model") then + allocate(les_simcomp_t::simcomp) + else if (trim(simcomp_type) .eq. "field_writer") then + allocate(field_writer_t::simcomp) + else + call neko_log%error("Unknown simulation component type: " & + // trim(simcomp_type)) + stop + end if - if (trim(simcomp_type) .eq. "vorticity") then - allocate(vorticity_t::simcomp) - end if - if (trim(simcomp_type) .eq. "lambda2") then - allocate(lambda2_t::simcomp) - end if - if (trim(simcomp_type) .eq. "probes") then - allocate(probes_t::simcomp) - end if - - ! Initialize - call simcomp%init(json, case) + ! Initialize + call simcomp%init(json, case) end subroutine simulation_component_factory diff --git a/src/simulation_components/simulation_component_global.f90 b/src/simulation_components/simulation_component_global.f90 deleted file mode 100644 index c51e7370c75..00000000000 --- a/src/simulation_components/simulation_component_global.f90 +++ /dev/null @@ -1,79 +0,0 @@ -! Copyright (c) 2023, The Neko Authors -! All rights reserved. -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions -! are met: -! -! * Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! -! * Redistributions in binary form must reproduce the above -! copyright notice, this list of conditions and the following -! disclaimer in the documentation and/or other materials provided -! with the distribution. -! -! * Neither the name of the authors nor the names of its -! contributors may be used to endorse or promote products derived -! from this software without specific prior written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -! POSSIBILITY OF SUCH DAMAGE. -! -! -!> Contains the global simulation component array and its init for the given -!! case. -module simulation_component_global - use simulation_component, only : simulation_component_wrapper_t - use simulation_component_fctry, only : simulation_component_factory - use json_module, only : json_file, json_core, json_value - use case, only : case_t - implicit none - private - - class(simulation_component_wrapper_t), public, allocatable :: neko_simcomps(:) - public :: simcomps_global_init - -contains - - !> Populates the global `neko_simcomps` array with objects based on the json - !! configuration and the simulation case. - !! @param case Simulation case. - subroutine simcomps_global_init(case) - type(case_t), intent(inout) :: case - integer :: n_simcomps, i - type(json_core) :: core - type(json_value), pointer :: simcomp_object, comp_pointer - type(json_file) :: comp_subdict - character(len=:), allocatable :: buffer - logical :: found - - if (case%params%valid_path('case.simulation_components')) then - - call case%params%info('case.simulation_components', n_children=n_simcomps) - allocate(neko_simcomps(n_simcomps)) - - call case%params%get_core(core) - call case%params%get('case.simulation_components', simcomp_object, found) - do i=1, n_simcomps - ! Create a new json containing just the subdict for this simcomp - call core%get_child(simcomp_object, i, comp_pointer, found) - call core%print_to_string(comp_pointer, buffer) - call comp_subdict%load_from_string(buffer) - call simulation_component_factory(neko_simcomps(i)%simcomp, & - comp_subdict, case) - end do - end if - end subroutine - -end module simulation_component_global diff --git a/src/simulation_components/vorticity.f90 b/src/simulation_components/vorticity.f90 index 17fef434173..7428e181f94 100644 --- a/src/simulation_components/vorticity.f90 +++ b/src/simulation_components/vorticity.f90 @@ -31,19 +31,22 @@ ! POSSIBILITY OF SUCH DAMAGE. ! ! -!> Implements the `vorticity_t` type. +!> Implements the `vorticity_t` type. module vorticity - use num_types, only : rp + use num_types, only : rp, dp, sp use json_module, only : json_file use simulation_component, only : simulation_component_t use field_registry, only : neko_field_registry use field, only : field_t use operators, only : curl use case, only : case_t + use fld_file_output, only : fld_file_output_t + use json_utils, only : json_get, json_get_or_default + use field_writer, only : field_writer_t implicit none private - + !> A simulation component that computes the vorticity field. !! Added to the field registry as `omega_x`, `omega_y``, and `omega_z`. type, public, extends(simulation_component_t) :: vorticity_t @@ -66,74 +69,81 @@ module vorticity !> Work array. type(field_t) :: temp2 - contains - !> Constructor from json, wrapping the actual constructor. - procedure, pass(this) :: init => vorticity_init_from_json - !> Actual constructor. - procedure, pass(this) :: init_from_attributes => & + !> Output writer. + type(field_writer_t) :: writer + + contains + !> Constructor from json, wrapping the actual constructor. + procedure, pass(this) :: init => vorticity_init_from_json + !> Actual constructor. + procedure, pass(this) :: init_from_attributes => & vorticity_init_from_attributes - !> Destructor. - procedure, pass(this) :: free => vorticity_free - !> Compute the vorticity field. - procedure, pass(this) :: compute_ => vorticity_compute + !> Destructor. + procedure, pass(this) :: free => vorticity_free + !> Compute the vorticity field. + procedure, pass(this) :: compute_ => vorticity_compute end type vorticity_t - - contains - + +contains + !> Constructor from json. subroutine vorticity_init_from_json(this, json, case) - class(vorticity_t), intent(inout) :: this - type(json_file), intent(inout) :: json - class(case_t), intent(inout), target :: case - - call this%init_base(json, case) + class(vorticity_t), intent(inout) :: this + type(json_file), intent(inout) :: json + class(case_t), intent(inout), target :: case + character(len=:), allocatable :: filename + character(len=:), allocatable :: precision + character(len=20) :: fields(3) + + ! Add fields keyword to the json so that the field_writer picks it up. + ! Will also add fields to the registry. + fields(1) = "omega_x" + fields(2) = "omega_y" + fields(3) = "omega_z" + + call json%add("fields", fields) - call vorticity_init_from_attributes(this) + call this%init_base(json, case) + call this%writer%init(json, case) + + call vorticity_init_from_attributes(this) end subroutine vorticity_init_from_json !> Actual constructor. subroutine vorticity_init_from_attributes(this) - class(vorticity_t), intent(inout) :: this - - this%u => neko_field_registry%get_field_by_name("u") - this%v => neko_field_registry%get_field_by_name("v") - this%w => neko_field_registry%get_field_by_name("w") - - if (.not. neko_field_registry%field_exists("omega_x")) then - call neko_field_registry%add_field(this%u%dof, "omega_x") - end if - if (.not. neko_field_registry%field_exists("omega_y")) then - call neko_field_registry%add_field(this%u%dof, "omega_y") - end if - if (.not. neko_field_registry%field_exists("omega_z")) then - call neko_field_registry%add_field(this%u%dof, "omega_z") - end if - this%omega_x => neko_field_registry%get_field_by_name("omega_x") - this%omega_y => neko_field_registry%get_field_by_name("omega_y") - this%omega_z => neko_field_registry%get_field_by_name("omega_z") - - call this%temp1%init(this%u%dof) - call this%temp2%init(this%u%dof) + class(vorticity_t), intent(inout) :: this + + this%u => neko_field_registry%get_field_by_name("u") + this%v => neko_field_registry%get_field_by_name("v") + this%w => neko_field_registry%get_field_by_name("w") + + this%omega_x => neko_field_registry%get_field_by_name("omega_x") + this%omega_y => neko_field_registry%get_field_by_name("omega_y") + this%omega_z => neko_field_registry%get_field_by_name("omega_z") + + call this%temp1%init(this%u%dof) + call this%temp2%init(this%u%dof) + end subroutine vorticity_init_from_attributes !> Destructor. subroutine vorticity_free(this) - class(vorticity_t), intent(inout) :: this - call this%free_base() - call this%temp1%free() - call this%temp2%free() + class(vorticity_t), intent(inout) :: this + call this%free_base() + call this%temp1%free() + call this%temp2%free() end subroutine vorticity_free !> Compute the vorticity field. !! @param t The time value. !! @param tstep The current time-step subroutine vorticity_compute(this, t, tstep) - class(vorticity_t), intent(inout) :: this - real(kind=rp), intent(in) :: t - integer, intent(in) :: tstep + class(vorticity_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep - call curl(this%omega_x, this%omega_y, this%omega_z, this%u, this%v, & + call curl(this%omega_x, this%omega_y, this%omega_z, this%u, this%v, & this%w, this%temp1, this%temp2, this%case%fluid%c_Xh) end subroutine vorticity_compute - + end module vorticity diff --git a/src/source_terms/bcknd/cpu/boussinesq_source_term_cpu.f90 b/src/source_terms/bcknd/cpu/boussinesq_source_term_cpu.f90 new file mode 100644 index 00000000000..9d1174cdb3d --- /dev/null +++ b/src/source_terms/bcknd/cpu/boussinesq_source_term_cpu.f90 @@ -0,0 +1,69 @@ +! Copyright (c) 2023, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Implements the cpu kernel for the `boussinesq_source_term_t` type. +module boussinesq_source_term_cpu + use num_types, only : rp + use field_list, only : field_list_t + use field, only : field_t + use math, only : add2s2, cadd + implicit none + private + + public :: boussinesq_source_term_compute_cpu + +contains + + !> Computes the Boussinesq source term on the cpu. + !! @param fields The right-hand side. + !! @param s The scalar field + !! @param ref_value The reference value of the scalar field. + !! @param g The gravity vector. + !! @param beta The thermal expansion coefficient. + subroutine boussinesq_source_term_compute_cpu(fields, s, ref_value, g, beta) + type(field_list_t), intent(inout) :: fields + type(field_t), intent(inout) :: s + real(kind=rp), intent(in) :: ref_value + real(kind=rp), intent(in) :: g(3) + real(kind=rp), intent(in) :: beta + integer :: n_fields, i, n + + n_fields = size(fields%fields) + n = fields%fields(1)%f%dof%size() + + do i=1, n_fields + call add2s2(fields%fields(i)%f%x, s%x, g(i)*beta, n) + call cadd(fields%fields(i)%f%x, -g(i)*beta*ref_value, n) + end do + end subroutine boussinesq_source_term_compute_cpu + +end module boussinesq_source_term_cpu diff --git a/src/source_terms/bcknd/cpu/brinkman_source_term_cpu.f90 b/src/source_terms/bcknd/cpu/brinkman_source_term_cpu.f90 new file mode 100644 index 00000000000..8bbf961c6d7 --- /dev/null +++ b/src/source_terms/bcknd/cpu/brinkman_source_term_cpu.f90 @@ -0,0 +1,68 @@ +! Copyright (c) 2023, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Implements the cpu kernel for the `brinkman_source_term_t` type. +module brinkman_source_term_cpu + use num_types, only: rp + use field, only: field_t + use field_list, only: field_list_t + use math, only: subcol3 + use field_registry, only: neko_field_registry + implicit none + private + + public :: brinkman_source_term_compute_cpu + +contains + + !> Computes the Brinkman source term on the cpu. + !! @param fields The right-hand side. + !! @param values The values of the source components. + subroutine brinkman_source_term_compute_cpu(fields, brinkman) + type(field_list_t), intent(inout) :: fields + type(field_t), intent(in) :: brinkman + type(field_t), pointer :: u, v, w + integer :: n + + n = fields%fields(1)%f%dof%size() + + u => neko_field_registry%get_field('u') + v => neko_field_registry%get_field('v') + w => neko_field_registry%get_field('w') + + call subcol3(fields%fields(1)%f%x, u%x, brinkman%x, n) + call subcol3(fields%fields(2)%f%x, v%x, brinkman%x, n) + call subcol3(fields%fields(3)%f%x, w%x, brinkman%x, n) + + end subroutine brinkman_source_term_compute_cpu + +end module brinkman_source_term_cpu diff --git a/src/source_terms/bcknd/cpu/const_source_term_cpu.f90 b/src/source_terms/bcknd/cpu/const_source_term_cpu.f90 index acd024d8d19..4510346d3c8 100644 --- a/src/source_terms/bcknd/cpu/const_source_term_cpu.f90 +++ b/src/source_terms/bcknd/cpu/const_source_term_cpu.f90 @@ -42,10 +42,10 @@ module const_source_term_cpu contains - !> Computs the constant source term on the cpu. + !> Computes the constant source term on the cpu. !! @param fields The right-hand side. !! @param values The values of the source components. - subroutine const_source_term_compute_cpu(fields, values) + subroutine const_source_term_compute_cpu(fields, values) type(field_list_t), intent(inout) :: fields real(kind=rp), intent(in) :: values(:) integer :: n_fields, i, n @@ -54,8 +54,8 @@ subroutine const_source_term_compute_cpu(fields, values) n = fields%fields(1)%f%dof%size() do i=1, n_fields - call cadd(fields%fields(i)%f%x, values(i), n) + call cadd(fields%fields(i)%f%x, values(i), n) end do end subroutine const_source_term_compute_cpu - + end module const_source_term_cpu diff --git a/src/source_terms/bcknd/cpu/filters_cpu.f90 b/src/source_terms/bcknd/cpu/filters_cpu.f90 new file mode 100644 index 00000000000..3b2dc2a4bab --- /dev/null +++ b/src/source_terms/bcknd/cpu/filters_cpu.f90 @@ -0,0 +1,84 @@ +! Copyright (c) 2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> CPU implementations of the filter functions. +module filters_cpu + use num_types, only: rp + implicit none + +contains + + ! ========================================================================== ! + ! Internal functions and subroutines + ! ========================================================================== ! + + !> @brief Apply a smooth step function to a scalar. + elemental function smooth_step_cpu(x, edge0, edge1) result(res) + real(kind=rp), intent(in) :: x + real(kind=rp), intent(in) :: edge0, edge1 + real(kind=rp) :: res, t + + t = clamp_cpu((x - edge0) / (edge1 - edge0), 0.0_rp, 1.0_rp) + + res = t**3 * (t * (6.0_rp * t - 15.0_rp) + 10.0_rp) + + end function smooth_step_cpu + + !> @brief Clamp a value between two limits. + elemental function clamp_cpu(x, lowerlimit, upperlimit) result(res) + real(kind=rp), intent(in) :: x + real(kind=rp), intent(in) :: lowerlimit, upperlimit + real(kind=rp) :: res + + res = max(lowerlimit, min(upperlimit, x)) + end function clamp_cpu + + !> @brief Apply a step function to a scalar. + elemental function step_function_cpu(x, x_step, value0, value1) result(res) + real(kind=rp), intent(in) :: x, x_step, value0, value1 + real(kind=rp) :: res + + res = merge(value0, value1, x > x_step) + + end function step_function_cpu + + !> @brief Apply a permeability function to a scalar. + elemental function permeability_cpu(x, k_0, k_1, q) result(perm) + real(kind=rp), intent(in) :: x, k_0, k_1, q + real(kind=rp) :: perm + + perm = k_0 + (k_1 - k_0) * x * (q + 1.0_rp) / (q + x) + + end function permeability_cpu + + +end module filters_cpu diff --git a/src/source_terms/bcknd/device/boussinesq_source_term_device.f90 b/src/source_terms/bcknd/device/boussinesq_source_term_device.f90 new file mode 100644 index 00000000000..482d566dac4 --- /dev/null +++ b/src/source_terms/bcknd/device/boussinesq_source_term_device.f90 @@ -0,0 +1,70 @@ +! Copyright (c) 2023, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Implements the device kernel for the `boussinesq_source_term_t` type. +module boussinesq_source_term_device + use num_types, only : rp + use field_list, only : field_list_t + use field, only : field_t + use device_math, only : device_add2s2, device_cadd + implicit none + private + + public :: boussinesq_source_term_compute_device + +contains + + !> Computes the Boussinesq source term on the device. + !! @param fields The right-hand side. + !! @param s The scalar field + !! @param ref_value The reference value of the scalar field. + !! @param g The gravity vector. + !! @param beta The thermal expansion coefficient. + subroutine boussinesq_source_term_compute_device(fields, s, ref_value, g,& + beta) + type(field_list_t), intent(inout) :: fields + type(field_t), intent(inout) :: s + real(kind=rp), intent(in) :: ref_value + real(kind=rp), intent(in) :: g(3) + real(kind=rp), intent(in) :: beta + integer :: n_fields, i, n + + n_fields = size(fields%fields) + n = fields%fields(1)%f%dof%size() + + do i=1, n_fields + call device_add2s2(fields%fields(i)%f%x_d, s%x_d, g(i)*beta, n) + call device_cadd(fields%fields(i)%f%x_d, -g(i)*beta*ref_value, n) + end do + end subroutine boussinesq_source_term_compute_device + +end module boussinesq_source_term_device diff --git a/src/source_terms/bcknd/device/brinkman_source_term_device.f90 b/src/source_terms/bcknd/device/brinkman_source_term_device.f90 new file mode 100644 index 00000000000..454decceba9 --- /dev/null +++ b/src/source_terms/bcknd/device/brinkman_source_term_device.f90 @@ -0,0 +1,68 @@ +! Copyright (c) 2023, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Implements the device kernel for the `brinkman_source_term_t` type. +module brinkman_source_term_device + use num_types, only : rp + use field, only : field_t + use field_list, only : field_list_t + use device_math, only : device_subcol3 + use field_registry, only : neko_field_registry + implicit none + private + + public :: brinkman_source_term_compute_device + +contains + + !> Computes the Brinkman source term on the device. + !! @param fields The right-hand side. + !! @param values The values of the source components. + subroutine brinkman_source_term_compute_device(fields, brinkman) + type(field_list_t), intent(inout) :: fields + type(field_t), intent(in) :: brinkman + integer :: n + type(field_t), pointer :: u, v, w + + n = fields%fields(1)%f%dof%size() + + u => neko_field_registry%get_field('u') + v => neko_field_registry%get_field('v') + w => neko_field_registry%get_field('w') + + call device_subcol3(fields%fields(1)%f%x_d, u%x_d, brinkman%x_d, n) + call device_subcol3(fields%fields(2)%f%x_d, v%x_d, brinkman%x_d, n) + call device_subcol3(fields%fields(3)%f%x_d, w%x_d, brinkman%x_d, n) + + end subroutine brinkman_source_term_compute_device + +end module brinkman_source_term_device diff --git a/src/source_terms/bcknd/device/const_source_term_device.f90 b/src/source_terms/bcknd/device/const_source_term_device.f90 index e01da4b2e7a..16bcfc217fe 100644 --- a/src/source_terms/bcknd/device/const_source_term_device.f90 +++ b/src/source_terms/bcknd/device/const_source_term_device.f90 @@ -42,10 +42,10 @@ module const_source_term_device contains - !> Computs the constant source term on the device. + !> Computes the constant source term on the device. !! @param fields The right-hand side. !! @param values The values of the source components. - subroutine const_source_term_compute_device(fields, values) + subroutine const_source_term_compute_device(fields, values) type(field_list_t), intent(inout) :: fields real(kind=rp), intent(in) :: values(:) integer :: n_fields, i, n @@ -57,5 +57,5 @@ subroutine const_source_term_compute_device(fields, values) call device_cadd(fields%fields(i)%f%x_d, values(i), n) end do end subroutine const_source_term_compute_device - + end module const_source_term_device diff --git a/src/source_terms/boussinesq_source_term.f90 b/src/source_terms/boussinesq_source_term.f90 new file mode 100644 index 00000000000..8e242b33e53 --- /dev/null +++ b/src/source_terms/boussinesq_source_term.f90 @@ -0,0 +1,180 @@ +! Copyright (c) 2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Implements the `boussinesq_source_term_t` type. +module boussinesq_source_term + use num_types, only : rp + use field_list, only : field_list_t + use field, only : field_t + use json_module, only : json_file + use json_utils, only: json_get, json_get_or_default + use source_term, only : source_term_t + use coefs, only : coef_t + use neko_config, only : NEKO_BCKND_DEVICE + use utils, only : neko_error + use boussinesq_source_term_cpu, only : boussinesq_source_term_compute_cpu + use boussinesq_source_term_device, only : boussinesq_source_term_compute_device + use field_registry, only : neko_field_registry + implicit none + private + + !> Bouyancy source term accroding to the Boussinesq approximation. + !! @details Called "boussinesq" in the JSON. + !! Controlled by the following parameters: + !! - "scalar_field": The name of the scalar that drives the source term, + !! defaults to "s". + !! - "ref_value": The reference value of the scalar. + !! - "g": The gravity vector. + !! - "beta": The the thermal expansion coefficeint, defaults to `1/ref_value`. + type, public, extends(source_term_t) :: boussinesq_source_term_t + !> The scalar field used to drive the source term, typically temperature. + type(field_t), pointer :: s => null() + !> The reference value of the scalar field. + real(kind=rp) :: ref_value = 0 + !> Gravity vector + real(kind=rp) :: g(3) + !> Thermal expantion coefficient + real(kind=rp) :: beta + contains + !> The common constructor using a JSON object. + procedure, pass(this) :: init => boussinesq_source_term_init_from_json + !> The constructor from type components. + procedure, pass(this) :: init_from_compenents => & + boussinesq_source_term_init_from_components + !> Destructor. + procedure, pass(this) :: free => boussinesq_source_term_free + !> Computes the source term and adds the result to `fields`. + procedure, pass(this) :: compute_ => boussinesq_source_term_compute + end type boussinesq_source_term_t + +contains + !> The common constructor using a JSON object. + !! @param json The JSON object for the source. + !! @param fields A list of fields for adding the source values. + !! @param coef The SEM coeffs. + subroutine boussinesq_source_term_init_from_json(this, json, fields, coef) + class(boussinesq_source_term_t), intent(inout) :: this + type(json_file), intent(inout) :: json + type(field_list_t), intent(inout), target :: fields + type(coef_t), intent(inout) :: coef + real(kind=rp), allocatable :: values(:) + real(kind=rp) :: start_time, end_time, ref_value + character(len=:), allocatable :: scalar_name + real(kind=rp), allocatable :: g(:) + real(kind=rp) :: beta + + if (.not. size(fields%fields) == 3) then + call neko_error("Boussinesq term expects 3 fields to work on.") + end if + + call json_get_or_default(json, "scalar_field", start_time, 0.0_rp) + call json_get_or_default(json, "end_time", end_time, huge(0.0_rp)) + + call json_get_or_default(json, "scalar_field", scalar_name, "s") + call json_get(json, "g", g) + + if (.not. size(g) == 3) then + call neko_error("The gravity vector should have 3 components") + end if + + call json_get(json, "reference_value", ref_value) + call json_get_or_default(json, "beta", beta, 1.0_rp/ref_value) + + call boussinesq_source_term_init_from_components(this, fields, scalar_name,& + ref_value, g, beta, coef, start_time, end_time) + + end subroutine boussinesq_source_term_init_from_json + + !> The constructor from type components. + !! @param fields A list of fields for adding the source values. + !! @param scalar_name The name of the scalar field driving the source. + !! @param ref_value The reference value of the scalar field. + !! @param g The gravity vector. + !! @param beta The thermal expansion coefficient. + !! @param coef The SEM coeffs. + !! @param start_time When to start adding the source term. + !! @param end_time When to stop adding the source term. + subroutine boussinesq_source_term_init_from_components(this, fields, & + scalar_name, ref_value, g, beta, coef, start_time, end_time) + class(boussinesq_source_term_t), intent(inout) :: this + class(field_list_t), intent(inout), target :: fields + character(len=*), intent(in) :: scalar_name + real(kind=rp), intent(in) :: ref_value + real(kind=rp), intent(in) :: g(3) + real(kind=rp), intent(in) :: beta + type(coef_t) :: coef + real(kind=rp), intent(in) :: start_time + real(kind=rp), intent(in) :: end_time + + call this%free() + call this%init_base(fields, coef, start_time, end_time) + + if (.not. neko_field_registry%field_exists(scalar_name)) then + call neko_field_registry%add_field(this%fields%fields(1)%f%dof, "s") + end if + this%s => neko_field_registry%get_field("s") + + this%ref_value = ref_value + this%g = g + this%beta = beta + end subroutine boussinesq_source_term_init_from_components + + !> Destructor. + subroutine boussinesq_source_term_free(this) + class(boussinesq_source_term_t), intent(inout) :: this + + call this%free_base() + nullify(this%s) + end subroutine boussinesq_source_term_free + + !> Computes the source term and adds the result to `fields`. + !! @param t The time value. + !! @param tstep The current time-step. + subroutine boussinesq_source_term_compute(this, t, tstep) + class(boussinesq_source_term_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + integer :: n_fields, i, n + + n_fields = size(this%fields%fields) + n = this%fields%fields(1)%f%dof%size() + + if (NEKO_BCKND_DEVICE .eq. 1) then + call boussinesq_source_term_compute_device(this%fields, this%s,& + this%ref_value, this%g, this%beta) + else + call boussinesq_source_term_compute_cpu(this%fields, this%s,& + this%ref_value, this%g, this%beta) + end if + end subroutine boussinesq_source_term_compute + +end module boussinesq_source_term diff --git a/src/source_terms/brinkman/filters.f90 b/src/source_terms/brinkman/filters.f90 new file mode 100644 index 00000000000..879f89313ef --- /dev/null +++ b/src/source_terms/brinkman/filters.f90 @@ -0,0 +1,102 @@ +! Copyright (c) 2024, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> A module containing filter functions and subroutines. These functions +!! are used to modify fields in a way that is useful for various +!! simulations. +module filters + use field, only: field_t + use num_types, only: rp + implicit none + + private + public :: smooth_step_field, permeability_field, step_function_field + +contains + + !> @brief Apply a smooth step function to a field. + !! @details The smooth step function is defined as: + !! \f[ + !! t = (x - edge0) / (edge1 - edge0) + !! f(t) = \begin{cases} + !! t^3 (t (6x - 15) + 10), & t \in [0, 1] \\ + !! 0, & t \leq 0 \\ + !! 1, & t \geq 1 \\ + !! \end{cases} + !! \f] + !! @note The step can be inverted by swapping edge0 and edge1. + !! + !! @param[in,out] F Field to be modified. + !! @param[in] edge0 Edge giving output 0. + !! @param[in] edge1 Edge giving output 1. + subroutine smooth_step_field(F, edge0, edge1) + use filters_cpu, only: smooth_step_cpu + + type(field_t), intent(inout) :: F + real(kind=rp), intent(in) :: edge0, edge1 + + F%x = smooth_step_cpu(F%x, edge0, edge1) + end subroutine smooth_step_field + + !> @brief Apply a permeability function to a field. + !! @details The permeability function is defined as: + !! \f[ k(x) = k_0 + (k_1 - k_0) x \frac{q + 1}{q + x}} \f] + !! @param[in,out] F Field to be modified. + !! @param[in] k_0 Permeability at x=0. + !! @param[in] k_1 Permeability at x=1. + !! @param[in] q Penalty factor. + subroutine permeability_field(F_out, x, k_0, k_1, q) + use filters_cpu, only: permeability_cpu + + type(field_t), intent(inout) :: F_out + type(field_t), intent(in) :: x + real(kind=rp), intent(in) :: k_0, k_1 + real(kind=rp), intent(in) :: q + + F_out%x = permeability_cpu(x%x, k_0, k_1, q) + end subroutine permeability_field + + !> @brief Apply a step function to a field. + !! @param[in,out] F Field to be modified. + !! @param[in] x0 Position of the step. + !! @param[in] value0 Value of the field before the step. + !! @param[in] value1 Value of the field after the step. + subroutine step_function_field(F, x0, value0, value1) + use filters_cpu, only: step_function_cpu + + type(field_t), intent(inout) :: F + real(kind=rp), intent(in) :: x0, value0, value1 + + F%x = step_function_cpu(F%x, x0, value0, value1) + end subroutine step_function_field + +end module filters diff --git a/src/source_terms/brinkman_source_term.f90 b/src/source_terms/brinkman_source_term.f90 new file mode 100644 index 00000000000..6f24167f443 --- /dev/null +++ b/src/source_terms/brinkman_source_term.f90 @@ -0,0 +1,408 @@ +! Copyright (c) 2023, The Neko Authors +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! +! * Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above +! copyright notice, this list of conditions and the following +! disclaimer in the documentation and/or other materials provided +! with the distribution. +! +! * Neither the name of the authors nor the names of its +! contributors may be used to endorse or promote products derived +! from this software without specific prior written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +! COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +! INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +! BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +! LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +! LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +! ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +!> Implements the `brinkman_source_term_t` type. +module brinkman_source_term + use num_types, only: rp, dp + use field, only: field_t + use field_list, only: field_list_t + use json_module, only: json_file + use json_utils, only: json_get, json_get_or_default, json_extract_item + use field_registry, only: neko_field_registry + use source_term, only: source_term_t + use coefs, only: coef_t + use neko_config, only: NEKO_BCKND_DEVICE + use utils, only: neko_error + use brinkman_source_term_cpu, only: brinkman_source_term_compute_cpu + use brinkman_source_term_device, only: brinkman_source_term_compute_device + implicit none + private + + !> A Brinkman source term. + !! The region and strength are controlled by assigning regions types and + !! brinkman limits to the source term. + type, public, extends(source_term_t) :: brinkman_source_term_t + private + + !> The value of the source term. + type(field_t), pointer :: indicator => null() + !> Brinkman permeability field. + type(field_t), pointer :: brinkman => null() + contains + !> The common constructor using a JSON object. + procedure, public, pass(this) :: init => brinkman_source_term_init_from_json + !> Destructor. + procedure, public, pass(this) :: free => brinkman_source_term_free + !> Computes the source term and adds the result to `fields`. + procedure, public, pass(this) :: compute_ => brinkman_source_term_compute + + ! ----------------------------------------------------------------------- ! + ! Private methods + procedure, pass(this) :: init_boundary_mesh + procedure, pass(this) :: init_point_zone + end type brinkman_source_term_t + +contains + + ! ========================================================================== ! + ! Public methods + + !> The common constructor using a JSON object. + !! @param json The JSON object for the source. + !! @param fields A list of fields for adding the source values. + !! @param coef The SEM coeffs. + subroutine brinkman_source_term_init_from_json(this, json, fields, coef) + use file, only: file_t + use tri_mesh, only: tri_mesh_t + use device, only: device_memcpy, HOST_TO_DEVICE + use filters, only: smooth_step_field, step_function_field, permeability_field + use signed_distance, only: signed_distance_field + use profiler, only: profiler_start_region, profiler_end_region + use json_module, only: json_core, json_value + implicit none + + class(brinkman_source_term_t), intent(inout) :: this + type(json_file), intent(inout) :: json + type(field_list_t), intent(inout), target :: fields + type(coef_t), intent(inout) :: coef + real(kind=rp) :: start_time, end_time + + character(len=:), allocatable :: filter_type + real(kind=rp), dimension(:), allocatable :: brinkman_limits + real(kind=rp) :: brinkman_penalty + + type(json_value), pointer :: json_object_list + type(json_core) :: core + + character(len=:), allocatable :: object_type + type(json_file) :: object_settings + integer :: n_regions + integer :: i + + ! Mandatory fields for the general source term + call json_get_or_default(json, "start_time", start_time, 0.0_rp) + call json_get_or_default(json, "end_time", end_time, huge(0.0_rp)) + + ! Read the options for the permeability field + call json_get(json, 'brinkman.limits', brinkman_limits) + call json_get(json, 'brinkman.penalty', brinkman_penalty) + + if (size(brinkman_limits) .ne. 2) then + call neko_error('brinkman_limits must be a 2 element array of reals') + end if + + call this%free() + call this%init_base(fields, coef, start_time, end_time) + + ! ------------------------------------------------------------------------ ! + ! Allocate the permeability and indicator field + + if (neko_field_registry%field_exists('brinkman_indicator') & + .or. neko_field_registry%field_exists('brinkman')) then + call neko_error('Brinkman field already exists.') + end if + + call neko_field_registry%add_field(coef%dof, 'brinkman_indicator') + this%indicator => neko_field_registry%get_field_by_name('brinkman_indicator') + + call neko_field_registry%add_field(coef%dof, 'brinkman') + this%brinkman => neko_field_registry%get_field_by_name('brinkman') + + ! ------------------------------------------------------------------------ ! + ! Select which constructor should be called + + call json%get('objects', json_object_list) + call json%info('objects', n_children=n_regions) + call json%get_core(core) + + do i=1, n_regions + call json_extract_item(core, json_object_list, i, object_settings) + call json_get_or_default(object_settings, 'type', object_type, 'none') + + select case (object_type) + case ('boundary_mesh') + call this%init_boundary_mesh(object_settings) + case ('point_zone') + call this%init_point_zone(object_settings) + + case ('none') + call object_settings%print() + call neko_error('Brinkman source term objects require a region type') + case default + call neko_error('Brinkman source term unknown region type') + end select + + end do + + ! Run filter on the full indicator field to smooth it out. + call json_get_or_default(json, 'filter.type', filter_type, 'none') + + select case (filter_type) + case ('none') + ! Do nothing + case default + call neko_error('Brinkman source term unknown filter type') + end select + + ! ------------------------------------------------------------------------ ! + ! Compute the permeability field + + call permeability_field(this%brinkman, this%indicator, & + & brinkman_limits(1), brinkman_limits(2), brinkman_penalty) + + ! Copy the permeability field to the device + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_memcpy(this%brinkman%x, this%brinkman%x_d, & + this%brinkman%dof%size(), HOST_TO_DEVICE, .true.) + end if + + end subroutine brinkman_source_term_init_from_json + + !> Destructor. + subroutine brinkman_source_term_free(this) + class(brinkman_source_term_t), intent(inout) :: this + + this%brinkman => null() + call this%free_base() + end subroutine brinkman_source_term_free + + !> Computes the source term and adds the result to `fields`. + !! @param t The time value. + !! @param tstep The current time-step. + subroutine brinkman_source_term_compute(this, t, tstep) + use device, only: device_memcpy, HOST_TO_DEVICE + implicit none + + class(brinkman_source_term_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + + if (NEKO_BCKND_DEVICE .eq. 1) then + call brinkman_source_term_compute_device(this%fields, this%brinkman) + else + call brinkman_source_term_compute_cpu(this%fields, this%brinkman) + end if + end subroutine brinkman_source_term_compute + + ! ========================================================================== ! + ! Private methods + + !> Initializes the source term from a boundary mesh. + subroutine init_boundary_mesh(this, json) + use file, only: file_t + use tri_mesh, only: tri_mesh_t + use device, only: device_memcpy, HOST_TO_DEVICE + use filters, only: smooth_step_field, step_function_field, permeability_field + use signed_distance, only: signed_distance_field + use profiler, only: profiler_start_region, profiler_end_region + use aabb + implicit none + + class(brinkman_source_term_t), intent(inout) :: this + type(json_file), intent(inout) :: json + + ! Options + character(len=:), allocatable :: mesh_file_name + character(len=:), allocatable :: distance_transform + character(len=:), allocatable :: filter_type + character(len=:), allocatable :: mesh_transform + + ! Read the options for the boundary mesh + type(file_t) :: mesh_file + type(tri_mesh_t) :: boundary_mesh + real(kind=rp) :: scalar_r + real(kind=dp) :: scalar_d + + ! Mesh transform options variables + real(kind=dp), dimension(:), allocatable :: box_min, box_max + logical :: keep_aspect_ratio + real(kind=dp), dimension(3) :: scaling + real(kind=dp), dimension(3) :: translation + type(field_t) :: temp_field + type(aabb_t) :: mesh_box, target_box + integer :: idx_p + + ! ------------------------------------------------------------------------ ! + ! Read the options for the boundary mesh + + call json_get(json, 'name', mesh_file_name) + + ! Settings on how to filter the design field + call json_get(json, 'distance_transform.type', distance_transform) + + ! ------------------------------------------------------------------------ ! + ! Load the immersed boundary mesh + + mesh_file = file_t(mesh_file_name) + call mesh_file%read(boundary_mesh) + + if (boundary_mesh%nelv .eq. 0) then + call neko_error('No elements in the boundary mesh') + end if + + ! ------------------------------------------------------------------------ ! + ! Transform the mesh if specified. + + call json_get_or_default(json, 'mesh_transform.type', mesh_transform, 'none') + + select case (mesh_transform) + case ('none') + ! Do nothing + case ('bounding_box') + call json_get(json, 'mesh_transform.box_min', box_min) + call json_get(json, 'mesh_transform.box_max', box_max) + call json_get_or_default(json, 'mesh_transform.keep_aspect_ratio', & + keep_aspect_ratio, .true.) + + if (size(box_min) .ne. 3 .or. size(box_max) .ne. 3) then + call neko_error('Case file: mesh_transform. & + &box_min and box_max must be 3 element arrays of reals') + end if + + call target_box%init(box_min, box_max) + + mesh_box = get_aabb(boundary_mesh) + + scaling = target_box%get_diagonal() / mesh_box%get_diagonal() + if (keep_aspect_ratio) then + scaling = minval(scaling) + end if + + translation = - scaling * mesh_box%get_min() + target_box%get_min() + + do idx_p = 1, boundary_mesh%mpts + boundary_mesh%points(idx_p)%x = & + scaling * boundary_mesh%points(idx_p)%x + translation + end do + + case default + call neko_error('Unknown mesh transform') + end select + + ! ------------------------------------------------------------------------ ! + ! Compute the permeability field + + ! Assign the signed distance field to all GLL points in the permeability + ! field. Initally we just run a brute force loop over all GLL points and + ! compute the signed distance function. This should be replaced with a + ! more efficient method, such as a tree search. + + call temp_field%init(this%indicator%dof) + + ! Select how to transform the distance field to a design field + select case (distance_transform) + case ('smooth_step') + call json_get(json, 'distance_transform.value', scalar_d) + scalar_r = real(scalar_d, kind=rp) + + call signed_distance_field(temp_field, boundary_mesh, scalar_d) + call smooth_step_field(temp_field, scalar_r, 0.0_rp) + + case ('step') + + call json_get(json, 'distance_transform.value', scalar_d) + + call signed_distance_field(temp_field, boundary_mesh, scalar_d) + call step_function_field(temp_field, scalar_r, 1.0_rp, 0.0_rp) + + case default + call neko_error('Unknown distance transform') + end select + + ! ------------------------------------------------------------------------ ! + ! Run filter on the temporary indicator field to smooth it out. + call json_get_or_default(json, 'filter.type', filter_type, 'none') + + select case (filter_type) + case ('none') + ! Do nothing + case default + call neko_error('Unknown filter type') + end select + + ! Update the global indicator field by max operator + this%indicator%x = max(this%indicator%x, temp_field%x) + + end subroutine init_boundary_mesh + + !> Initializes the source term from a point zone. + subroutine init_point_zone(this, json) + use filters, only: smooth_step_field, step_function_field, permeability_field + use signed_distance, only: signed_distance_field + use profiler, only: profiler_start_region, profiler_end_region + use point_zone, only: point_zone_t + use device, only: device_memcpy, HOST_TO_DEVICE + use point_zone_registry, only: neko_point_zone_registry + implicit none + + class(brinkman_source_term_t), intent(inout) :: this + type(json_file), intent(inout) :: json + + ! Options + character(len=:), allocatable :: zone_name + character(len=:), allocatable :: filter_type + + type(field_t) :: temp_field + class(point_zone_t), pointer :: my_point_zone + integer :: i + + ! ------------------------------------------------------------------------ ! + ! Read the options for the point zone + + call json_get(json,'name', zone_name) + call json_get_or_default(json, 'filter.type', filter_type, 'none') + + ! Compute the indicator field + + call temp_field%init(this%indicator%dof) + + my_point_zone => neko_point_zone_registry%get_point_zone(zone_name) + + do i = 1, my_point_zone%size + temp_field%x(my_point_zone%mask(i), 1, 1, 1) = 1.0_rp + end do + + ! Run filter on the temporary indicator field to smooth it out. + + select case (filter_type) + case ('none') + ! Do nothing + case default + call neko_error('Unknown filter type') + end select + + ! Update the global indicator field by max operator + this%indicator%x = max(this%indicator%x, temp_field%x) + + end subroutine init_point_zone + +end module brinkman_source_term diff --git a/src/source_terms/const_source_term.f90 b/src/source_terms/const_source_term.f90 index 9dd2b96b85e..a09f96fb9c5 100644 --- a/src/source_terms/const_source_term.f90 +++ b/src/source_terms/const_source_term.f90 @@ -35,7 +35,7 @@ module const_source_term use num_types, only : rp use field_list, only : field_list_t use json_module, only : json_file - use json_utils, only: json_get + use json_utils, only: json_get, json_get_or_default use source_term, only : source_term_t use coefs, only : coef_t use neko_config, only : NEKO_BCKND_DEVICE @@ -55,12 +55,12 @@ module const_source_term !> The common constructor using a JSON object. procedure, pass(this) :: init => const_source_term_init_from_json !> The constructor from type components. - procedure, pass(this) :: init_from_compenents => & + procedure, pass(this) :: init_from_compenents => & const_source_term_init_from_components !> Destructor. procedure, pass(this) :: free => const_source_term_free !> Computes the source term and adds the result to `fields`. - procedure, pass(this) :: compute => const_source_term_compute + procedure, pass(this) :: compute_ => const_source_term_compute end type const_source_term_t contains @@ -68,15 +68,21 @@ module const_source_term !! @param json The JSON object for the source. !! @param fields A list of fields for adding the source values. !! @param coef The SEM coeffs. - subroutine const_source_term_init_from_json(this, json, fields, coef) + subroutine const_source_term_init_from_json(this, json, fields, coef) class(const_source_term_t), intent(inout) :: this type(json_file), intent(inout) :: json type(field_list_t), intent(inout), target :: fields type(coef_t), intent(inout) :: coef real(kind=rp), allocatable :: values(:) + real(kind=rp) :: start_time, end_time call json_get(json, "values", values) - call const_source_term_init_from_components(this, fields, values, coef) + call json_get_or_default(json, "start_time", start_time, 0.0_rp) + call json_get_or_default(json, "end_time", end_time, huge(0.0_rp)) + + + call const_source_term_init_from_components(this, fields, values, coef, & + start_time, end_time) end subroutine const_source_term_init_from_json @@ -84,15 +90,19 @@ end subroutine const_source_term_init_from_json !! @param fields A list of fields for adding the source values. !! @param values The array of values, one for each field. !! @param coef The SEM coeffs. + !! @param start_time When to start adding the source term. + !! @param end_time When to stop adding the source term. subroutine const_source_term_init_from_components(this, fields, values, & - coef) + coef, start_time, end_time) class(const_source_term_t), intent(inout) :: this class(field_list_t), intent(inout), target :: fields real(kind=rp), intent(in) :: values(:) type(coef_t) :: coef + real(kind=rp), intent(in) :: start_time + real(kind=rp), intent(in) :: end_time call this%free() - call this%init_base(fields, coef) + call this%init_base(fields, coef, start_time, end_time) if (size(values) .ne. size(fields%fields)) then call neko_error("Number of fields and values inconsistent.") @@ -102,7 +112,7 @@ subroutine const_source_term_init_from_components(this, fields, values, & end subroutine const_source_term_init_from_components !> Destructor. - subroutine const_source_term_free(this) + subroutine const_source_term_free(this) class(const_source_term_t), intent(inout) :: this call this%free_base() @@ -111,7 +121,7 @@ end subroutine const_source_term_free !> Computes the source term and adds the result to `fields`. !! @param t The time value. !! @param tstep The current time-step. - subroutine const_source_term_compute(this, t, tstep) + subroutine const_source_term_compute(this, t, tstep) class(const_source_term_t), intent(inout) :: this real(kind=rp), intent(in) :: t integer, intent(in) :: tstep @@ -126,5 +136,5 @@ subroutine const_source_term_compute(this, t, tstep) call const_source_term_compute_cpu(this%fields, this%values) end if end subroutine const_source_term_compute - + end module const_source_term diff --git a/src/source_terms/source_term.f90 b/src/source_terms/source_term.f90 index 65a1c9eb74d..11f5e7e9f09 100644 --- a/src/source_terms/source_term.f90 +++ b/src/source_terms/source_term.f90 @@ -39,24 +39,30 @@ module source_term use json_module, only : json_file implicit none private - + !> Base abstract type for source terms. type, abstract, public:: source_term_t !> The fields to be updated with the source term values type(field_list_t) :: fields !> Coefficients for the SEM. type(coef_t), pointer :: coef => null() + !> Start time for adding the source term. + real(kind=rp) :: start_time = 0.0_rp + !> End time for adding the source term. + real(kind=rp) :: end_time = huge(0.0_rp) contains !> Constructor for the source_term_t (base) type. procedure, pass(this) :: init_base => source_term_init_base !> Destructor for the source_term_t (base) type. procedure, pass(this) :: free_base => source_term_free_base + !> Executes `compute_` based on time conditions. + procedure, pass(this) :: compute => source_term_compute_wrapper !> The common constructor using a JSON object. procedure(source_term_init), pass(this), deferred :: init !> Destructor. procedure(source_term_free), pass(this), deferred :: free !> Computes the source term and adds the result to `fields`. - procedure(source_term_compute), pass(this), deferred :: compute + procedure(source_term_compute), pass(this), deferred :: compute_ end type source_term_t @@ -66,56 +72,62 @@ module source_term class(source_term_t), allocatable :: source_term contains !> Destructor. - procedure, pass(this) :: free => source_term_wrapper_free + procedure, pass(this) :: free => source_term_wrapper_free end type source_term_wrapper_t abstract interface - !> The common constructor using a JSON object. - !! @param json The JSON object for the source. - !! @param fields A list of fields for adding the source values. - !! @param coef The SEM coeffs. - subroutine source_term_init(this, json, fields, coef) + !> The common constructor using a JSON object. + !! @param json The JSON object for the source. + !! @param fields A list of fields for adding the source values. + !! @param coef The SEM coeffs. + subroutine source_term_init(this, json, fields, coef) import source_term_t, json_file, field_list_t, coef_t class(source_term_t), intent(inout) :: this type(json_file), intent(inout) :: json type(field_list_t), intent(inout), target :: fields type(coef_t), intent(inout) :: coef - end subroutine + end subroutine source_term_init end interface abstract interface !> Destructor. - subroutine source_term_free(this) + subroutine source_term_free(this) import source_term_t class(source_term_t), intent(inout) :: this - end subroutine + end subroutine source_term_free end interface abstract interface !> Computes the source term and adds the result to `fields`. !! @param t The time value. !! @param tstep The current time-step. - subroutine source_term_compute(this, t, tstep) + subroutine source_term_compute(this, t, tstep) import source_term_t, rp class(source_term_t), intent(inout) :: this real(kind=rp), intent(in) :: t integer, intent(in) :: tstep - end subroutine + end subroutine source_term_compute end interface contains !> Constructor for the `source_term_t` (base) type. - !> @param fields A list of pointers to fields to be updated by the source + !> @param fields A list of pointers to fields to be updated by the source !! term. !> @param coef SEM coefs. - subroutine source_term_init_base(this, fields, coef) + !! @param start_time When to start adding the source term. + !! @param end_time When to stop adding the source term. + subroutine source_term_init_base(this, fields, coef, start_time, end_time) class(source_term_t), intent(inout) :: this type(field_list_t) :: fields type(coef_t), intent(inout), target :: coef + real(kind=rp), intent(in) :: start_time + real(kind=rp), intent(in) :: end_time integer :: n_fields, i this%coef => coef + this%start_time = start_time + this%end_time = end_time n_fields = size(fields%fields) allocate(this%fields%fields(n_fields)) @@ -127,7 +139,7 @@ subroutine source_term_init_base(this, fields, coef) end subroutine source_term_init_base !> Destructor for the `source_term_t` (base) type. - subroutine source_term_free_base(this) + subroutine source_term_free_base(this) class(source_term_t), intent(inout) :: this call this%fields%free() @@ -135,7 +147,7 @@ subroutine source_term_free_base(this) end subroutine source_term_free_base !> Destructor for the `source_term_wrapper_t` type. - subroutine source_term_wrapper_free(this) + subroutine source_term_wrapper_free(this) class(source_term_wrapper_t), intent(inout) :: this integer :: n_fields, i @@ -144,5 +156,18 @@ subroutine source_term_wrapper_free(this) deallocate(this%source_term) end if end subroutine source_term_wrapper_free - + + !> Executes `compute_` based on time conditions. + !> @param t Time value. + !> @param tstep Current time step. + subroutine source_term_compute_wrapper(this, t, tstep) + class(source_term_t), intent(inout) :: this + real(kind=rp), intent(in) :: t + integer, intent(in) :: tstep + + if (t .ge. this%start_time .and. t .le. this%end_time) then + call this%compute_(t, tstep) + end if + + end subroutine source_term_compute_wrapper end module source_term diff --git a/src/source_terms/source_term_factory.f90 b/src/source_terms/source_term_fctry.f90 similarity index 71% rename from src/source_terms/source_term_factory.f90 rename to src/source_terms/source_term_fctry.f90 index cbdb2951625..63766ad4281 100644 --- a/src/source_terms/source_term_factory.f90 +++ b/src/source_terms/source_term_fctry.f90 @@ -35,6 +35,8 @@ module source_term_fctry use source_term, only : source_term_t use const_source_term, only : const_source_term_t + use boussinesq_source_term, only : boussinesq_source_term_t + use brinkman_source_term, only: brinkman_source_term_t use json_module, only : json_file use json_utils, only : json_get use field_list, only : field_list_t @@ -42,30 +44,34 @@ module source_term_fctry use coefs, only : coef_t implicit none private - + public :: source_term_factory - - contains + +contains !> Source term factory. Both constructs and initializes the object. !! @param json JSON object initializing the source term. subroutine source_term_factory(source_term, json, fields, coef) - class(source_term_t), allocatable, intent(inout) :: source_term - type(json_file), intent(inout) :: json - type(field_list_t), intent(inout) :: fields - type(coef_t), intent(inout) :: coef - character(len=:), allocatable :: source_type - - call json_get(json, "type", source_type) + class(source_term_t), allocatable, intent(inout) :: source_term + type(json_file), intent(inout) :: json + type(field_list_t), intent(inout) :: fields + type(coef_t), intent(inout) :: coef + character(len=:), allocatable :: source_type + + call json_get(json, "type", source_type) + + if (trim(source_type) .eq. "constant") then + allocate(const_source_term_t::source_term) + else if (trim(source_type) .eq. "boussinesq") then + allocate(boussinesq_source_term_t::source_term) + else if (trim(source_type) .eq. "brinkman") then + allocate(brinkman_source_term_t::source_term) + else + call neko_error('Unknown source term '//trim(source_type)) + end if - if (trim(source_type) .eq. "constant") then - allocate(const_source_term_t::source_term) - else - call neko_error('Unknown source term '//trim(source_type)) - end if - - ! Initialize - call source_term%init(json, fields, coef) + ! Initialize + call source_term%init(json, fields, coef) end subroutine source_term_factory diff --git a/src/time_schemes/ab_time_scheme.f90 b/src/time_schemes/ab_time_scheme.f90 index 661f2ca4b8b..9df980e17e3 100644 --- a/src/time_schemes/ab_time_scheme.f90 +++ b/src/time_schemes/ab_time_scheme.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! !> Adam-Bashforth scheme for time integration. @@ -66,20 +66,20 @@ module ab_time_scheme use utils, only : neko_error implicit none private - + !> Explicit Adam-Bashforth scheme for time integration. !! @details !! For a contant time-step this corresponds to the following schemes for !! order 1 to 3: !! - Order 1: \f$ u^{n+1} = u^n \f$ !! - Order 2: \f$ u^{n+1} = 2u^n - u^{n-1} \f$, linear extrapolation - type, public, extends(time_scheme_t) :: ab_time_scheme_t - contains - !> Compute the scheme coefficients - procedure, nopass :: compute_coeffs => ab_time_scheme_compute_coeffs + type, public, extends(time_scheme_t) :: ab_time_scheme_t + contains + !> Compute the scheme coefficients + procedure, nopass :: compute_coeffs => ab_time_scheme_compute_coeffs end type ab_time_scheme_t - - contains + +contains !> Compute the scheme coefficients !! @param t Timestep values, first element is the current timestep. @@ -92,7 +92,7 @@ subroutine ab_time_scheme_compute_coeffs(coeffs, dt, order) real(kind=rp) dta, dtb, dtc, dtd, dte, dts call rzero(coeffs, 4) - + select case (order) case (1) coeffs(1) = 1.0_rp @@ -100,19 +100,19 @@ subroutine ab_time_scheme_compute_coeffs(coeffs, dt, order) coeffs(2) = -0.5_rp * dt(1) / dt(2) coeffs(1) = 1.0_rp - coeffs(2) case (3) - dts = dt(2) + dt(3) - dta = dt(1) / dt(2) - dtb = dt(2) / dt(3) - dtc = dt(1) / dt(3) - dtd = dts / dt(2) - dte = dt(1) / dts - coeffs(3) = dte*( 0.5_rp*dtb + dtc/3.0_rp ) - coeffs(2) = -0.5_rp * dta - coeffs(3) * dtd - coeffs(1) = 1.0_rp - coeffs(2) - coeffs(3) - case default - call neko_error("The order of the AB time scheme must be 1 to 3.") + dts = dt(2) + dt(3) + dta = dt(1) / dt(2) + dtb = dt(2) / dt(3) + dtc = dt(1) / dt(3) + dtd = dts / dt(2) + dte = dt(1) / dts + coeffs(3) = dte*( 0.5_rp*dtb + dtc/3.0_rp ) + coeffs(2) = -0.5_rp * dta - coeffs(3) * dtd + coeffs(1) = 1.0_rp - coeffs(2) - coeffs(3) + case default + call neko_error("The order of the AB time scheme must be 1 to 3.") end select - + end subroutine ab_time_scheme_compute_coeffs -end module ab_time_scheme \ No newline at end of file +end module ab_time_scheme diff --git a/src/time_schemes/bdf_time_scheme.f90 b/src/time_schemes/bdf_time_scheme.f90 index 2ed0a6161f9..3b1c7bfaef0 100644 --- a/src/time_schemes/bdf_time_scheme.f90 +++ b/src/time_schemes/bdf_time_scheme.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! !> Backward-differencing scheme for time integration. @@ -66,7 +66,7 @@ module bdf_time_scheme use utils, only : neko_error implicit none private - + !> Implicit backward-differencing scheme for time integration. !! @details !! The explicit forumlas for the coefficients are taken from the following @@ -76,14 +76,14 @@ module bdf_time_scheme !! !! For a contant time-step this corresponds to the following schemes for !! order 1 to 3: - !! - Order 1: \f$ \frac{1}{\Delta t} u^{n+1} - \frac{1}{\Delta t} u^{n} \f$ - !! - Order 2: \f$ \frac{3}{2\Delta t} u^{n+1} - \frac{4}{2\Delta t} u^{n} + - !! \frac{1}{2\Delta t} u^{n-1}\f$ - !! - Order 3: \f$ \frac{11}{6\Delta t} u^{n+1} - \frac{18}{6\Delta t} u^{n} + - !! \frac{9}{6\Delta t} u^{n-1} - \frac{2}{6\Delta t} u^{n-2}\f$ + !! - Order 1: \f$ \frac{1}{\Delta t} u^{n+1} - \frac{1}{\Delta t} u^{n} \f$ + !! - Order 2: \f$ \frac{3}{2\Delta t} u^{n+1} - \frac{4}{2\Delta t} u^{n} + + !! \frac{1}{2\Delta t} u^{n-1}\f$ + !! - Order 3: \f$ \frac{11}{6\Delta t} u^{n+1} - \frac{18}{6\Delta t} u^{n} + + !! \frac{9}{6\Delta t} u^{n-1} - \frac{2}{6\Delta t} u^{n-2}\f$ !! !! It is assumed that all the coefficients but the first one premultiply terms - !! that go to the right-hand side of the equation. + !! that go to the right-hand side of the equation. !! Accordingly, the signs of these coefficients are reversed in the `coeffs` !! array. This is taken into account, for example, in the implemeation of the !! `rhs_maker` class. @@ -95,18 +95,18 @@ module bdf_time_scheme !! the array \f$[1, 1]\f$, and **not** \f$[1/\Delta t, -1/\Delta t]\f$, as one !! might expect. Similar for the second and third order. !! - !! @remark + !! @remark !! The current implementation can be easily extended to schemes of arbitrary !! order, by using the `fd_weights_full` subroutine to compute the !! coefficients. A demonstration of this is implemented in a test in !! `tests/ext_bdf_scheme/test_bdf.pf` - type, public, extends(time_scheme_t) :: bdf_time_scheme_t - contains - !> Compute the scheme coefficients - procedure, nopass :: compute_coeffs => bdf_time_scheme_compute_coeffs + type, public, extends(time_scheme_t) :: bdf_time_scheme_t + contains + !> Compute the scheme coefficients + procedure, nopass :: compute_coeffs => bdf_time_scheme_compute_coeffs end type bdf_time_scheme_t - - contains + +contains !> Compute the scheme coefficients !! @param t Timestep values, first element is the current timestep. @@ -118,7 +118,7 @@ subroutine bdf_time_scheme_compute_coeffs(coeffs, dt, order) integer, intent(in) :: order call rzero(coeffs, 4) - + ! Note, these are true coeffs, multiplied by dt(1) select case (order) case (1) @@ -137,10 +137,10 @@ subroutine bdf_time_scheme_compute_coeffs(coeffs, dt, order) (dt(3) * (dt(2) + dt(3)) * (dt(1) + dt(2) + dt(3))) coeffs(1) = coeffs(2) + coeffs(3) + coeffs(4) coeffs = coeffs * dt(1) - case default - call neko_error("The order of the BDF time scheme must be 1 to 3.") + case default + call neko_error("The order of the BDF time scheme must be 1 to 3.") end select - + end subroutine bdf_time_scheme_compute_coeffs -end module bdf_time_scheme \ No newline at end of file +end module bdf_time_scheme diff --git a/src/time_schemes/ext_time_scheme.f90 b/src/time_schemes/ext_time_scheme.f90 index 1f550967d47..f7a8fbdd023 100644 --- a/src/time_schemes/ext_time_scheme.f90 +++ b/src/time_schemes/ext_time_scheme.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,40 +21,40 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! !> Explicit extrapolation scheme for time integration. @@ -66,7 +66,7 @@ module ext_time_scheme use utils, only : neko_error implicit none private - + !> Explicit extrapolation scheme for time integration. !! @details !! Compute the value at the current time-step by evaluating a polynomial @@ -81,16 +81,16 @@ module ext_time_scheme !! gives improved stability when combined with BDF2 in a advection-diffusion !! equation. !! - Order 3: \f$ u^{n+1} = 8/3u^n - 7/3u^{n-1} + 2/3u^{n-2} \f$ - type, public, extends(time_scheme_t) :: ext_time_scheme_t - contains - !> Compute the scheme coefficients - procedure, nopass :: compute_coeffs => ext_time_scheme_compute_coeffs - !> Compute the coefficients for the modified EXT scheme - procedure, nopass :: compute_modified_coeffs => & + type, public, extends(time_scheme_t) :: ext_time_scheme_t + contains + !> Compute the scheme coefficients + procedure, nopass :: compute_coeffs => ext_time_scheme_compute_coeffs + !> Compute the coefficients for the modified EXT scheme + procedure, nopass :: compute_modified_coeffs => & ext_time_scheme_compute_modified_coeffs end type ext_time_scheme_t - contains +contains !> Compute the scheme coefficients !! @param t Timestep values, first element is the current timestep. @@ -102,7 +102,7 @@ subroutine ext_time_scheme_compute_coeffs(coeffs, dt, order) integer, intent(in) :: order call rzero(coeffs, 4) - + select case (order) case (1) coeffs(1) = 1.0_rp @@ -113,8 +113,8 @@ subroutine ext_time_scheme_compute_coeffs(coeffs, dt, order) coeffs(3) = dt(1) / (dt(2) + dt(3)) * (dt(1) + dt(2)) / dt(3) coeffs(2) = - dt(1) / dt(2) * (1.0_rp + dt(2) / dt(3) + dt(1) / dt(3)) coeffs(1) = 1.0_rp - coeffs(2) - coeffs(3) - case default - call neko_error("The order of the EXT time scheme must be 1 to 3.") + case default + call neko_error("The order of the EXT time scheme must be 1 to 3.") end select end subroutine ext_time_scheme_compute_coeffs @@ -125,18 +125,18 @@ subroutine ext_time_scheme_compute_modified_coeffs(coeffs, dt) real(kind=rp), intent(out) :: coeffs(4) real(kind=rp), intent(in) :: dt(10) real(kind=rp) dta, dtb, dtc, dtd, dte, dts - + call rzero(coeffs, 4) - - dts = dt(2) + dt(3) - dta = dt(1) / dt(2) - dtb = dt(2) / dt(3) - dtc = dt(1) / dt(3) - dtd = dts / dt(2) - dte = dt(1) / dts - coeffs(3) = 2.0_rp / 3.0_rp * dtc * (1.0_rp / dtd + dte) - coeffs(2) = -dta - coeffs(3) * dtd - coeffs(1) = 1.0_rp - coeffs(2) - coeffs(3) + + dts = dt(2) + dt(3) + dta = dt(1) / dt(2) + dtb = dt(2) / dt(3) + dtc = dt(1) / dt(3) + dtd = dts / dt(2) + dte = dt(1) / dts + coeffs(3) = 2.0_rp / 3.0_rp * dtc * (1.0_rp / dtd + dte) + coeffs(2) = -dta - coeffs(3) * dtd + coeffs(1) = 1.0_rp - coeffs(2) - coeffs(3) end subroutine ext_time_scheme_compute_modified_coeffs -end module ext_time_scheme \ No newline at end of file +end module ext_time_scheme diff --git a/src/time_schemes/time_scheme.f90 b/src/time_schemes/time_scheme.f90 index 9272caffff5..751ca18c471 100644 --- a/src/time_schemes/time_scheme.f90 +++ b/src/time_schemes/time_scheme.f90 @@ -1,4 +1,4 @@ -! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. +! Copyright (c) 2008-2020, UCHICAGO ARGONNE, LLC. ! ! The UChicago Argonne, LLC as Operator of Argonne National ! Laboratory holds copyright in the Software. The copyright holder @@ -21,58 +21,57 @@ ! may be used to endorse or promote products derived from this software ! without specific prior written permission. ! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF -! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +! FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +! UCHICAGO ARGONNE, LLC, THE U.S. DEPARTMENT OF +! ENERGY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +! TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +! DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ! ! Additional BSD Notice ! --------------------- ! 1. This notice is required to be provided under our contract with ! the U.S. Department of Energy (DOE). This work was produced at -! Argonne National Laboratory under Contract +! Argonne National Laboratory under Contract ! No. DE-AC02-06CH11357 with the DOE. ! -! 2. Neither the United States Government nor UCHICAGO ARGONNE, -! LLC nor any of their employees, makes any warranty, +! 2. Neither the United States Government nor UCHICAGO ARGONNE, +! LLC nor any of their employees, makes any warranty, ! express or implied, or assumes any liability or responsibility for the ! accuracy, completeness, or usefulness of any information, apparatus, ! product, or process disclosed, or represents that its use would not ! infringe privately-owned rights. ! -! 3. Also, reference herein to any specific commercial products, process, -! or services by trade name, trademark, manufacturer or otherwise does -! not necessarily constitute or imply its endorsement, recommendation, -! or favoring by the United States Government or UCHICAGO ARGONNE LLC. -! The views and opinions of authors expressed -! herein do not necessarily state or reflect those of the United States -! Government or UCHICAGO ARGONNE, LLC, and shall +! 3. Also, reference herein to any specific commercial products, process, +! or services by trade name, trademark, manufacturer or otherwise does +! not necessarily constitute or imply its endorsement, recommendation, +! or favoring by the United States Government or UCHICAGO ARGONNE LLC. +! The views and opinions of authors expressed +! herein do not necessarily state or reflect those of the United States +! Government or UCHICAGO ARGONNE, LLC, and shall ! not be used for advertising or product endorsement purposes. ! !> Base class for time integration schemes module time_scheme use neko_config use num_types, only : rp - use utils, only : neko_warning use, intrinsic :: iso_c_binding implicit none private - + !> Base abstract class for time integration schemes type, abstract, public :: time_scheme_t contains !> Compute the coefficients procedure(compute_coeffs), deferred, nopass :: compute_coeffs end type time_scheme_t - + abstract interface !> Interface for setting the scheme coefficients !! @param t Timestep values, first element is the current timestep. @@ -83,7 +82,7 @@ subroutine compute_coeffs(coeffs, dt, order) real(kind=rp), intent(out) :: coeffs(4) real(kind=rp), intent(in) :: dt(10) integer, intent(in) :: order - end subroutine + end subroutine compute_coeffs end interface - + end module time_scheme diff --git a/src/time_schemes/time_scheme_controller.f90 b/src/time_schemes/time_scheme_controller.f90 index a90be291e23..9116f5c1b26 100644 --- a/src/time_schemes/time_scheme_controller.f90 +++ b/src/time_schemes/time_scheme_controller.f90 @@ -31,7 +31,7 @@ ! POSSIBILITY OF SUCH DAMAGE. ! !> Compound scheme for the advection and diffusion operators in a transport -!! equation. +!! equation. module time_scheme_controller use neko_config use num_types, only : rp @@ -45,7 +45,7 @@ module time_scheme_controller !> Implements the logic to compute the time coefficients for the advection and !! diffusion operators in a transport equation. !! @details - !! Uses the BDF scheme for the diffusion, where as the term for advection + !! Uses the BDF scheme for the diffusion, where as the term for advection !! the scheme depends on the orders of the BDF and advection schemes. !! - Order 1 advection !! - BDF1 for diffusion -> Adam-Bashforth scheme. @@ -57,22 +57,22 @@ module time_scheme_controller !! - BDF2 for diffusion -> Modified explcit extrapolation scheme. !! - BDF3 for diffusion -> Explciit extrapolation scheme. !! The order of the BDF scheme in the above logic is set by the user, whereas - !! the advection scheme is set to forward Euler when BDF is order 1, + !! the advection scheme is set to forward Euler when BDF is order 1, !! and otherwise to a 3rd order scheme (excluding the first 2 timesteps). !! This means that some of the options in the above list never get realized, !! particularly order 2 and 3 advection for 1st order BDF. They remain in the !! code so as to have the orinigal Nek5000 logic in place for possible !! adoption in the future. - !! An important detail here is the handling of the first timesteps where a + !! An important detail here is the handling of the first timesteps where a !! high-order scheme cannot be constructed. The parameters `nadv` and `ndiff`, !! which are initialized to 0, hold the current order of the respective !! scheme. - !! @note the advection scheme also applies to source terms. + !! @note the advection scheme also applies to source terms. type, public :: time_scheme_controller_t type(ext_time_scheme_t) :: ext type(ab_time_scheme_t) :: ab type(bdf_time_scheme_t) :: bdf - + !> Time coefficients for the advection operator real(kind=rp) :: advection_coeffs(4) = 0 !> Time coefficients for the diffusion operator @@ -88,10 +88,10 @@ module time_scheme_controller !> Order of the diffusion scheme integer :: diffusion_time_order !> Device pointer for `advection_coeffs` - type(c_ptr) :: advection_coeffs_d = C_NULL_PTR + type(c_ptr) :: advection_coeffs_d = C_NULL_PTR !> Device pointer for `diffusion_coeffs` - type(c_ptr) :: diffusion_coeffs_d = C_NULL_PTR - + type(c_ptr) :: diffusion_coeffs_d = C_NULL_PTR + contains !> Constructor procedure, pass(this) :: init => time_scheme_controller_init @@ -102,7 +102,7 @@ module time_scheme_controller time_scheme_controller_set_coeffs end type time_scheme_controller_t - contains +contains !> Contructor !! @param torder Desired order of the scheme: 1, 2, 3. @@ -110,10 +110,10 @@ module time_scheme_controller subroutine time_scheme_controller_init(this, torder) implicit none class(time_scheme_controller_t) :: this - integer :: torder - + integer :: torder + this%diffusion_time_order = torder - + ! Force 1st order advection when diffusion is 1st order if (torder .eq. 1) then this%advection_time_order = 1 @@ -139,7 +139,7 @@ subroutine time_scheme_controller_free(this) end subroutine time_scheme_controller_free !> Set the time coefficients - !! @details Implements all necessary logic to handle + !! @details Implements all necessary logic to handle !! @param t Timestep values, first element is the current timestep. subroutine time_scheme_controller_set_coeffs(this, dt) implicit none @@ -155,18 +155,18 @@ subroutine time_scheme_controller_set_coeffs(this, dt) adv_coeffs_d => this%advection_coeffs_d, & diff_coeffs => this%diffusion_coeffs, & diff_coeffs_d => this%diffusion_coeffs_d) - + adv_coeffs_old = adv_coeffs diff_coeffs_old = diff_coeffs - + ! Increment the order of the scheme if below time_order ndiff = ndiff + 1 ndiff = min(ndiff, this%diffusion_time_order) nadv = nadv + 1 nadv = min(nadv, this%advection_time_order) - + call this%bdf%compute_coeffs(diff_coeffs, dt, ndiff) - + if (nadv .eq. 1) then ! Forward euler call this%ext%compute_coeffs(adv_coeffs, dt, nadv) @@ -190,20 +190,22 @@ subroutine time_scheme_controller_set_coeffs(this, dt) call this%ext%compute_coeffs(adv_coeffs, dt, nadv) end if end if - + if (c_associated(adv_coeffs_d)) then if (maxval(abs(adv_coeffs - adv_coeffs_old)) .gt. 1e-10_rp) then - call device_memcpy(adv_coeffs, adv_coeffs_d, 4, HOST_TO_DEVICE) + call device_memcpy(adv_coeffs, adv_coeffs_d, 4, & + HOST_TO_DEVICE, sync=.false.) end if end if if (c_associated(diff_coeffs_d)) then if (maxval(abs(diff_coeffs - diff_coeffs_old)) .gt. 1e-10_rp) then - call device_memcpy(diff_coeffs, diff_coeffs_d, 4, HOST_TO_DEVICE) + call device_memcpy(diff_coeffs, diff_coeffs_d, 4, & + HOST_TO_DEVICE, sync=.false.) end if end if end associate end subroutine time_scheme_controller_set_coeffs -end module time_scheme_controller \ No newline at end of file +end module time_scheme_controller diff --git a/tests/device/device.pf b/tests/device/device.pf index 002070aa76f..4cdd145a91b 100644 --- a/tests/device/device.pf +++ b/tests/device/device.pf @@ -2,7 +2,7 @@ @test subroutine test_device_init use pfunit - use device + use device implicit none call device_init() end subroutine test_device_init @@ -15,13 +15,13 @@ subroutine test_device_sync call device_init() call device_sync() - + end subroutine test_device_sync @test subroutine test_device_alloc - use neko_config - use pfunit + use neko_config + use pfunit use device implicit none type(c_ptr) :: x_d @@ -36,12 +36,12 @@ subroutine test_device_alloc @assertTrue(c_associated(x_d)) end if - + end subroutine test_device_alloc @test subroutine test_device_free - use neko_config + use neko_config use pfunit use device implicit none @@ -49,25 +49,25 @@ subroutine test_device_free integer(c_size_t) :: size_ if (NEKO_BCKND_DEVICE .eq. 1) then - + size_ = 42 * 4 x_d = C_NULL_PTR call device_alloc(x_d, size_) - + @assertTrue(c_associated(x_d)) call device_free(x_d) - + @assertFalse(c_associated(x_d)) end if - + end subroutine test_device_free @test subroutine test_device_associate - use neko_config + use neko_config use pfunit use device implicit none @@ -76,21 +76,22 @@ subroutine test_device_associate integer :: x(42) if (NEKO_BCKND_DEVICE .eq. 1) then - + size_ = size(x) * 4 - + x_d = C_NULL_PTR + call device_alloc(x_d, size_) call device_associate(x, x_d) - + @assertTrue(c_associated(x_d)) end if - + end subroutine test_device_associate @test subroutine test_device_associated - use neko_config + use neko_config use pfunit use device implicit none @@ -99,8 +100,9 @@ subroutine test_device_associated integer :: x(42) if (NEKO_BCKND_DEVICE .eq. 1) then - + size_ = size(x) * 4 + x_d = C_NULL_PTR call device_alloc(x_d, size_) call device_associate(x, x_d) @@ -108,24 +110,26 @@ subroutine test_device_associated @assertTrue(device_associated(x)) end if - + end subroutine test_device_associated @test subroutine test_device_get_ptr - use neko_config + use neko_config use pfunit use device implicit none type(c_ptr) :: x_d, dev integer(c_size_t) :: size_ integer :: x(42) - logical :: same_ptr if (NEKO_BCKND_DEVICE .eq. 1) then size_ = size(x) * 4 - + + x_d = C_NULL_PTR + dev = C_NULL_PTR + call device_alloc(x_d, size_) call device_associate(x, x_d) @@ -135,23 +139,24 @@ subroutine test_device_get_ptr end if - + end subroutine test_device_get_ptr @test subroutine test_device_map - use neko_config + use neko_config use pfunit use device implicit none type(c_ptr) :: x_d, dev integer(c_size_t) :: size_ integer :: x(42) - logical :: same_ptr - + if (NEKO_BCKND_DEVICE .eq. 1) then size_ = size(x) * 4 + x_d = C_NULL_PTR + dev = C_NULL_PTR call device_map(x, x_d, size(x)) @@ -161,23 +166,25 @@ subroutine test_device_map end if - + end subroutine test_device_map @test subroutine test_device_memcpy - use neko_config + use neko_config use pfunit use device implicit none type(c_ptr) :: x_d, y_d, dev integer(c_size_t) :: size_ integer :: x(42), i - logical :: same_ptr if (NEKO_BCKND_DEVICE .eq. 1) then - + size_ = size(x) * 4 + x_d = C_NULL_PTR + y_d = C_NULL_PTR + dev = C_NULL_PTR call device_map(x, x_d, size(x)) @@ -187,12 +194,12 @@ subroutine test_device_memcpy x(i) = i end do - call device_memcpy(x, x_d, size(X), HOST_TO_DEVICE) + call device_memcpy(x, x_d, size(X), HOST_TO_DEVICE, sync=.false.) x = 0 call device_memcpy(x, x_d, size(x), DEVICE_TO_HOST, sync=.true.) - + do i = 1, size(x) @assertEqual(x(i), i) end do @@ -204,23 +211,23 @@ subroutine test_device_memcpy x = 0 call device_memcpy(x, y_d, size(x), DEVICE_TO_HOST, sync=.true.) - + do i = 1, size(x) @assertEqual(x(i), i) end do - + end if end subroutine test_device_memcpy @test subroutine test_device_stream_create - use neko_config + use neko_config use pfunit use device implicit none type(c_ptr) :: stream - + if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1)) then stream = C_NULL_PTR @@ -228,17 +235,17 @@ subroutine test_device_stream_create @assertTrue(c_associated(stream)) end if - + end subroutine test_device_stream_create @test subroutine test_device_stream_destroy - use neko_config + use neko_config use pfunit use device implicit none type(c_ptr) :: stream - + if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1)) then stream = C_NULL_PTR @@ -248,17 +255,17 @@ subroutine test_device_stream_destroy call device_stream_destroy(stream) end if - + end subroutine test_device_stream_destroy @test subroutine test_device_sync_stream - use neko_config + use neko_config use pfunit use device implicit none type(c_ptr) :: stream1, stream2 - + if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1)) then stream1 = C_NULL_PTR @@ -277,5 +284,5 @@ subroutine test_device_sync_stream call device_stream_destroy(stream2) end if - + end subroutine test_device_sync_stream diff --git a/tests/device_math/device_math_parallel.pf b/tests/device_math/device_math_parallel.pf index 3ea325b6b28..ab70603954f 100644 --- a/tests/device_math/device_math_parallel.pf +++ b/tests/device_math/device_math_parallel.pf @@ -44,7 +44,7 @@ contains a = 1.0_rp call device_map(a, a_d, size(a)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) call device_rzero(a_d, n) @@ -58,7 +58,7 @@ contains a(i) = real(i, rp) end do - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) call device_rzero(a_d, n) @@ -83,7 +83,7 @@ contains a = 0.0_rp call device_map(a, a_d, size(a)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) call device_rone(a_d, n) @@ -97,7 +97,7 @@ contains a(i) = real(i, rp) end do - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) call device_rone(a_d, n) @@ -128,14 +128,14 @@ contains end do call device_map(a, a_d, size(a)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) call device_map(b, b_d, size(b)) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) call device_copy(b_d, a_d, n) - call device_memcpy(a, a_d, size(a), DEVICE_TO_HOST) + call device_memcpy(a, a_d, size(a), DEVICE_TO_HOST, sync=.false.) call device_memcpy(b, b_d, size(a), DEVICE_TO_HOST, sync=.true.) do i = 1, n @@ -158,7 +158,7 @@ contains a = 1.0_rp call device_map(a, a_d, size(a)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) call device_cfill(a_d, c, n) @@ -185,7 +185,7 @@ contains a = 40.0_rp call device_map(a, a_d, size(a)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) call device_cadd(a_d, c, n) @@ -217,8 +217,8 @@ contains call device_map(a, a_d, size(a)) call device_map(b, b_d, size(b)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) call device_add2(a_d, b_d, n) @@ -250,8 +250,8 @@ contains call device_map(a, a_d, size(a)) call device_map(b, b_d, size(b)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) call device_add2s1(a_d, b_d, c1, n) @@ -283,8 +283,8 @@ contains call device_map(a, a_d, size(a)) call device_map(b, b_d, size(b)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) call device_add2s2(a_d, b_d, c1, n) @@ -320,9 +320,9 @@ contains call device_map(b, b_d, size(b)) call device_map(c, c_d, size(c)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) - call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE, sync=.false.) call device_add3s2(a_d, b_d, c_d, c1, c2, n) @@ -350,7 +350,7 @@ contains call device_map(a, a_d, size(a)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) call device_invcol1(a_d, n) @@ -381,8 +381,8 @@ contains call device_map(a, a_d, size(a)) call device_map(b, b_d, size(b)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) call device_invcol2(a_d, b_d, n) @@ -413,8 +413,8 @@ contains call device_map(a, a_d, size(a)) call device_map(b, b_d, size(b)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) call device_col2(a_d, b_d, n) @@ -448,9 +448,9 @@ contains call device_map(b, b_d, size(b)) call device_map(c, c_d, size(c)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) - call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE, sync=.false.) call device_col3(a_d, b_d, c_d, n) @@ -481,8 +481,8 @@ contains call device_map(a, a_d, size(a)) call device_map(b, b_d, size(b)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) call device_sub2(a_d, b_d, n) @@ -516,9 +516,9 @@ contains call device_map(b, b_d, size(b)) call device_map(c, c_d, size(c)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) - call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE, sync=.false.) call device_sub3(a_d, b_d, c_d, n) @@ -552,9 +552,9 @@ contains call device_map(b, b_d, size(b)) call device_map(c, c_d, size(c)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) - call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE, sync=.false.) call device_addcol3(a_d, b_d, c_d, n) @@ -591,10 +591,10 @@ contains call device_map(c, c_d, size(c)) call device_map(d, d_d, size(d)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) - call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE) - call device_memcpy(d, d_d, size(d), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(d, d_d, size(d), HOST_TO_DEVICE, sync=.false.) call device_addcol4(a_d, b_d, c_d, d_d, n) @@ -632,9 +632,9 @@ contains call device_map(b, b_d, size(b)) call device_map(c, c_d, size(c)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) - call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(c, c_d, size(c), HOST_TO_DEVICE, sync=.false.) expected = real(this%getNumProcesses(), rp) * real(n, rp) res = device_glsc3(a_d, b_d, c_d, n) @@ -666,8 +666,8 @@ contains call device_map(a, a_d, size(a)) call device_map(b, b_d, size(b)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) - call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b, b_d, size(b), HOST_TO_DEVICE, sync=.false.) expected = real(this%getNumProcesses(), rp) * real(n, rp) res = device_glsc2(a_d, b_d, n) @@ -696,7 +696,7 @@ contains call device_map(a, a_d, size(a)) - call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE) + call device_memcpy(a, a_d, size(a), HOST_TO_DEVICE, sync=.false.) expected = real(this%getNumProcesses(), rp) * real(n, rp) res = device_glsum(a_d, n) diff --git a/tests/device_mathops/device_mathops.pf b/tests/device_mathops/device_mathops.pf index 2a9e1186ceb..6753565fb0d 100644 --- a/tests/device_mathops/device_mathops.pf +++ b/tests/device_mathops/device_mathops.pf @@ -28,14 +28,14 @@ subroutine test_mathops_opchsign a2 = 2.0_rp a3 = 3.0_rp - call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE) - call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE) - call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE) + call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE, sync=.false.) call device_opchsign(a1_d, a2_d, a3_d, gdim, n) - call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST) - call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST) + call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST, sync=.false.) call device_memcpy(a3, a3_d, n, DEVICE_TO_HOST, sync=.true.) do i = 1, n @@ -84,15 +84,15 @@ subroutine test_mathops_opcolv a3 = 3.0_rp c = 42.0_rp - call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE) - call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE) - call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE) - call device_memcpy(c, c_d, n, HOST_TO_DEVICE) + call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(c, c_d, n, HOST_TO_DEVICE, sync=.false.) call device_opcolv(a1_d, a2_d, a3_d, c_d, gdim, n) - call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST) - call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST) + call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST, sync=.false.) call device_memcpy(a3, a3_d, n, DEVICE_TO_HOST, sync=.true.) do i = 1, n @@ -152,18 +152,18 @@ subroutine test_mathops_opcolv3c b3 = 3.0_rp c = 42.0_rp - call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE) - call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE) - call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE) - call device_memcpy(b1, b1_d, n, HOST_TO_DEVICE) - call device_memcpy(b2, b2_d, n, HOST_TO_DEVICE) - call device_memcpy(b3, b3_d, n, HOST_TO_DEVICE) - call device_memcpy(c, c_d, n, HOST_TO_DEVICE) + call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b1, b1_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b2, b2_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b3, b3_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(c, c_d, n, HOST_TO_DEVICE, sync=.false.) call device_opcolv3c(a1_d, a2_d, a3_d, b1_d, b2_d, b3_d, c_d, d, n, gdim) - call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST) - call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST) + call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST, sync=.false.) call device_memcpy(a3, a3_d, n, DEVICE_TO_HOST, sync=.true.) do i = 1, n @@ -218,17 +218,17 @@ subroutine test_mathops_opadd2cm b2 = 2.0_rp b3 = 3.0_rp - call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE) - call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE) - call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE) - call device_memcpy(b1, b1_d, n, HOST_TO_DEVICE) - call device_memcpy(b2, b2_d, n, HOST_TO_DEVICE) - call device_memcpy(b3, b3_d, n, HOST_TO_DEVICE) + call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b1, b1_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b2, b2_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b3, b3_d, n, HOST_TO_DEVICE, sync=.false.) call device_opadd2cm(a1_d, a2_d, a3_d, b1_d, b2_d, b3_d, c, n, gdim) - call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST) - call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST) + call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST, sync=.false.) call device_memcpy(a3, a3_d, n, DEVICE_TO_HOST, sync=.true.) do i = 1, n @@ -286,19 +286,19 @@ subroutine test_mathops_opadd2col b3 = 3.0_rp c = 42.0_rp - call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE) - call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE) - call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE) - call device_memcpy(b1, b1_d, n, HOST_TO_DEVICE) - call device_memcpy(b2, b2_d, n, HOST_TO_DEVICE) - call device_memcpy(b3, b3_d, n, HOST_TO_DEVICE) - call device_memcpy(c, c_d, n, HOST_TO_DEVICE) + call device_memcpy(a1, a1_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(a2, a2_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(a3, a3_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b1, b1_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b2, b2_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(b3, b3_d, n, HOST_TO_DEVICE, sync=.false.) + call device_memcpy(c, c_d, n, HOST_TO_DEVICE, sync=.false.) call device_opadd2col(a1_d, a2_d, a3_d, b1_d, b2_d, b3_d, c_d, n, gdim) - call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST) - call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST) - call device_memcpy(a3, a3_d, n, DEVICE_TO_HOST, sync=.true.) + call device_memcpy(a1, a1_d, n, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(a2, a2_d, n, DEVICE_TO_HOST, sync=.false.) + call device_memcpy(a3, a3_d, n, DEVICE_TO_HOST, sync=.true.) do i = 1, n @assertEqual(a1(i), 4753.0_rp) diff --git a/tests/field/field_parallel.pf b/tests/field/field_parallel.pf index 319b53c4fa8..e39f4def910 100644 --- a/tests/field/field_parallel.pf +++ b/tests/field/field_parallel.pf @@ -73,6 +73,7 @@ contains call msh%add_element(2, p(2), p(5), p(6), p(4), & p(8), p(11), p(12), p(9)) + call msh%generate_conn() end subroutine test_field_gen_msh @@ -88,6 +89,8 @@ contains call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr) + pe_rank = this%getProcessRank() + pe_size = this%getNumProcesses() call test_field_gen_msh(msh) call Xh%init(GLL, lx, lx, lx) diff --git a/tests/gather_scatter/gather_scatter_parallel.pf b/tests/gather_scatter/gather_scatter_parallel.pf index 354cdd4479e..2af79cacc22 100644 --- a/tests/gather_scatter/gather_scatter_parallel.pf +++ b/tests/gather_scatter/gather_scatter_parallel.pf @@ -86,7 +86,7 @@ contains if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. & (NEKO_BCKND_OPENCL .eq. 1)) then - call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE) + call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.) end if call gs_h%op(x%x, n, GS_OP_ADD) @@ -175,7 +175,7 @@ contains if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. & (NEKO_BCKND_OPENCL .eq. 1)) then - call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE) + call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.) end if call gs_h%op(x%x, n, GS_OP_ADD) @@ -263,7 +263,7 @@ contains if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. & (NEKO_BCKND_OPENCL .eq. 1)) then - call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE) + call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.) end if call gs_h%op(x%x, n, GS_OP_ADD) @@ -351,7 +351,7 @@ contains if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. & (NEKO_BCKND_OPENCL .eq. 1)) then - call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE) + call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.) end if call gs_h%op(x%x, n, GS_OP_ADD) @@ -439,7 +439,7 @@ contains if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. & (NEKO_BCKND_OPENCL .eq. 1)) then - call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE) + call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.) end if call gs_h%op(x%x, n, GS_OP_ADD) @@ -527,7 +527,7 @@ contains if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. & (NEKO_BCKND_OPENCL .eq. 1)) then - call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE) + call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.) end if call gs_h%op(x%x, n, GS_OP_ADD) @@ -615,7 +615,7 @@ contains if ((NEKO_BCKND_HIP .eq. 1) .or. (NEKO_BCKND_CUDA .eq. 1) .or. & (NEKO_BCKND_OPENCL .eq. 1)) then - call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE) + call device_memcpy(x%x, x%x_d, size(x%x), HOST_TO_DEVICE, sync=.false.) end if call gs_h%op(x%x, n, GS_OP_ADD) diff --git a/tests/mean_field/mean_field_parallel.pf b/tests/mean_field/mean_field_parallel.pf index 151d80b3d6f..b9ce60d2049 100644 --- a/tests/mean_field/mean_field_parallel.pf +++ b/tests/mean_field/mean_field_parallel.pf @@ -73,6 +73,7 @@ contains call msh%add_element(2, p(2), p(5), p(6), p(4), & p(8), p(11), p(12), p(9)) + call msh%generate_conn() end subroutine test_mean_field_gen_msh @@ -87,6 +88,8 @@ contains integer :: ierr call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr) + pe_rank = this%getProcessRank() + pe_size = this%getNumProcesses() call test_mean_field_gen_msh(msh) call Xh%init(GLL, lx, lx, lx) @@ -125,7 +128,8 @@ contains end do if (NEKO_BCKND_DEVICE .eq. 1) then - call device_memcpy(mf%mf%x, mf%mf%x_d, size(mf%mf%x), DEVICE_TO_HOST, sync=.true.) + call device_memcpy(mf%mf%x, mf%mf%x_d, size(mf%mf%x), & + DEVICE_TO_HOST, sync=.true.) end if @assertEqual(maxval(mf%mf%x), 42.0_rp) @@ -139,7 +143,8 @@ contains end do if (NEKO_BCKND_DEVICE .eq. 1) then - call device_memcpy(mf2%mf%x, mf2%mf%x_d, size(mf2%mf%x), DEVICE_TO_HOST, sync=.true.) + call device_memcpy(mf2%mf%x, mf2%mf%x_d, size(mf2%mf%x), & + DEVICE_TO_HOST, sync=.true.) end if diff --git a/tests/mean_sqr_field/mean_sqr_field_parallel.pf b/tests/mean_sqr_field/mean_sqr_field_parallel.pf index 6692e0e188f..b1427596d7c 100644 --- a/tests/mean_sqr_field/mean_sqr_field_parallel.pf +++ b/tests/mean_sqr_field/mean_sqr_field_parallel.pf @@ -74,6 +74,7 @@ contains call msh%add_element(2, p(2), p(5), p(6), p(4), & p(8), p(11), p(12), p(9)) + call msh%generate_conn() end subroutine test_mean_sqr_field_gen_msh @@ -89,6 +90,8 @@ contains call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr) + pe_rank = this%getProcessRank() + pe_size = this%getNumProcesses() call test_mean_sqr_field_gen_msh(msh) call Xh%init(GLL, lx, lx, lx) @@ -126,7 +129,8 @@ contains end do if (NEKO_BCKND_DEVICE .eq. 1) then - call device_memcpy(mf%mf%x, mf%mf%x_d, size(mf%mf%x), DEVICE_TO_HOST, sync=.true.) + call device_memcpy(mf%mf%x, mf%mf%x_d, size(mf%mf%x), & + DEVICE_TO_HOST, sync=.true.) end if @assertEqual(maxval(mf%mf%x), 1764.0_rp) @@ -140,7 +144,8 @@ contains end do if (NEKO_BCKND_DEVICE .eq. 1) then - call device_memcpy(mf2%mf%x, mf2%mf%x_d, size(mf2%mf%x), DEVICE_TO_HOST, sync=.true.) + call device_memcpy(mf2%mf%x, mf2%mf%x_d, size(mf2%mf%x), & + DEVICE_TO_HOST, sync=.true.) end if @assertEqual(maxval(mf2%mf%x), 38.5_rp) diff --git a/tests/point_interpolation/point_interpolation_parallel.pf b/tests/point_interpolation/point_interpolation_parallel.pf index ff9cc498099..90add943ed5 100644 --- a/tests/point_interpolation/point_interpolation_parallel.pf +++ b/tests/point_interpolation/point_interpolation_parallel.pf @@ -15,8 +15,32 @@ module point_interpolation_parallel use neko_config implicit none + @TestCase + type, extends(MPITestCase) :: interpolation_test + contains + procedure :: setUp + procedure :: tearDown + end type interpolation_test + contains + subroutine setUp(this) + class(interpolation_test), intent(inout) :: this + integer :: ierr + + call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr) + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_init + end if + end subroutine setUp + + subroutine tearDown(this) + class(interpolation_test), intent(inout) :: this + if (NEKO_BCKND_DEVICE .eq. 1) then + call device_finalize + end if + end subroutine tearDown + !> Creates 8 points for a cube with diagonal (1, 1) - (2, 2) subroutine create_points(p) implicit none @@ -40,7 +64,7 @@ contains call p(8)%set_id(8) end subroutine create_points - subroutine point_interpolation_test_gen_coef(msh, coef, lx) + subroutine gen_coef(msh, coef, lx) implicit none type(mesh_t), intent(inout) :: msh type(coef_t), intent(inout) :: coef @@ -54,32 +78,26 @@ contains call gs%init(dof) call coef%init(gs) - end subroutine point_interpolation_test_gen_coef + end subroutine gen_coef @test(npes=[1]) - subroutine point_interpolation_test_init(this) + subroutine init(this) implicit none - class (MpiTestMethod), intent(inout) :: this + class (interpolation_test), intent(inout) :: this type(point_interpolator_t) :: interp type(coef_t) :: coef type(mesh_t) :: msh integer, parameter :: lx = 4 - integer :: ierr type(linear_dist_t) :: dist type(point_t) :: p(8) type(dofmap_t) :: dof type(gs_t) :: gs type(space_t) :: Xh - - call device_init - call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr) - pe_rank = this%getProcessRank() pe_size = this%getNumProcesses() - dist = linear_dist_t(1, this%getProcessRank(), & - this%getNumProcesses(), NEKO_COMM) + dist = linear_dist_t(1, pe_rank, pe_size, NEKO_COMM) call create_points(p) @@ -96,35 +114,32 @@ contains @assertTrue(associated(interp%Xh)) - call device_finalize - end subroutine point_interpolation_test_init + end subroutine init @test(npes=[1]) - subroutine point_interpolation_test_interpolation(this) + subroutine interpolation(this) implicit none - class (MpiTestMethod), intent(inout) :: this + class (interpolation_test), intent(inout) :: this type(point_interpolator_t) :: interp type(coef_t) :: coef type(mesh_t) :: msh integer, parameter :: lx = 4 - integer :: ierr type(point_t) :: rst(4) ! A list of r,s,t coordinates type(point_t) :: xyz(4), xyz_equal(4) ! the results of the interpolation - real(kind=rp) :: xyz_real(4) + real(kind=rp) :: xyz_real(4), xyz_target(4) type(linear_dist_t) :: dist type(point_t) :: p(8) type(dofmap_t) :: dof type(gs_t) :: gs type(space_t) :: Xh - integer :: e = 1 ! our element index (in this example we only use a 1-element mesh) + ! our element index (in this example we only use a 1-element mesh) + integer :: e = 1 - call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr) pe_rank = this%getProcessRank() pe_size = this%getNumProcesses() - dist = linear_dist_t(1, this%getProcessRank(), & - this%getNumProcesses(), NEKO_COMM) + dist = linear_dist_t(1, pe_rank, pe_size, NEKO_COMM) call create_points(p) call msh%init(3, dist) call msh%add_element(1, p(1), p(2), p(3), p(4), p(5), p(6), p(7), p(8)) @@ -135,22 +150,22 @@ contains call coef%init(gs) ! Initialize the r,s,t values - rst(1)%x = (/ 1.0_rp, 0.0_rp, -0.5_rp/) + rst(1)%x = (/ 1.0_rp, 0.0_rp, -0.5_rp/) rst(2)%x = (/ 0.4_rp, -0.3_rp, -1.0_rp/) rst(3)%x = (/-1.0_rp, -1.0_rp, -1.0_rp/) - rst(4)%x = (/ 1.0_rp, 1.0_rp, 1.0_rp/) + rst(4)%x = (/ 1.0_rp, 1.0_rp, 1.0_rp/) - xyz_equal(1)%x = (/2.0_rp, 1.5_rp , 1.25_rp/) + xyz_equal(1)%x = (/2.0_rp, 1.5_rp , 1.25_rp/) xyz_equal(2)%x = (/1.7_rp, 1.35_rp, 1.0_rp/) xyz_equal(3)%x = (/1.0_rp, 1.0_rp, 1.0_rp/) - xyz_equal(4)%x = (/ 2.0_rp, 2.0_rp, 2.0_rp/) + xyz_equal(4)%x = (/ 2.0_rp, 2.0_rp, 2.0_rp/) call interp%init(coef%Xh) ! initialize the interpolator object - + ! We use the interpolation for a vector field, interpolating the coordinates ! themselves xyz = interp%interpolate(rst, coef%dof%x(:,:,:,e), & - coef%dof%y(:,:,:,e), coef%dof%z(:,:,:,e)) + coef%dof%y(:,:,:,e), coef%dof%z(:,:,:,e)) @assertRelativelyEqual(xyz(1)%x, xyz_equal(1)%x, tolerance=1e-4_rp) @assertRelativelyEqual(xyz(2)%x, xyz_equal(2)%x, tolerance=1e-4_rp) @@ -160,19 +175,19 @@ contains ! We use the interpolation for a scalar field, interpolating the coordinates ! themselves xyz_real = interp%interpolate(rst, coef%dof%x(:,:,:,e)) - @assertRelativelyEqual(xyz_real, (/ 2.0_rp, 1.7_rp, 1.0_rp, 2.0_rp/), tolerance=1e-4_rp) + xyz_target = (/ 2.0_rp, 1.7_rp, 1.0_rp, 2.0_rp/) + @assertRelativelyEqual(xyz_real, xyz_target, tolerance=1e-4_rp) - end subroutine point_interpolation_test_interpolation + end subroutine interpolation @test(npes=[1]) - subroutine point_interpolation_test_interpolation_single_point(this) + subroutine interpolation_single_point(this) implicit none - class (MpiTestMethod), intent(inout) :: this + class (interpolation_test), intent(inout) :: this type(point_interpolator_t) :: interp type(coef_t) :: coef type(mesh_t) :: msh integer, parameter :: lx = 4 - integer :: ierr type(point_t) :: rst(1) ! A list of r,s,t coordinates type(point_t) :: xyz(1), xyz_equal(1) ! the results of the interpolation real(kind=rp) :: xyz_real(1) @@ -182,14 +197,13 @@ contains type(gs_t) :: gs type(space_t) :: Xh - integer :: e = 1 ! our element index (in this example we only use a 1-element mesh) + ! our element index (in this example we only use a 1-element mesh) + integer :: e = 1 - call MPI_Comm_dup(this%getMpiCommunicator(), NEKO_COMM%mpi_val, ierr) pe_rank = this%getProcessRank() pe_size = this%getNumProcesses() - dist = linear_dist_t(1, this%getProcessRank(), & - this%getNumProcesses(), NEKO_COMM) + dist = linear_dist_t(1, pe_rank, pe_size, NEKO_COMM) call create_points(p) call msh%init(3, dist) call msh%add_element(1, p(1), p(2), p(3), p(4), p(5), p(6), p(7), p(8)) @@ -200,16 +214,16 @@ contains call coef%init(gs) ! Initialize the r,s,t values - rst(1)%x = (/ 1.0_rp, 0.0_rp, -0.5_rp/) + rst(1)%x = (/ 1.0_rp, 0.0_rp, -0.5_rp/) - xyz_equal(1)%x = (/2.0_rp, 1.5_rp , 1.25_rp/) + xyz_equal(1)%x = (/2.0_rp, 1.5_rp , 1.25_rp/) call interp%init(coef%Xh) ! initialize the interpolator object - + ! We use the interpolation for a vector field, interpolating the coordinates ! themselves xyz = interp%interpolate(rst, coef%dof%x(:,:,:,e), & - coef%dof%y(:,:,:,e), coef%dof%z(:,:,:,e)) + coef%dof%y(:,:,:,e), coef%dof%z(:,:,:,e)) @assertRelativelyEqual(xyz(1)%x, xyz_equal(1)%x, tolerance=1e-4_rp) @@ -218,6 +232,6 @@ contains xyz_real = interp%interpolate(rst, coef%dof%x(:,:,:,e)) @assertRelativelyEqual(xyz_real, (/ 2.0_rp/), tolerance=1e-4_rp) - end subroutine point_interpolation_test_interpolation_single_point + end subroutine interpolation_single_point -end module +end module point_interpolation_parallel diff --git a/tests/scratch_registry/test_scratch_registry.pf b/tests/scratch_registry/test_scratch_registry.pf index 2f4ee27a51c..61c37aa3a55 100644 --- a/tests/scratch_registry/test_scratch_registry.pf +++ b/tests/scratch_registry/test_scratch_registry.pf @@ -7,6 +7,7 @@ module test_scratch_registry use dofmap use math use scratch_registry + use comm, only : NEKO_COMM, pe_rank, pe_size use num_types implicit none @@ -40,10 +41,13 @@ contains call p(11)%set_id(11) p(12) = point_t(2d0, 1d0, 1d0) call p(12)%set_id(12) - + call msh%init(3, 1) call msh%add_element(1, p(1), p(2), p(4), p(3), & p(7), p(8), p(9), p(10)) + pe_rank = 1 + pe_size = 1 + call msh%generate_conn() end subroutine test_scratch_registry_gen_msh @@ -57,6 +61,7 @@ contains integer :: ierr call test_scratch_registry_gen_msh(msh) + call Xh%init(GLL, lx, lx, lx) dm = dofmap_t(msh, Xh)